Line data Source code
1 : /* RTL-level loop invariant motion.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This implements the loop invariant motion pass. It is very simple
21 : (no calls, no loads/stores, etc.). This should be sufficient to cleanup
22 : things like address arithmetics -- other more complicated invariants should
23 : be eliminated on GIMPLE either in tree-ssa-loop-im.cc or in tree-ssa-pre.cc.
24 :
25 : We proceed loop by loop -- it is simpler than trying to handle things
26 : globally and should not lose much. First we inspect all sets inside loop
27 : and create a dependency graph on insns (saying "to move this insn, you must
28 : also move the following insns").
29 :
30 : We then need to determine what to move. We estimate the number of registers
31 : used and move as many invariants as possible while we still have enough free
32 : registers. We prefer the expensive invariants.
33 :
34 : Then we move the selected invariants out of the loop, creating new
35 : temporaries for them if necessary. */
36 :
37 : #include "config.h"
38 : #include "system.h"
39 : #include "coretypes.h"
40 : #include "backend.h"
41 : #include "target.h"
42 : #include "rtl.h"
43 : #include "tree.h"
44 : #include "cfghooks.h"
45 : #include "df.h"
46 : #include "memmodel.h"
47 : #include "tm_p.h"
48 : #include "insn-config.h"
49 : #include "regs.h"
50 : #include "ira.h"
51 : #include "recog.h"
52 : #include "cfgrtl.h"
53 : #include "cfgloop.h"
54 : #include "expr.h"
55 : #include "rtl-iter.h"
56 : #include "dumpfile.h"
57 :
58 : /* The data stored for the loop. */
59 :
/* Allocated lazily (see find_exits) and reached through loop->aux via the
   LOOP_DATA accessor below.  */
60 : class loop_data
61 : {
62 : public:
63 : class loop *outermost_exit; /* The outermost exit of the loop. */
64 : bool has_call; /* True if the loop contains a call. */
65 : /* Maximal register pressure inside loop for given register class
66 : (defined only for the pressure classes). */
67 : int max_reg_pressure[N_REG_CLASSES];
68 : /* Loop regs referenced and live pseudo-registers. */
69 : bitmap_head regs_ref;
70 : bitmap_head regs_live;
71 : };
72 :
73 : #define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
74 :
75 : /* The description of a use. */
76 :
/* Nodes form a singly-linked list headed at struct def's "uses" field;
   built by record_use.  */
77 : struct use
78 : {
79 : rtx *pos; /* Position of the use. */
80 : rtx_insn *insn; /* The insn in that the use occurs. */
81 : unsigned addr_use_p; /* Whether the use occurs in an address. */
82 : struct use *next; /* Next use in the list. */
83 : };
84 :
85 : /* The description of a def. */
86 :
/* The use list, the counters and the propagation flag are all maintained
   by record_use.  */
87 : struct def
88 : {
89 : struct use *uses; /* The list of uses that are uniquely reached
90 : by it. */
91 : unsigned n_uses; /* Number of such uses. */
92 : unsigned n_addr_uses; /* Number of uses in addresses. */
93 : unsigned invno; /* The corresponding invariant. */
94 : bool can_prop_to_addr_uses; /* True if the corresponding inv can be
95 : propagated into its address uses. */
96 : };
97 :
98 : /* The data stored for each invariant. */
99 :
100 : struct invariant
101 : {
102 : /* The number of the invariant. */
103 : unsigned invno;
104 :
105 : /* The number of the invariant with the same value. */
/* Remains ~0u until computed by find_identical_invariants.  */
106 : unsigned eqto;
107 :
108 : /* The number of invariants which eqto this. */
109 : unsigned eqno;
110 :
111 : /* If we moved the invariant out of the loop, the original regno
112 : that contained its value. */
113 : int orig_regno;
114 :
115 : /* If we moved the invariant out of the loop, the register that contains its
116 : value. */
117 : rtx reg;
118 :
119 : /* The definition of the invariant. */
120 : struct def *def;
121 :
122 : /* The insn in that it is defined. */
123 : rtx_insn *insn;
124 :
125 : /* Whether it is always executed. */
126 : bool always_executed;
127 :
128 : /* Whether to move the invariant. */
129 : bool move;
130 :
131 : /* Whether the invariant is cheap when used as an address. */
132 : bool cheap_address;
133 :
134 : /* Cost of the invariant. */
135 : unsigned cost;
136 :
137 : /* Used for detecting already visited invariants during determining
138 : costs of movements. */
139 : unsigned stamp;
140 :
141 : /* The invariants it depends on. */
142 : bitmap depends_on;
143 : };
144 :
145 : /* Currently processed loop. */
146 : static class loop *curr_loop;
147 :
148 : /* Table of invariants indexed by the df_ref uid field. */
149 :
150 : static unsigned int invariant_table_size = 0;
151 : static struct invariant ** invariant_table;
152 :
153 : /* Entry for hash table of invariant expressions. */
154 :
/* The lookup key is (expr, mode); HASH caches the value computed by
   hash_invariant_expr_1 (see find_or_insert_inv).  */
155 : struct invariant_expr_entry
156 : {
157 : /* The invariant. */
158 : struct invariant *inv;
159 :
160 : /* Its value. */
161 : rtx expr;
162 :
163 : /* Its mode. */
164 : machine_mode mode;
165 :
166 : /* Its hash. */
167 : hashval_t hash;
168 : };
169 :
170 : /* The actual stamp for marking already visited invariants during determining
171 : costs of movements. */
172 :
173 : static unsigned actual_stamp;
174 :
175 : typedef struct invariant *invariant_p;
176 :
177 :
178 : /* The invariants. */
179 :
180 : static vec<invariant_p> invariants;
181 :
182 : /* Check the size of the invariant table and realloc if necessary. */
183 :
184 : static void
185 24590341 : check_invariant_table_size (void)
186 : {
187 24590341 : if (invariant_table_size < DF_DEFS_TABLE_SIZE ())
188 : {
/* Grow with 25% slack and zero the fresh tail so lookups read NULL.  */
189 296981 : unsigned int new_size = DF_DEFS_TABLE_SIZE () + (DF_DEFS_TABLE_SIZE () / 4);
190 296981 : invariant_table = XRESIZEVEC (struct invariant *, invariant_table, new_size);
191 296981 : memset (&invariant_table[invariant_table_size], 0,
192 296981 : (new_size - invariant_table_size) * sizeof (struct invariant *));
193 296981 : invariant_table_size = new_size;
194 : }
195 24590341 : }
196 :
197 : /* Test for possibility of invariantness of X. */
198 :
199 : static bool
200 18978282 : check_maybe_invariant (rtx x)
201 : {
202 18978282 : enum rtx_code code = GET_CODE (x);
203 18978282 : int i, j;
204 18978282 : const char *fmt;
205 :
206 18978282 : switch (code)
207 : {
208 : CASE_CONST_ANY:
209 : case SYMBOL_REF:
210 : case CONST:
211 : case LABEL_REF:
212 : return true;
213 :
214 : case PC:
215 : case UNSPEC_VOLATILE:
216 : case CALL:
217 : return false;
218 :
219 : case REG:
220 : return true;
221 :
222 1816824 : case MEM:
223 : /* Load/store motion is done elsewhere. ??? Perhaps also add it here?
224 : It should not be hard, and might be faster than "elsewhere". */
225 :
226 : /* Just handle the most trivial case where we load from an unchanging
227 : location (most importantly, pic tables). */
228 1816824 : if (MEM_READONLY_P (x) && !MEM_VOLATILE_P (x))
229 : break;
230 :
231 : return false;
232 :
233 496 : case ASM_OPERANDS:
234 : /* Don't mess with insns declared volatile. */
235 496 : if (MEM_VOLATILE_P (x))
236 : return false;
237 : break;
238 :
239 : default:
240 : break;
241 : }
242 :
/* For the remaining codes, X may be invariant only if every
   subexpression is.  */
243 4980240 : fmt = GET_RTX_FORMAT (code);
244 13941629 : for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
245 : {
246 9120555 : if (fmt[i] == 'e')
247 : {
248 8175114 : if (!check_maybe_invariant (XEXP (x, i)))
249 : return false;
250 : }
251 945441 : else if (fmt[i] == 'E')
252 : {
253 1655707 : for (j = 0; j < XVECLEN (x, i); j++)
254 1287691 : if (!check_maybe_invariant (XVECEXP (x, i, j)))
255 : return false;
256 : }
257 : }
258 :
259 : return true;
260 : }
261 :
262 : /* Returns the invariant definition for USE, or NULL if USE is not
263 : invariant. */
264 :
265 : static struct invariant *
266 24376906 : invariant_for_use (df_ref use)
267 : {
268 24376906 : struct df_link *defs;
269 24376906 : df_ref def;
270 24376906 : basic_block bb = DF_REF_BB (use), def_bb;
271 :
272 24376906 : if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
273 : return NULL;
274 :
/* Only a single reaching definition lets us attribute the use to one
   invariant.  */
275 24331815 : defs = DF_REF_CHAIN (use);
276 24331815 : if (!defs || defs->next)
277 : return NULL;
278 17464953 : def = defs->ref;
279 17464953 : check_invariant_table_size ();
280 17464953 : if (!invariant_table[DF_REF_ID (def)])
281 : return NULL;
282 :
/* The definition must dominate the use for its value to be valid here.  */
283 1495219 : def_bb = DF_REF_BB (def);
284 1495219 : if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
285 : return NULL;
286 1494886 : return invariant_table[DF_REF_ID (def)];
287 : }
288 :
289 : /* Computes hash value for invariant expression X in INSN. */
290 :
291 : static hashval_t
292 1831337 : hash_invariant_expr_1 (rtx_insn *insn, rtx x)
293 : {
294 1831337 : enum rtx_code code = GET_CODE (x);
295 1831337 : int i, j;
296 1831337 : const char *fmt;
297 1831337 : hashval_t val = code;
298 1831337 : int do_not_record_p;
299 1831337 : df_ref use;
300 1831337 : struct invariant *inv;
301 :
302 1831337 : switch (code)
303 : {
304 797657 : CASE_CONST_ANY:
305 797657 : case SYMBOL_REF:
306 797657 : case CONST:
307 797657 : case LABEL_REF:
308 797657 : return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
309 :
310 719893 : case REG:
311 719893 : use = df_find_use (insn, x);
312 719893 : if (!use)
313 0 : return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
314 719893 : inv = invariant_for_use (use);
315 719893 : if (!inv)
316 461287 : return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
317 :
/* Hash a register holding an invariant value by its equivalence class
   number, so that equivalent invariants collide.  */
318 258606 : gcc_assert (inv->eqto != ~0u);
319 : return inv->eqto;
320 :
321 313787 : default:
322 313787 : break;
323 : }
324 :
/* Compound expression: fold the hashes of all operands into VAL.  */
325 313787 : fmt = GET_RTX_FORMAT (code);
326 896149 : for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
327 : {
328 582362 : if (fmt[i] == 'e')
329 535058 : val ^= hash_invariant_expr_1 (insn, XEXP (x, i));
330 : else if (fmt[i] == 'E')
331 : {
332 4804 : for (j = 0; j < XVECLEN (x, i); j++)
333 3921 : val ^= hash_invariant_expr_1 (insn, XVECEXP (x, i, j));
334 : }
335 : else if (fmt[i] == 'i' || fmt[i] == 'n')
336 249 : val ^= XINT (x, i);
337 : else if (fmt[i] == 'L')
338 51 : val ^= XLOC (x, i);
339 : else if (fmt[i] == 'p')
340 7768 : val ^= constant_lower_bound (SUBREG_BYTE (x));
341 : }
342 :
343 : return val;
344 : }
345 :
346 : /* Returns true if the invariant expressions E1 and E2 used in insns INSN1
347 : and INSN2 have always the same value. */
348 :
349 : static bool
350 2412470 : invariant_expr_equal_p (rtx_insn *insn1, rtx e1, rtx_insn *insn2, rtx e2)
351 : {
352 2412470 : enum rtx_code code = GET_CODE (e1);
353 2412470 : int i, j;
354 2412470 : const char *fmt;
355 2412470 : df_ref use1, use2;
356 2412470 : struct invariant *inv1 = NULL, *inv2 = NULL;
357 2412470 : rtx sub1, sub2;
358 :
359 : /* If mode of only one of the operands is VOIDmode, it is not equivalent to
360 : the other one. If both are VOIDmode, we rely on the caller of this
361 : function to verify that their modes are the same. */
362 2412470 : if (code != GET_CODE (e2) || GET_MODE (e1) != GET_MODE (e2))
363 : return false;
364 :
365 1240323 : switch (code)
366 : {
367 502964 : CASE_CONST_ANY:
368 502964 : case SYMBOL_REF:
369 502964 : case CONST:
370 502964 : case LABEL_REF:
371 502964 : return rtx_equal_p (e1, e2);
372 :
/* Two registers are equivalent if both carry the same invariant value
   (same eqto class), or if neither is invariant and they are identical
   rtxes.  */
373 546251 : case REG:
374 546251 : use1 = df_find_use (insn1, e1);
375 546251 : use2 = df_find_use (insn2, e2);
376 546251 : if (use1)
377 546251 : inv1 = invariant_for_use (use1);
378 546251 : if (use2)
379 546251 : inv2 = invariant_for_use (use2);
380 :
381 546251 : if (!inv1 && !inv2)
382 214485 : return rtx_equal_p (e1, e2);
383 :
384 331766 : if (!inv1 || !inv2)
385 : return false;
386 :
387 234465 : gcc_assert (inv1->eqto != ~0u);
388 234465 : gcc_assert (inv2->eqto != ~0u);
389 234465 : return inv1->eqto == inv2->eqto;
390 :
391 191108 : default:
392 191108 : break;
393 : }
394 :
/* Compound expression: compare all operands pairwise.  */
395 191108 : fmt = GET_RTX_FORMAT (code);
396 246035 : for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
397 : {
398 222286 : if (fmt[i] == 'e')
399 : {
400 196798 : sub1 = XEXP (e1, i);
401 196798 : sub2 = XEXP (e2, i);
402 :
403 196798 : if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
404 : return false;
405 : }
406 :
407 : else if (fmt[i] == 'E')
408 : {
409 516 : if (XVECLEN (e1, i) != XVECLEN (e2, i))
410 : return false;
411 :
412 1384 : for (j = 0; j < XVECLEN (e1, i); j++)
413 : {
414 1145 : sub1 = XVECEXP (e1, i, j);
415 1145 : sub2 = XVECEXP (e2, i, j);
416 :
417 1145 : if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
418 : return false;
419 : }
420 : }
421 : else if (fmt[i] == 'i' || fmt[i] == 'n')
422 : {
423 125 : if (XINT (e1, i) != XINT (e2, i))
424 : return false;
425 : }
426 : else if (fmt[i] == 'p')
427 : {
428 4892 : if (maybe_ne (SUBREG_BYTE (e1), SUBREG_BYTE (e2)))
429 : return false;
430 : }
431 : /* Unhandled type of subexpression, we fail conservatively. */
432 : else
433 : return false;
434 : }
435 :
436 : return true;
437 : }
438 :
/* Hash-table traits for invariant_expr_entry.  */
439 : struct invariant_expr_hasher : free_ptr_hash <invariant_expr_entry>
440 : {
441 : static inline hashval_t hash (const invariant_expr_entry *);
442 : static inline bool equal (const invariant_expr_entry *,
443 : const invariant_expr_entry *);
444 : };
445 :
446 : /* Returns hash value for invariant expression entry ENTRY. */
447 :
448 : inline hashval_t
/* The hash is precomputed when the entry is created (find_or_insert_inv).  */
449 2957773 : invariant_expr_hasher::hash (const invariant_expr_entry *entry)
450 : {
451 2957773 : return entry->hash;
452 : }
453 :
454 : /* Compares invariant expression entries ENTRY1 and ENTRY2. */
455 :
456 : inline bool
457 3424952 : invariant_expr_hasher::equal (const invariant_expr_entry *entry1,
458 : const invariant_expr_entry *entry2)
459 : {
/* Modes must match first; VOIDmode constants rely on this check, as
   invariant_expr_equal_p notes.  */
460 3424952 : if (entry1->mode != entry2->mode)
461 : return 0;
462 :
463 2214527 : return invariant_expr_equal_p (entry1->inv->insn, entry1->expr,
464 2214527 : entry2->inv->insn, entry2->expr);
465 : }
466 :
467 : typedef hash_table<invariant_expr_hasher> invariant_htab_type;
468 :
469 : /* Checks whether invariant with value EXPR in machine mode MODE is
470 : recorded in EQ. If this is the case, return the invariant. Otherwise
471 : insert INV to the table for this expression and return INV. */
472 :
473 : static struct invariant *
474 1292358 : find_or_insert_inv (invariant_htab_type *eq, rtx expr, machine_mode mode,
475 : struct invariant *inv)
476 : {
477 1292358 : hashval_t hash = hash_invariant_expr_1 (inv->insn, expr);
478 1292358 : struct invariant_expr_entry *entry;
479 1292358 : struct invariant_expr_entry pentry;
480 1292358 : invariant_expr_entry **slot;
481 :
482 1292358 : pentry.expr = expr;
483 1292358 : pentry.inv = inv;
484 1292358 : pentry.mode = mode;
485 1292358 : slot = eq->find_slot_with_hash (&pentry, hash, INSERT);
486 1292358 : entry = *slot;
487 :
/* An equivalent invariant is already recorded; return its representative.  */
488 1292358 : if (entry)
489 361617 : return entry->inv;
490 :
/* First occurrence of this expression; record INV as the representative.  */
491 930741 : entry = XNEW (struct invariant_expr_entry);
492 930741 : entry->inv = inv;
493 930741 : entry->expr = expr;
494 930741 : entry->mode = mode;
495 930741 : entry->hash = hash;
496 930741 : *slot = entry;
497 :
498 930741 : return inv;
499 : }
500 :
501 : /* Finds invariants identical to INV and records the equivalence. EQ is the
502 : hash table of the invariants. */
503 :
504 : static void
505 1552453 : find_identical_invariants (invariant_htab_type *eq, struct invariant *inv)
506 : {
507 1552453 : unsigned depno;
508 1552453 : bitmap_iterator bi;
509 1552453 : struct invariant *dep;
510 1552453 : rtx expr, set;
511 1552453 : machine_mode mode;
512 1552453 : struct invariant *tmp;
513 :
/* Already processed (eqto computed).  */
514 1552453 : if (inv->eqto != ~0u)
515 260095 : return;
516 :
/* Process dependencies first: hashing a REG use below needs the eqto
   class of its defining invariant (see hash_invariant_expr_1).  */
517 1552453 : EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
518 : {
519 260095 : dep = invariants[depno];
520 260095 : find_identical_invariants (eq, dep);
521 : }
522 :
523 1292358 : set = single_set (inv->insn);
524 1292358 : expr = SET_SRC (set);
525 1292358 : mode = GET_MODE (expr);
526 1292358 : if (mode == VOIDmode)
527 383663 : mode = GET_MODE (SET_DEST (set));
528 :
529 1292358 : tmp = find_or_insert_inv (eq, expr, mode, inv);
530 1292358 : inv->eqto = tmp->invno;
531 :
532 1292358 : if (tmp->invno != inv->invno && inv->always_executed)
533 64056 : tmp->eqno++;
534 :
535 1292358 : if (dump_file && inv->eqto != inv->invno)
536 0 : fprintf (dump_file,
537 : "Invariant %d is equivalent to invariant %d.\n",
538 : inv->invno, inv->eqto);
539 : }
540 :
541 : /* Find invariants with the same value and record the equivalences. */
542 :
543 : static void
544 661851 : merge_identical_invariants (void)
545 : {
546 661851 : unsigned i;
547 661851 : struct invariant *inv;
548 661851 : invariant_htab_type eq (invariants.length ());
549 :
/* Computing eqto for every invariant populates EQ as a side effect.  */
550 2616060 : FOR_EACH_VEC_ELT (invariants, i, inv)
551 1292358 : find_identical_invariants (&eq, inv);
552 661851 : }
553 :
554 : /* Determines the basic blocks inside LOOP that are always executed and
555 : stores their bitmap to ALWAYS_REACHED. MAY_EXIT is a bitmap of
556 : basic blocks that may either exit the loop, or contain the call that
557 : does not have to return. BODY is body of the loop obtained by
558 : get_loop_body_in_dom_order. */
559 :
560 : static void
561 1323702 : compute_always_reached (class loop *loop, basic_block *body,
562 : bitmap may_exit, bitmap always_reached)
563 : {
564 1323702 : unsigned i;
565 :
566 2507375 : for (i = 0; i < loop->num_nodes; i++)
567 : {
568 2493429 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[i]))
569 1580188 : bitmap_set_bit (always_reached, i);
570 :
/* BODY is in dominance order, so no block after the first one that may
   exit (or not return) is guaranteed to be reached.  */
571 2493429 : if (bitmap_bit_p (may_exit, i))
572 : return;
573 : }
574 : }
575 :
576 : /* Finds exits out of the LOOP with body BODY. Marks blocks in that we may
577 : exit the loop by cfg edge to HAS_EXIT and MAY_EXIT. In MAY_EXIT
578 : additionally mark blocks that may exit due to a call. */
579 :
580 : static void
581 661851 : find_exits (class loop *loop, basic_block *body,
582 : bitmap may_exit, bitmap has_exit)
583 : {
584 661851 : unsigned i;
585 661851 : edge_iterator ei;
586 661851 : edge e;
587 661851 : class loop *outermost_exit = loop, *aexit;
588 661851 : bool has_call = false;
589 661851 : rtx_insn *insn;
590 :
591 4750719 : for (i = 0; i < loop->num_nodes; i++)
592 : {
/* Blocks belonging directly to LOOP: scan their insns and edges.  */
593 4088868 : if (body[i]->loop_father == loop)
594 : {
/* A call that may not return (or a looping const/pure call) makes the
   block a potential exit.  */
595 29675147 : FOR_BB_INSNS (body[i], insn)
596 : {
597 27023704 : if (CALL_P (insn)
598 27023704 : && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
599 474036 : || !RTL_CONST_OR_PURE_CALL_P (insn)))
600 : {
601 431986 : has_call = true;
602 431986 : bitmap_set_bit (may_exit, i);
603 431986 : break;
604 : }
605 : }
606 :
607 7882807 : FOR_EACH_EDGE (e, ei, body[i]->succs)
608 : {
609 4799378 : if (! flow_bb_inside_loop_p (loop, e->dest))
610 : {
611 1119946 : bitmap_set_bit (may_exit, i);
612 1119946 : bitmap_set_bit (has_exit, i);
613 1119946 : outermost_exit = find_common_loop (outermost_exit,
614 1119946 : e->dest->loop_father);
615 : }
616 : /* If we enter a subloop that might never terminate treat
617 : it like a possible exit. */
618 4799378 : if (flow_loop_nested_p (loop, e->dest->loop_father))
619 152740 : bitmap_set_bit (may_exit, i);
620 : }
621 3083429 : continue;
622 3083429 : }
623 :
624 : /* Use the data stored for the subloop to decide whether we may exit
625 : through it. It is sufficient to do this for header of the loop,
626 : as other basic blocks inside it must be dominated by it. */
627 1005439 : if (body[i]->loop_father->header != body[i])
628 775946 : continue;
629 :
630 229493 : if (LOOP_DATA (body[i]->loop_father)->has_call)
631 : {
632 62777 : has_call = true;
633 62777 : bitmap_set_bit (may_exit, i);
634 : }
635 229493 : aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit;
636 229493 : if (aexit != loop)
637 : {
638 114126 : bitmap_set_bit (may_exit, i);
639 114126 : bitmap_set_bit (has_exit, i);
640 :
641 114126 : if (flow_loop_nested_p (aexit, outermost_exit))
642 4088868 : outermost_exit = aexit;
643 : }
644 : }
645 :
/* Allocate the loop data lazily; enclosing loops consult it via
   LOOP_DATA above.  */
646 661851 : if (loop->aux == NULL)
647 : {
648 661850 : loop->aux = xcalloc (1, sizeof (class loop_data));
649 661850 : bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
650 661850 : bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
651 : }
652 661851 : LOOP_DATA (loop)->outermost_exit = outermost_exit;
653 661851 : LOOP_DATA (loop)->has_call = has_call;
654 : }
655 :
656 : /* Check whether we may assign a value to X from a register. */
657 :
658 : static bool
659 13350306 : may_assign_reg_p (rtx x)
660 : {
/* Reject values whose mode cannot be held or copied in a register.  */
661 13350306 : return (GET_MODE (x) != VOIDmode
662 13348807 : && GET_MODE (x) != BLKmode
663 13331766 : && can_copy_p (GET_MODE (x))
664 : /* Do not mess with the frame pointer adjustments that can
665 : be generated e.g. by expand_builtin_setjmp_receiver. */
666 11357144 : && x != frame_pointer_rtx
667 24707450 : && (!REG_P (x)
668 9603926 : || !HARD_REGISTER_P (x)
669 1639464 : || REGNO_REG_CLASS (REGNO (x)) != NO_REGS));
670 : }
671 :
672 : /* Finds definitions that may correspond to invariants in LOOP with body
673 : BODY. */
674 :
675 : static void
676 661851 : find_defs (class loop *loop)
677 : {
678 661851 : if (dump_file)
679 : {
680 21 : fprintf (dump_file,
681 : "*****starting processing of loop %d ******\n",
682 : loop->num);
683 : }
684 :
/* Build use-def chains restricted to LOOP, then make sure the table
   mapping def ids to invariants is large enough.  */
685 661851 : df_chain_add_problem (DF_UD_CHAIN);
686 661851 : df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
687 661851 : df_analyze_loop (loop);
688 661851 : check_invariant_table_size ();
689 :
690 661851 : if (dump_file)
691 : {
692 21 : df_dump_region (dump_file);
693 21 : fprintf (dump_file,
694 : "*****ending processing of loop %d ******\n",
695 : loop->num);
696 : }
697 : }
698 :
699 : /* Creates a new invariant for definition DEF in INSN, depending on invariants
700 : in DEPENDS_ON. ALWAYS_EXECUTED is true if the insn is always executed,
701 : unless the program ends due to a function call. The newly created invariant
702 : is returned. */
703 :
704 : static struct invariant *
705 1292358 : create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
706 : bool always_executed)
707 : {
708 1292358 : struct invariant *inv = XNEW (struct invariant);
709 1292358 : rtx set = single_set (insn);
710 1292358 : bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
711 :
712 1292358 : inv->def = def;
713 1292358 : inv->always_executed = always_executed;
714 1292358 : inv->depends_on = depends_on;
715 :
716 : /* If the set is simple, usually by moving it we move the whole store out of
717 : the loop. Otherwise we save only cost of the computation. */
718 1292358 : if (def)
719 : {
720 615411 : inv->cost = set_rtx_cost (set, speed);
721 : /* ??? Try to determine cheapness of address computation. Unfortunately
722 : the address cost is only a relative measure, we can't really compare
723 : it with any absolute number, but only with other address costs.
724 : But here we don't have any other addresses, so compare with a magic
725 : number anyway. It has to be large enough to not regress PR33928
726 : (by avoiding to move reg+8,reg+16,reg+24 invariants), but small
727 : enough to not regress 410.bwaves either (by still moving reg+reg
728 : invariants).
729 : See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */
730 615411 : if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set))))
731 467380 : inv->cheap_address = address_cost (SET_SRC (set), word_mode,
732 467380 : ADDR_SPACE_GENERIC, speed) < 3;
733 : else
734 148031 : inv->cheap_address = false;
735 : }
736 : else
737 : {
738 676947 : inv->cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)),
739 : speed);
740 676947 : inv->cheap_address = false;
741 : }
742 :
743 1292358 : inv->move = false;
744 1292358 : inv->reg = NULL_RTX;
745 1292358 : inv->orig_regno = -1;
746 1292358 : inv->stamp = 0;
747 1292358 : inv->insn = insn;
748 :
/* The invariant number is its index in the invariants vector.  */
749 1292358 : inv->invno = invariants.length ();
/* eqto is filled in later by find_identical_invariants.  */
750 1292358 : inv->eqto = ~0u;
751 :
752 : /* Itself. */
753 1292358 : inv->eqno = 1;
754 :
755 1292358 : if (def)
756 615411 : def->invno = inv->invno;
757 1292358 : invariants.safe_push (inv);
758 :
759 1292358 : if (dump_file)
760 : {
761 12 : fprintf (dump_file,
762 : "Set in insn %d is invariant (%d), cost %d, depends on ",
763 12 : INSN_UID (insn), inv->invno, inv->cost);
764 12 : dump_bitmap (dump_file, inv->depends_on);
765 : }
766 :
767 1292358 : return inv;
768 : }
769 :
770 : /* Return a canonical version of X for the address, from the point of view,
771 : that all multiplications are represented as MULT instead of the multiply
772 : by a power of 2 being represented as ASHIFT.
773 :
774 : Callers should prepare a copy of X because this function may modify it
775 : in place. */
776 :
777 : static void
778 30396 : canonicalize_address_mult (rtx x)
779 : {
780 30396 : subrtx_var_iterator::array_type array;
781 211642 : FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
782 : {
783 181246 : rtx sub = *iter;
784 181246 : scalar_int_mode sub_mode;
785 312004 : if (is_a <scalar_int_mode> (GET_MODE (sub), &sub_mode)
786 130758 : && GET_CODE (sub) == ASHIFT
787 180 : && CONST_INT_P (XEXP (sub, 1))
788 360 : && INTVAL (XEXP (sub, 1)) < GET_MODE_BITSIZE (sub_mode)
789 180 : && INTVAL (XEXP (sub, 1)) >= 0)
790 : {
/* Rewrite (ashift X N) as (mult X 2**N) in place.  */
791 180 : HOST_WIDE_INT shift = INTVAL (XEXP (sub, 1));
792 180 : PUT_CODE (sub, MULT);
793 180 : XEXP (sub, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, sub_mode);
794 180 : iter.skip_subrtxes ();
795 : }
796 : }
797 30396 : }
798 :
799 : /* Maximum number of sub expressions in address. We set it to
800 : a small integer since it's unlikely to have a complicated
801 : address expression. */
802 :
803 : #define MAX_CANON_ADDR_PARTS (5)
804 :
805 : /* Collect sub expressions in address X with PLUS as the separator.
806 : Sub expressions are stored in vector ADDR_PARTS. */
807 :
808 : static void
809 30396 : collect_address_parts (rtx x, vec<rtx> *addr_parts)
810 : {
811 30396 : subrtx_var_iterator::array_type array;
812 172846 : FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
813 : {
814 142450 : rtx sub = *iter;
815 :
/* Non-PLUS nodes are the leaves of the PLUS tree; record them whole
   without recursing into them.  */
816 142450 : if (GET_CODE (sub) != PLUS)
817 : {
818 86423 : addr_parts->safe_push (sub);
819 86423 : iter.skip_subrtxes ();
820 : }
821 : }
822 30396 : }
823 :
824 : /* Compare function for sorting sub expressions X and Y based on
825 : precedence defined for commutative operations. */
826 :
827 : static int
828 301517 : compare_address_parts (const void *x, const void *y)
829 : {
830 301517 : const rtx *rx = (const rtx *)x;
831 301517 : const rtx *ry = (const rtx *)y;
832 301517 : int px = commutative_operand_precedence (*rx);
833 301517 : int py = commutative_operand_precedence (*ry);
834 :
/* Sort in descending precedence order: higher-precedence parts first.  */
835 301517 : return (py - px);
836 : }
837 :
838 : /* Return a canonical version address for X by following steps:
839 : 1) Rewrite ASHIFT into MULT recursively.
840 : 2) Divide address into sub expressions with PLUS as the
841 : separator.
842 : 3) Sort sub expressions according to precedence defined
843 : for commutative operations.
844 : 4) Simplify CONST_INT_P sub expressions.
845 : 5) Create new canonicalized address and return.
846 : Callers should prepare a copy of X because this function may
847 : modify it in place. */
848 :
849 : static rtx
850 30396 : canonicalize_address (rtx x)
851 : {
852 30396 : rtx res;
853 30396 : unsigned int i, j;
854 30396 : machine_mode mode = GET_MODE (x);
855 30396 : auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts;
856 :
857 : /* Rewrite ASHIFT into MULT. */
858 30396 : canonicalize_address_mult (x);
859 : /* Divide address into sub expressions. */
860 30396 : collect_address_parts (x, &addr_parts);
861 : /* Unlikely to have very complicated address. */
862 30396 : if (addr_parts.length () < 2
863 30396 : || addr_parts.length () > MAX_CANON_ADDR_PARTS)
864 : return x;
865 :
866 : /* Sort sub expressions according to canonicalization precedence. */
867 30371 : addr_parts.qsort (compare_address_parts);
868 :
869 : /* Simplify all constant int summary if possible. */
/* After sorting, I indexes the first CONST_INT part; everything from
   there on is constant (checked by the assert below).  */
870 115335 : for (i = 0; i < addr_parts.length (); i++)
871 82797 : if (CONST_INT_P (addr_parts[i]))
872 : break;
873 :
/* Fold all trailing constants into addr_parts[i].  */
874 33972 : for (j = i + 1; j < addr_parts.length (); j++)
875 : {
876 3601 : gcc_assert (CONST_INT_P (addr_parts[j]));
877 3601 : addr_parts[i] = simplify_gen_binary (PLUS, mode,
878 3601 : addr_parts[i],
879 3601 : addr_parts[j]);
880 : }
881 :
882 : /* Chain PLUS operators to the left for !CONST_INT_P sub expressions. */
883 30371 : res = addr_parts[0];
884 54607 : for (j = 1; j < i; j++)
885 24236 : res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]);
886 :
887 : /* Pickup the last CONST_INT_P sub expression. */
888 58600 : if (i < addr_parts.length ())
889 28204 : res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]);
890 :
891 : return res;
892 30396 : }
893 :
894 : /* Given invariant DEF and its address USE, check if the corresponding
895 : invariant expr can be propagated into the use or not. */
896 :
897 : static bool
898 53595 : inv_can_prop_to_addr_use (struct def *def, df_ref use)
899 : {
900 53595 : struct invariant *inv;
901 53595 : rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set;
902 53595 : rtx_insn *use_insn = DF_REF_INSN (use);
903 53595 : rtx_insn *def_insn;
904 53595 : bool ok;
905 :
906 53595 : inv = invariants[def->invno];
907 : /* No need to check if address expression is expensive. */
908 53595 : if (!inv->cheap_address)
909 : return false;
910 :
911 46102 : def_insn = inv->insn;
912 46102 : def_set = single_set (def_insn);
913 46102 : if (!def_set)
914 : return false;
915 :
/* Tentatively substitute the invariant expression into the use and see
   whether the insn still matches; all changes are rolled back below via
   cancel_changes.  */
916 46102 : validate_unshare_change (use_insn, pos, SET_SRC (def_set), true);
917 46102 : ok = verify_changes (0);
918 : /* Try harder with canonicalization in address expression. */
919 46102 : if (!ok && (use_set = single_set (use_insn)) != NULL_RTX)
920 : {
921 34008 : rtx src, dest, mem = NULL_RTX;
922 :
923 34008 : src = SET_SRC (use_set);
924 34008 : dest = SET_DEST (use_set);
925 34008 : if (MEM_P (src))
926 : mem = src;
927 10555 : else if (MEM_P (dest))
928 : mem = dest;
929 :
930 : if (mem != NULL_RTX
931 64430 : && !memory_address_addr_space_p (GET_MODE (mem),
932 : XEXP (mem, 0),
933 30422 : MEM_ADDR_SPACE (mem)))
934 : {
935 30396 : rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0)));
936 60792 : if (memory_address_addr_space_p (GET_MODE (mem),
937 30396 : addr, MEM_ADDR_SPACE (mem)))
938 46102 : ok = true;
939 : }
940 : }
941 46102 : cancel_changes (0);
942 46102 : return ok;
943 : }
944 :
945 : /* Record USE at DEF. */
946 :
947 : static void
948 670049 : record_use (struct def *def, df_ref use)
949 : {
950 670049 : struct use *u = XNEW (struct use);
951 :
952 670049 : u->pos = DF_REF_REAL_LOC (use);
953 670049 : u->insn = DF_REF_INSN (use);
/* Address uses are those that occur inside a MEM load or store.  */
954 670049 : u->addr_use_p = (DF_REF_TYPE (use) == DF_REF_REG_MEM_LOAD
955 670049 : || DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE);
956 670049 : u->next = def->uses;
957 670049 : def->uses = u;
958 670049 : def->n_uses++;
959 670049 : if (u->addr_use_p)
960 : {
961 : /* Initialize propagation information if this is the first addr
962 : use of the inv def. */
963 59334 : if (def->n_addr_uses == 0)
964 44701 : def->can_prop_to_addr_uses = true;
965 :
966 59334 : def->n_addr_uses++;
967 59334 : if (def->can_prop_to_addr_uses && !inv_can_prop_to_addr_use (def, use))
968 13649 : def->can_prop_to_addr_uses = false;
969 : }
970 : }
971 :
972 : /* Finds the invariants USE depends on and store them to the DEPENDS_ON
973 : bitmap. Returns true if all dependencies of USE are known to be
974 : loop invariants, false otherwise. */
975 :
976 : static bool
977 7035522 : check_dependency (basic_block bb, df_ref use, bitmap depends_on)
978 : {
979 7035522 : df_ref def;
980 7035522 : basic_block def_bb;
981 7035522 : struct df_link *defs;
982 7035522 : struct def *def_data;
983 7035522 : struct invariant *inv;
984 :
985 7035522 : if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
986 : return false;
987 :
988 7001570 : defs = DF_REF_CHAIN (use);
/* No reaching definition inside the loop: the register is defined
   outside, hence trivially invariant.  */
989 7001570 : if (!defs)
990 : {
991 1283972 : unsigned int regno = DF_REF_REGNO (use);
992 :
993 : /* If this is the use of an uninitialized argument register that is
994 : likely to be spilled, do not move it lest this might extend its
995 : lifetime and cause reload to die. This can occur for a call to
996 : a function taking complex number arguments and moving the insns
997 : preparing the arguments without moving the call itself wouldn't
998 : gain much in practice. */
999 1283972 : if ((DF_REF_FLAGS (use) & DF_HARD_REG_LIVE)
1000 325 : && FUNCTION_ARG_REGNO_P (regno)
1001 1283977 : && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno)))
1002 : return false;
1003 :
1004 1283971 : return true;
1005 : }
1006 :
/* Multiple reaching definitions: the value is not known to be invariant.  */
1007 5717598 : if (defs->next)
1008 : return false;
1009 :
1010 5186275 : def = defs->ref;
1011 5186275 : check_invariant_table_size ();
1012 5186275 : inv = invariant_table[DF_REF_ID (def)];
1013 5186275 : if (!inv)
1014 : return false;
1015 :
1016 286809 : def_data = inv->def;
1017 286809 : gcc_assert (def_data != NULL);
1018 :
1019 286809 : def_bb = DF_REF_BB (def);
1020 : /* Note that in case bb == def_bb, we know that the definition
1021 : dominates insn, because def has invariant_table[DF_REF_ID(def)]
1022 : defined and we process the insns in the basic block bb
1023 : sequentially. */
1024 286809 : if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
1025 : return false;
1026 :
1027 286728 : bitmap_set_bit (depends_on, def_data->invno);
1028 286728 : return true;
1029 : }
1030 :
1031 :
1032 : /* Finds the invariants INSN depends on and store them to the DEPENDS_ON
1033 : bitmap. Returns true if all dependencies of INSN are known to be
1034 : loop invariants, false otherwise. */
1035 :
1036 : static bool
1037 6757181 : check_dependencies (rtx_insn *insn, bitmap depends_on)
1038 : {
1039 6757181 : struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1040 6757181 : df_ref use;
1041 6757181 : basic_block bb = BLOCK_FOR_INSN (insn);
1042 :
1043 8192221 : FOR_EACH_INSN_INFO_USE (use, insn_info)
1044 6899863 : if (!check_dependency (bb, use, depends_on))
1045 : return false;
1046 1428017 : FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1047 135659 : if (!check_dependency (bb, use, depends_on))
1048 : return false;
1049 :
1050 : return true;
1051 : }
1052 :
1053 : /* Pre-check candidate DEST to skip the one which cannot make a valid insn
1054 : during move_invariant_reg. SIMPLE is to skip HARD_REGISTER. */
1055 : static bool
1056 11357144 : pre_check_invariant_p (bool simple, rtx dest)
1057 : {
1058 11357144 : if (simple && REG_P (dest) && DF_REG_DEF_COUNT (REGNO (dest)) > 1)
1059 : {
1060 2660206 : df_ref use;
1061 2660206 : unsigned int i = REGNO (dest);
1062 2660206 : struct df_insn_info *insn_info;
1063 2660206 : df_ref def_rec;
1064 :
1065 12085970 : for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use))
1066 : {
1067 11303128 : rtx_insn *ref = DF_REF_INSN (use);
1068 11303128 : insn_info = DF_INSN_INFO_GET (ref);
1069 :
1070 19865353 : FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
1071 10439589 : if (DF_REF_REGNO (def_rec) == i)
1072 : {
1073 : /* Multi definitions at this stage, most likely are due to
1074 : instruction constraints, which requires both read and write
1075 : on the same register. Since move_invariant_reg is not
1076 : powerful enough to handle such cases, just ignore the INV
1077 : and leave the chance to others. */
1078 : return false;
1079 : }
1080 : }
1081 : }
1082 : return true;
1083 : }
1084 :
/* Finds invariant in INSN.  ALWAYS_REACHED is true if the insn is always
   executed.  ALWAYS_EXECUTED is true if the insn is always executed,
   unless the program ends due to a function call.  */

static void
find_invariant_insn (rtx_insn *insn, bool always_reached, bool always_executed)
{
  df_ref ref;
  struct def *def;
  bitmap depends_on;
  rtx set, dest;
  bool simple = true;
  struct invariant *inv;

  /* Jumps have control flow side-effects.  */
  if (JUMP_P (insn))
    return;

  /* Only single-set insns are candidates.  */
  set = single_set (insn);
  if (!set)
    return;
  dest = SET_DEST (set);

  /* "Simple" means the value lands in an ordinary pseudo register, so
     later uses can be redirected to a hoisted copy.  */
  if (!REG_P (dest)
      || HARD_REGISTER_P (dest))
    simple = false;

  if (!may_assign_reg_p (dest)
      || !pre_check_invariant_p (simple, dest)
      || !check_maybe_invariant (SET_SRC (set)))
    return;

  /* If the insn can throw exception, we cannot move it at all without changing
     cfg.  */
  if (can_throw_internal (insn))
    return;

  /* We cannot make trapping insn executed, unless it was executed before.  */
  if (may_trap_or_fault_p (PATTERN (insn)) && !always_reached)
    return;

  /* inline-asm that is not always executed cannot be moved
     as it might conditionally trap.  */
  if (!always_reached && asm_noperands (PATTERN (insn)) >= 0)
    return;

  /* Collect the invariants this insn's operands depend on; bail out if
     any operand is not known to be invariant.  */
  depends_on = BITMAP_ALLOC (NULL);
  if (!check_dependencies (insn, depends_on))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (simple)
    def = XCNEW (struct def);
  else
    def = NULL;

  inv = create_new_invariant (def, insn, depends_on, always_executed);

  if (simple)
    {
      /* Map the insn's definition to the new invariant so later uses can
	 be resolved through invariant_table.  */
      ref = df_find_def (insn, dest);
      check_invariant_table_size ();
      invariant_table[DF_REF_ID (ref)] = inv;
    }
}
1152 :
1153 : /* Record registers used in INSN that have a unique invariant definition. */
1154 :
1155 : static void
1156 15584079 : record_uses (rtx_insn *insn)
1157 : {
1158 15584079 : struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1159 15584079 : df_ref use;
1160 15584079 : struct invariant *inv;
1161 :
1162 37703483 : FOR_EACH_INSN_INFO_USE (use, insn_info)
1163 : {
1164 22119404 : inv = invariant_for_use (use);
1165 22119404 : if (inv)
1166 669257 : record_use (inv->def, use);
1167 : }
1168 16029186 : FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1169 : {
1170 445107 : inv = invariant_for_use (use);
1171 445107 : if (inv)
1172 792 : record_use (inv->def, use);
1173 : }
1174 15584079 : }
1175 :
/* Finds invariants in INSN.  ALWAYS_REACHED is true if the insn is always
   executed.  ALWAYS_EXECUTED is true if the insn is always executed,
   unless the program ends due to a function call.  */

static void
find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
{
  /* Detect whether INSN itself is a new invariant first, then record
     INSN's uses of known invariant definitions.  */
  find_invariant_insn (insn, always_reached, always_executed);
  record_uses (insn);
}
1186 :
1187 : /* Finds invariants in basic block BB. ALWAYS_REACHED is true if the
1188 : basic block is always executed. ALWAYS_EXECUTED is true if the basic
1189 : block is always executed, unless the program ends due to a function
1190 : call. */
1191 :
1192 : static void
1193 4088868 : find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
1194 : bool always_executed)
1195 : {
1196 4088868 : rtx_insn *insn;
1197 4088868 : basic_block preheader = loop_preheader_edge (loop)->src;
1198 :
1199 : /* Don't move insn of cold BB out of loop to preheader to reduce calculations
1200 : and register live range in hot loop with cold BB. */
1201 4088868 : if (!always_executed && preheader->count > bb->count)
1202 : {
1203 674128 : if (dump_file)
1204 2 : fprintf (dump_file, "Don't move invariant from bb: %d out of loop %d\n",
1205 : bb->index, loop->num);
1206 674128 : return;
1207 : }
1208 :
1209 38622814 : FOR_BB_INSNS (bb, insn)
1210 : {
1211 35208074 : if (!NONDEBUG_INSN_P (insn))
1212 19623995 : continue;
1213 :
1214 15584079 : find_invariants_insn (insn, always_reached, always_executed);
1215 :
1216 15584079 : if (always_reached
1217 4061678 : && CALL_P (insn)
1218 15701561 : && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1219 116763 : || ! RTL_CONST_OR_PURE_CALL_P (insn)))
1220 : always_reached = false;
1221 : }
1222 : }
1223 :
1224 : /* Finds invariants in LOOP with body BODY. ALWAYS_REACHED is the bitmap of
1225 : basic blocks in BODY that are always executed. ALWAYS_EXECUTED is the
1226 : bitmap of basic blocks in BODY that are always executed unless the program
1227 : ends due to a function call. */
1228 :
1229 : static void
1230 661851 : find_invariants_body (class loop *loop, basic_block *body,
1231 : bitmap always_reached, bitmap always_executed)
1232 : {
1233 661851 : unsigned i;
1234 :
1235 4750719 : for (i = 0; i < loop->num_nodes; i++)
1236 4088868 : find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i),
1237 4088868 : bitmap_bit_p (always_executed, i));
1238 661851 : }
1239 :
/* Finds invariants in LOOP.  */

static void
find_invariants (class loop *loop)
{
  auto_bitmap may_exit;
  auto_bitmap always_reached;
  auto_bitmap has_exit;
  auto_bitmap always_executed;
  basic_block *body = get_loop_body_in_dom_order (loop);

  /* Compute, for each block of the body, whether it is always reached
     and always executed.  */
  find_exits (loop, body, may_exit, has_exit);
  compute_always_reached (loop, body, may_exit, always_reached);
  compute_always_reached (loop, body, has_exit, always_executed);

  /* Collect candidate invariants, then merge identical ones so only a
     representative of each equivalence class is moved.  */
  find_defs (loop);
  find_invariants_body (loop, body, always_reached, always_executed);
  merge_identical_invariants ();

  free (body);
}
1261 :
1262 : /* Frees a list of uses USE. */
1263 :
1264 : static void
1265 615411 : free_use_list (struct use *use)
1266 : {
1267 615411 : struct use *next;
1268 :
1269 1285460 : for (; use; use = next)
1270 : {
1271 670049 : next = use->next;
1272 670049 : free (use);
1273 : }
1274 0 : }
1275 :
1276 : /* Return pressure class and number of hard registers (through *NREGS)
1277 : for destination of INSN. */
1278 : static enum reg_class
1279 15 : get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
1280 : {
1281 15 : rtx reg;
1282 15 : enum reg_class pressure_class;
1283 15 : rtx set = single_set (insn);
1284 :
1285 : /* Considered invariant insns have only one set. */
1286 15 : gcc_assert (set != NULL_RTX);
1287 15 : reg = SET_DEST (set);
1288 15 : if (GET_CODE (reg) == SUBREG)
1289 0 : reg = SUBREG_REG (reg);
1290 15 : if (MEM_P (reg))
1291 : {
1292 0 : *nregs = 0;
1293 0 : pressure_class = NO_REGS;
1294 : }
1295 : else
1296 : {
1297 15 : if (! REG_P (reg))
1298 : reg = NULL_RTX;
1299 15 : if (reg == NULL_RTX)
1300 : pressure_class = GENERAL_REGS;
1301 : else
1302 : {
1303 15 : pressure_class = reg_allocno_class (REGNO (reg));
1304 15 : pressure_class = ira_pressure_class_translate[pressure_class];
1305 : }
1306 15 : *nregs
1307 15 : = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
1308 : }
1309 15 : return pressure_class;
1310 : }
1311 :
/* Calculates cost and number of registers needed for moving invariant INV
   out of the loop and stores them to *COST and *REGS_NEEDED.  *CL will be
   the REG_CLASS of INV.  Return
   -1: if INV is invalid.
    0: if INV and its depends_on have same reg_class
    1: if INV and its depends_on have different reg_classes.  */

static int
get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed,
	      enum reg_class *cl)
{
  int i, acomp_cost;
  unsigned aregs_needed[N_REG_CLASSES];
  unsigned depno;
  struct invariant *dep;
  bitmap_iterator bi;
  int ret = 1;

  /* Find the representative of the class of the equivalent invariants.  */
  inv = invariants[inv->eqto];

  *comp_cost = 0;
  if (! flag_ira_loop_pressure)
    regs_needed[0] = 0;
  else
    {
      for (i = 0; i < ira_pressure_classes_num; i++)
	regs_needed[ira_pressure_classes[i]] = 0;
    }

  /* Already selected for moving, or already accounted for in this
     gain query (stamp equals the current actual_stamp).  */
  if (inv->move
      || inv->stamp == actual_stamp)
    return -1;
  inv->stamp = actual_stamp;

  if (! flag_ira_loop_pressure)
    regs_needed[0]++;
  else
    {
      int nregs;
      enum reg_class pressure_class;

      pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
      regs_needed[pressure_class] += nregs;
      *cl = pressure_class;
      ret = 0;
    }

  /* A cheap-address invariant whose every use is an address use into
     which it can be propagated contributes no computation cost.  */
  if (!inv->cheap_address
      || inv->def->n_uses == 0
      || inv->def->n_addr_uses < inv->def->n_uses
      /* Count cost if the inv can't be propagated into address uses.  */
      || !inv->def->can_prop_to_addr_uses)
    (*comp_cost) += inv->cost * inv->eqno;

#ifdef STACK_REGS
  {
    /* Hoisting constant pool constants into stack regs may cost more than
       just single register.  On x87, the balance is affected both by the
       small number of FP registers, and by its register stack organization,
       that forces us to add compensation code in and around the loop to
       shuffle the operands to the top of stack before use, and pop them
       from the stack after the loop finishes.

       To model this effect, we increase the number of registers needed for
       stack registers by two: one register push, and one register pop.
       This usually has the effect that FP constant loads from the constant
       pool are not moved out of the loop.

       Note that this also means that dependent invariants cannot be moved.
       However, the primary purpose of this pass is to move loop invariant
       address arithmetic out of loops, and address arithmetic that depends
       on floating point constants is unlikely to ever occur.  */
    rtx set = single_set (inv->insn);
    if (set
	&& IS_STACK_MODE (GET_MODE (SET_SRC (set)))
	&& constant_pool_constant_p (SET_SRC (set)))
      {
	if (flag_ira_loop_pressure)
	  regs_needed[ira_stack_reg_pressure_class] += 2;
	else
	  regs_needed[0] += 2;
      }
  }
#endif

  /* Recursively add the costs of the not-yet-moved invariants this one
     depends on.  */
  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
    {
      bool check_p;
      enum reg_class dep_cl = ALL_REGS;
      int dep_ret;

      dep = invariants[depno];

      /* If DEP is moved out of the loop, it is not a depends_on any more.  */
      if (dep->move)
	continue;

      dep_ret = get_inv_cost (dep, &acomp_cost, aregs_needed, &dep_cl);

      if (! flag_ira_loop_pressure)
	check_p = aregs_needed[0] != 0;
      else
	{
	  for (i = 0; i < ira_pressure_classes_num; i++)
	    if (aregs_needed[ira_pressure_classes[i]] != 0)
	      break;
	  check_p = i < ira_pressure_classes_num;

	  /* If the dependency's class differs from INV's, the combined
	     class degrades to ALL_REGS.  */
	  if ((dep_ret == 1) || ((dep_ret == 0) && (*cl != dep_cl)))
	    {
	      *cl = ALL_REGS;
	      ret = 1;
	    }
	}
      if (check_p
	  /* We need to check always_executed, since if the original value of
	     the invariant may be preserved, we may need to keep it in a
	     separate register.  TODO check whether the register has an
	     use outside of the loop.  */
	  && dep->always_executed
	  && !dep->def->uses->next)
	{
	  /* If this is a single use, after moving the dependency we will not
	     need a new register.  */
	  if (! flag_ira_loop_pressure)
	    aregs_needed[0]--;
	  else
	    {
	      int nregs;
	      enum reg_class pressure_class;

	      /* NOTE(review): this queries INV's insn, but the register
		 being discounted is DEP's destination — presumably this
		 was meant to be dep->insn; confirm before changing.  */
	      pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
	      aregs_needed[pressure_class] -= nregs;
	    }
	}

      if (! flag_ira_loop_pressure)
	regs_needed[0] += aregs_needed[0];
      else
	{
	  for (i = 0; i < ira_pressure_classes_num; i++)
	    regs_needed[ira_pressure_classes[i]]
	      += aregs_needed[ira_pressure_classes[i]];
	}
      (*comp_cost) += acomp_cost;
    }
  return ret;
}
1461 :
/* Calculates gain for eliminating invariant INV.  REGS_USED is the number
   of registers used in the loop, NEW_REGS is the number of new variables
   already added due to the invariant motion.  The number of registers needed
   for it is stored in *REGS_NEEDED.  SPEED and CALL_P are flags passed
   through to estimate_reg_pressure_cost.  */

static int
gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
		    unsigned *new_regs, unsigned regs_used,
		    bool speed, bool call_p)
{
  int comp_cost, size_cost;
  /* Workaround -Wmaybe-uninitialized false positive during
     profiledbootstrap by initializing it.  */
  enum reg_class cl = NO_REGS;
  int ret;

  /* Bump the stamp so get_inv_cost counts each invariant at most once
     per query.  */
  actual_stamp++;

  ret = get_inv_cost (inv, &comp_cost, regs_needed, &cl);

  if (! flag_ira_loop_pressure)
    {
      /* Gain = computation saved minus the increase in register pressure
	 cost caused by the extra registers.  */
      size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
					       regs_used, speed, call_p)
		   - estimate_reg_pressure_cost (new_regs[0],
						 regs_used, speed, call_p));
    }
  else if (ret < 0)
    return -1;
  else if ((ret == 0) && (cl == NO_REGS))
    /* Hoist it anyway since it does not impact register pressure.  */
    return 1;
  else
    {
      int i;
      enum reg_class pressure_class;

      /* See whether moving INV would push any intersecting pressure
	 class over its supply of hard registers.  */
      for (i = 0; i < ira_pressure_classes_num; i++)
	{
	  pressure_class = ira_pressure_classes[i];

	  if (!reg_classes_intersect_p (pressure_class, cl))
	    continue;

	  if ((int) new_regs[pressure_class]
	      + (int) regs_needed[pressure_class]
	      + LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
	      + param_ira_loop_reserved_regs
	      > ira_class_hard_regs_num[pressure_class])
	    break;
	}
      if (i < ira_pressure_classes_num)
	/* There will be register pressure excess and we want not to
	   make this loop invariant motion.  All loop invariants with
	   non-positive gains will be rejected in function
	   find_invariants_to_move.  Therefore we return the negative
	   number here.

	   One could think that this rejects also expensive loop
	   invariant motions and this will hurt code performance.
	   However numerous experiments with different heuristics
	   taking invariant cost into account did not confirm this
	   assumption.  There are possible explanations for this
	   result:
	   o probably all expensive invariants were already moved out
	     of the loop by PRE and gimple invariant motion pass.
	   o expensive invariant execution will be hidden by insn
	     scheduling or OOO processor hardware because usually such
	     invariants have a lot of freedom to be executed
	     out-of-order.
	   Another reason for ignoring invariant cost vs spilling cost
	   heuristics is also in difficulties to evaluate accurately
	   spill cost at this stage.  */
	return -1;
      else
	size_cost = 0;
    }

  return comp_cost - size_cost;
}
1543 :
1544 : /* Finds invariant with best gain for moving. Returns the gain, stores
1545 : the invariant in *BEST and number of registers needed for it to
1546 : *REGS_NEEDED. REGS_USED is the number of registers used in the loop.
1547 : NEW_REGS is the number of new variables already added due to invariant
1548 : motion. */
1549 :
1550 : static int
1551 491721 : best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
1552 : unsigned *new_regs, unsigned regs_used,
1553 : bool speed, bool call_p)
1554 : {
1555 491721 : struct invariant *inv;
1556 491721 : int i, gain = 0, again;
1557 491721 : unsigned aregs_needed[N_REG_CLASSES], invno;
1558 :
1559 4246081 : FOR_EACH_VEC_ELT (invariants, invno, inv)
1560 : {
1561 3754360 : if (inv->move)
1562 636322 : continue;
1563 :
1564 : /* Only consider the "representatives" of equivalent invariants. */
1565 3118038 : if (inv->eqto != inv->invno)
1566 1080327 : continue;
1567 :
1568 2037711 : again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
1569 : speed, call_p);
1570 2037711 : if (again > gain)
1571 : {
1572 348294 : gain = again;
1573 348294 : *best = inv;
1574 348294 : if (! flag_ira_loop_pressure)
1575 348293 : regs_needed[0] = aregs_needed[0];
1576 : else
1577 : {
1578 5 : for (i = 0; i < ira_pressure_classes_num; i++)
1579 4 : regs_needed[ira_pressure_classes[i]]
1580 4 : = aregs_needed[ira_pressure_classes[i]];
1581 : }
1582 : }
1583 : }
1584 :
1585 491721 : return gain;
1586 : }
1587 :
1588 : /* Marks invariant INVNO and all its dependencies for moving. */
1589 :
1590 : static void
1591 334319 : set_move_mark (unsigned invno, int gain)
1592 : {
1593 334319 : struct invariant *inv = invariants[invno];
1594 334319 : bitmap_iterator bi;
1595 :
1596 : /* Find the representative of the class of the equivalent invariants. */
1597 334319 : inv = invariants[inv->eqto];
1598 :
1599 334319 : if (inv->move)
1600 4298 : return;
1601 330021 : inv->move = true;
1602 :
1603 330021 : if (dump_file)
1604 : {
1605 3 : if (gain >= 0)
1606 3 : fprintf (dump_file, "Decided to move invariant %d -- gain %d\n",
1607 : invno, gain);
1608 : else
1609 0 : fprintf (dump_file, "Decided to move dependent invariant %d\n",
1610 : invno);
1611 330021 : };
1612 :
1613 412149 : EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, invno, bi)
1614 : {
1615 82128 : set_move_mark (invno, -1);
1616 : }
1617 : }
1618 :
/* Determines which invariants to move.  */

static void
find_invariants_to_move (bool speed, bool call_p)
{
  int gain;
  unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
  struct invariant *inv = NULL;

  if (!invariants.length ())
    return;

  if (flag_ira_loop_pressure)
    /* REGS_USED is actually never used when the flag is on.  */
    regs_used = 0;
  else
    /* We do not really do a good job in estimating number of
       registers used; we put some initial bound here to stand for
       induction variables etc.  that we do not detect.  */
    {
      unsigned int n_regs = DF_REG_SIZE (df);

      regs_used = 2;

      for (i = 0; i < n_regs; i++)
	{
	  if (!DF_REGNO_FIRST_DEF (i) && DF_REGNO_LAST_USE (i))
	    {
	      /* This is a value that is used but not changed inside loop.  */
	      regs_used++;
	    }
	}
    }

  /* No new registers committed yet.  */
  if (! flag_ira_loop_pressure)
    new_regs[0] = regs_needed[0] = 0;
  else
    {
      for (i = 0; (int) i < ira_pressure_classes_num; i++)
	new_regs[ira_pressure_classes[i]] = 0;
    }
  /* Greedily pick the invariant with the best gain until no remaining
     candidate has a positive gain, accumulating the register demand of
     each selection into NEW_REGS.  */
  while ((gain = best_gain_for_invariant (&inv, regs_needed,
					  new_regs, regs_used,
					  speed, call_p)) > 0)
    {
      set_move_mark (inv->invno, gain);
      if (! flag_ira_loop_pressure)
	new_regs[0] += regs_needed[0];
      else
	{
	  for (i = 0; (int) i < ira_pressure_classes_num; i++)
	    new_regs[ira_pressure_classes[i]]
	      += regs_needed[ira_pressure_classes[i]];
	}
    }
}
1675 :
1676 : /* Replace the uses, reached by the definition of invariant INV, by REG.
1677 :
1678 : IN_GROUP is nonzero if this is part of a group of changes that must be
1679 : performed as a group. In that case, the changes will be stored. The
1680 : function `apply_change_group' will validate and apply the changes. */
1681 :
1682 : static int
1683 244859 : replace_uses (struct invariant *inv, rtx reg, bool in_group)
1684 : {
1685 : /* Replace the uses we know to be dominated. It saves work for copy
1686 : propagation, and also it is necessary so that dependent invariants
1687 : are computed right. */
1688 244859 : if (inv->def)
1689 : {
1690 238764 : struct use *use;
1691 466234 : for (use = inv->def->uses; use; use = use->next)
1692 227470 : validate_change (use->insn, use->pos, reg, true);
1693 :
1694 : /* If we aren't part of a larger group, apply the changes now. */
1695 238764 : if (!in_group)
1696 44368 : return apply_change_group ();
1697 : }
1698 :
1699 : return 1;
1700 : }
1701 :
/* Whether invariant INV setting REG can be moved out of LOOP, at the end of
   the block preceding its header.  */

static bool
can_move_invariant_reg (class loop *loop, struct invariant *inv, rtx reg)
{
  df_ref def, use;
  unsigned int dest_regno, defs_in_loop_count = 0;
  rtx_insn *insn = inv->insn;
  basic_block bb = BLOCK_FOR_INSN (inv->insn);
  auto_vec <rtx_insn *, 16> debug_insns_to_reset;

  /* We ignore hard register and memory access for cost and complexity reasons.
     Hard register are few at this stage and expensive to consider as they
     require building a separate data flow.  Memory access would require using
     df_simulate_* and can_move_insns_across functions and is more complex.  */
  if (!REG_P (reg) || HARD_REGISTER_P (reg))
    return false;

  /* Check whether the set is always executed.  We could omit this condition if
     we know that the register is unused outside of the loop, but it does not
     seem worth finding out.  */
  if (!inv->always_executed)
    return false;

  /* Check that all uses that would be dominated by def are already dominated
     by it.  */
  dest_regno = REGNO (reg);
  for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use))
    {
      rtx_insn *use_insn;
      basic_block use_bb;

      use_insn = DF_REF_INSN (use);
      use_bb = BLOCK_FOR_INSN (use_insn);

      /* Ignore instruction considered for moving.  */
      if (use_insn == insn)
	continue;

      /* Don't consider uses outside loop.  */
      if (!flow_bb_inside_loop_p (loop, use_bb))
	continue;

      /* Don't move if a use is not dominated by def in insn.  */
      if ((use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn))
	  || !dominated_by_p (CDI_DOMINATORS, use_bb, bb))
	{
	  /* A non-dominated debug use need not block the motion; its
	     location is reset below instead.  */
	  if (!DEBUG_INSN_P (use_insn))
	    return false;
	  debug_insns_to_reset.safe_push (use_insn);
	}
    }

  /* Check for other defs.  Any other def in the loop might reach a use
     currently reached by the def in insn.  */
  for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def))
    {
      basic_block def_bb = DF_REF_BB (def);

      /* Defs in exit block cannot reach a use they weren't already.  */
      if (single_succ_p (def_bb))
	{
	  basic_block def_bb_succ;

	  def_bb_succ = single_succ (def_bb);
	  if (!flow_bb_inside_loop_p (loop, def_bb_succ))
	    continue;
	}

      /* More than one def inside the loop (counting INSN's own) makes
	 the motion unsafe.  */
      if (++defs_in_loop_count > 1)
	return false;
    }

  /* Reset debug uses if a use is not dominated by def in insn.  */
  for (auto use_insn : debug_insns_to_reset)
    {
      INSN_VAR_LOCATION_LOC (use_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
      df_insn_rescan (use_insn);
    }

  return true;
}
1785 :
/* Move invariant INVNO out of the LOOP.  Returns true if this succeeds, false
   otherwise.  */

static bool
move_invariant_reg (class loop *loop, unsigned invno)
{
  struct invariant *inv = invariants[invno];
  struct invariant *repr = invariants[inv->eqto];
  unsigned i;
  basic_block preheader = loop_preheader_edge (loop)->src;
  rtx reg, set, dest, note;
  bitmap_iterator bi;
  int regno = -1;

  /* inv->reg is set once the invariant has been moved: nothing to do.  */
  if (inv->reg)
    return true;
  /* Only invariants whose representative was selected get moved.  */
  if (!repr->move)
    return false;

  /* If this is a representative of the class of equivalent invariants,
     really move the invariant.  Otherwise just replace its use with
     the register used for the representative.  */
  if (inv == repr)
    {
      /* Move all dependencies out of the loop first.  */
      if (inv->depends_on)
	{
	  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, i, bi)
	    {
	      if (!move_invariant_reg (loop, i))
		goto fail;
	    }
	}

      /* If possible, just move the set out of the loop.  Otherwise, we
	 need to create a temporary register.  */
      set = single_set (inv->insn);
      reg = dest = SET_DEST (set);
      if (GET_CODE (reg) == SUBREG)
	reg = SUBREG_REG (reg);
      if (REG_P (reg))
	regno = REGNO (reg);

      if (!can_move_invariant_reg (loop, inv, dest))
	{
	  reg = gen_reg_rtx_and_attrs (dest);

	  /* Try replacing the destination by a new pseudoregister.  */
	  validate_change (inv->insn, &SET_DEST (set), reg, true);

	  /* As well as all the dominated uses.  */
	  replace_uses (inv, reg, true);

	  /* And validate all the changes.  */
	  if (!apply_change_group ())
	    goto fail;

	  /* Keep DEST's value available inside the loop by copying it
	     from the hoisted pseudo right after the original insn.  */
	  emit_insn_after (gen_move_insn (dest, reg), inv->insn);
	}
      else if (dump_file)
	fprintf (dump_file, "Invariant %d moved without introducing a new "
		 "temporary register\n", invno);
      /* A jump ending the preheader would make insertion there invalid;
	 split the preheader edge to get a usable block.  */
      if (JUMP_P (BB_END (preheader)))
	preheader = split_edge (loop_preheader_edge (loop));
      reorder_insns (inv->insn, inv->insn, BB_END (preheader));
      df_recompute_luids (preheader);

      /* If there is a REG_EQUAL note on the insn we just moved, and the
	 insn is in a basic block that is not always executed or the note
	 contains something for which we don't know the invariant status,
	 the note may no longer be valid after we move the insn.  Note that
	 uses in REG_EQUAL notes are taken into account in the computation
	 of invariants, so it is safe to retain the note even if it contains
	 register references for which we know the invariant status.  */
      if ((note = find_reg_note (inv->insn, REG_EQUAL, NULL_RTX))
	  && (!inv->always_executed
	      || !check_maybe_invariant (XEXP (note, 0))))
	remove_note (inv->insn, note);
    }
  else
    {
      /* Non-representative: ensure the representative was moved, redirect
	 the uses to its register and turn this insn into a copy.  */
      if (!move_invariant_reg (loop, repr->invno))
	goto fail;
      reg = repr->reg;
      regno = repr->orig_regno;
      if (!replace_uses (inv, reg, false))
	goto fail;
      set = single_set (inv->insn);
      emit_insn_after (gen_move_insn (SET_DEST (set), reg), inv->insn);
      delete_insn (inv->insn);
    }

  inv->reg = reg;
  inv->orig_regno = regno;

  return true;

fail:
  /* If we failed, clear move flag, so that we do not try to move inv
     again.  */
  if (dump_file)
    fprintf (dump_file, "Failed to move invariant %d\n", invno);
  inv->move = false;
  inv->reg = NULL_RTX;
  inv->orig_regno = -1;

  return false;
}
1893 :
/* Move the selected invariants out of LOOP, creating new temporary
   registers for them when necessary (move_invariant_reg records the
   new register and original regno in each invariant).  */

static void
move_invariants (class loop *loop)
{
  struct invariant *inv;
  unsigned i;

  FOR_EACH_VEC_ELT (invariants, i, inv)
    move_invariant_reg (loop, i);
  /* When tracking register pressure for IRA, propagate the register
     class preferences of each replaced register to its new temporary;
     inv->reg is non-NULL only for invariants that were really moved.  */
  if (flag_ira_loop_pressure && resize_reg_info ())
    {
      FOR_EACH_VEC_ELT (invariants, i, inv)
	if (inv->reg != NULL_RTX)
	  {
	    if (inv->orig_regno >= 0)
	      setup_reg_classes (REGNO (inv->reg),
				 reg_preferred_class (inv->orig_regno),
				 reg_alternate_class (inv->orig_regno),
				 reg_allocno_class (inv->orig_regno));
	    else
	      /* No original pseudo to copy preferences from; fall back
		 to conservative defaults.  */
	      setup_reg_classes (REGNO (inv->reg),
				 GENERAL_REGS, NO_REGS, GENERAL_REGS);
	  }
    }
  /* Remove the DF_UD_CHAIN problem added in find_defs before rescanning,
     to save a bit of compile time.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
}
1925 :
1926 : /* Initializes invariant motion data. */
1927 :
1928 : static void
1929 661851 : init_inv_motion_data (void)
1930 : {
1931 661851 : actual_stamp = 1;
1932 :
1933 661851 : invariants.create (100);
1934 661851 : }
1935 :
/* Frees the data allocated by invariant motion.  */

static void
free_inv_motion_data (void)
{
  unsigned i;
  struct def *def;
  struct invariant *inv;

  /* Walk the global DF-def-indexed invariant table and release the
     per-definition data (the def and its use list); the invariant
     structures themselves are freed from the vector below.  */
  check_invariant_table_size ();
  for (i = 0; i < DF_DEFS_TABLE_SIZE (); i++)
    {
      inv = invariant_table[i];
      if (inv)
	{
	  def = inv->def;
	  /* A table entry always corresponds to an invariant with a
	     definition.  */
	  gcc_assert (def != NULL);

	  free_use_list (def->uses);
	  free (def);
	  /* Clear the slot so a later pass starts from a clean table.  */
	  invariant_table[i] = NULL;
	}
    }

  /* Release the invariants and their dependency bitmaps.  */
  FOR_EACH_VEC_ELT (invariants, i, inv)
    {
      BITMAP_FREE (inv->depends_on);
      free (inv);
    }
  invariants.release ();
}
1967 :
/* Move the invariants out of the LOOP.  */

static void
move_single_loop_invariants (class loop *loop)
{
  init_inv_motion_data ();

  /* Find all invariant candidates in LOOP, select the profitable
     subset (accounting for speed vs. size and for calls clobbering
     registers), and hoist the selected ones to the preheader.  */
  find_invariants (loop);
  find_invariants_to_move (optimize_loop_for_speed_p (loop),
			   LOOP_DATA (loop)->has_call);
  move_invariants (loop);

  free_inv_motion_data ();
}
1982 :
1983 : /* Releases the auxiliary data for LOOP. */
1984 :
1985 : static void
1986 661851 : free_loop_data (class loop *loop)
1987 : {
1988 661851 : class loop_data *data = LOOP_DATA (loop);
1989 661851 : if (!data)
1990 : return;
1991 :
1992 661851 : bitmap_clear (&LOOP_DATA (loop)->regs_ref);
1993 661851 : bitmap_clear (&LOOP_DATA (loop)->regs_live);
1994 661851 : free (data);
1995 661851 : loop->aux = NULL;
1996 : }
1997 :
1998 :
1999 :
/* State for the register-pressure computation below; valid only while
   calculate_loop_reg_pressure walks a basic block.  */

/* Registers currently living.  */
static bitmap_head curr_regs_live;

/* Current reg pressure for each pressure class.  */
static int curr_reg_pressure[N_REG_CLASSES];

/* Record all regs that are set in any one insn.  Communication from
   mark_reg_{store,clobber} and global_conflicts.  Asm can refer to
   all hard-registers, hence the array is sized for the larger of
   FIRST_PSEUDO_REGISTER and MAX_RECOG_OPERANDS (doubled for slack).  */
static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
		    ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
/* Number of regs stored in the previous array.  */
static int n_regs_set;
2013 :
2014 : /* Return pressure class and number of needed hard registers (through
2015 : *NREGS) of register REGNO. */
2016 : static enum reg_class
2017 137 : get_regno_pressure_class (int regno, int *nregs)
2018 : {
2019 137 : if (regno >= FIRST_PSEUDO_REGISTER)
2020 : {
2021 82 : enum reg_class pressure_class;
2022 :
2023 82 : pressure_class = reg_allocno_class (regno);
2024 82 : pressure_class = ira_pressure_class_translate[pressure_class];
2025 82 : *nregs
2026 82 : = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
2027 82 : return pressure_class;
2028 : }
2029 55 : else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
2030 55 : && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
2031 : {
2032 12 : *nregs = 1;
2033 12 : return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
2034 : }
2035 : else
2036 : {
2037 43 : *nregs = 0;
2038 43 : return NO_REGS;
2039 : }
2040 : }
2041 :
2042 : /* Increase (if INCR_P) or decrease current register pressure for
2043 : register REGNO. */
2044 : static void
2045 134 : change_pressure (int regno, bool incr_p)
2046 : {
2047 134 : int nregs;
2048 134 : enum reg_class pressure_class;
2049 :
2050 134 : pressure_class = get_regno_pressure_class (regno, &nregs);
2051 134 : if (! incr_p)
2052 32 : curr_reg_pressure[pressure_class] -= nregs;
2053 : else
2054 : {
2055 102 : curr_reg_pressure[pressure_class] += nregs;
2056 102 : if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2057 : < curr_reg_pressure[pressure_class])
2058 20 : LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2059 20 : = curr_reg_pressure[pressure_class];
2060 : }
2061 134 : }
2062 :
2063 : /* Mark REGNO birth. */
2064 : static void
2065 48 : mark_regno_live (int regno)
2066 : {
2067 48 : class loop *loop;
2068 :
2069 48 : for (loop = curr_loop;
2070 96 : loop != current_loops->tree_root;
2071 48 : loop = loop_outer (loop))
2072 48 : bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
2073 48 : if (!bitmap_set_bit (&curr_regs_live, regno))
2074 : return;
2075 37 : change_pressure (regno, true);
2076 : }
2077 :
2078 : /* Mark REGNO death. */
2079 : static void
2080 42 : mark_regno_death (int regno)
2081 : {
2082 42 : if (! bitmap_clear_bit (&curr_regs_live, regno))
2083 : return;
2084 32 : change_pressure (regno, false);
2085 : }
2086 :
2087 : /* Mark setting register REG. */
2088 : static void
2089 53 : mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
2090 : void *data ATTRIBUTE_UNUSED)
2091 : {
2092 53 : if (GET_CODE (reg) == SUBREG)
2093 0 : reg = SUBREG_REG (reg);
2094 :
2095 53 : if (! REG_P (reg))
2096 : return;
2097 :
2098 48 : regs_set[n_regs_set++] = reg;
2099 :
2100 48 : unsigned int end_regno = END_REGNO (reg);
2101 96 : for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2102 48 : mark_regno_live (regno);
2103 : }
2104 :
2105 : /* Mark clobbering register REG. */
2106 : static void
2107 43 : mark_reg_clobber (rtx reg, const_rtx setter, void *data)
2108 : {
2109 43 : if (GET_CODE (setter) == CLOBBER)
2110 10 : mark_reg_store (reg, setter, data);
2111 43 : }
2112 :
2113 : /* Mark register REG death. */
2114 : static void
2115 42 : mark_reg_death (rtx reg)
2116 : {
2117 42 : unsigned int end_regno = END_REGNO (reg);
2118 84 : for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2119 42 : mark_regno_death (regno);
2120 42 : }
2121 :
/* Mark occurrence of registers in X for the current loop.  Every REG
   found anywhere in X is recorded in the regs_ref bitmap of curr_loop
   and of all its enclosing loops.  */
static void
mark_ref_regs (rtx x)
{
  RTX_CODE code;
  int i;
  const char *fmt;

  if (!x)
    return;

  code = GET_CODE (x);
  if (code == REG)
    {
      class loop *loop;

      /* Record the reference in the current loop and all ancestors,
	 up to (but not including) the dummy tree root.  */
      for (loop = curr_loop;
	   loop != current_loops->tree_root;
	   loop = loop_outer (loop))
	bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
      return;
    }

  /* Recurse into all rtx operands ('e') and rtx vectors ('E') per the
     RTL format string of this code.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    if (fmt[i] == 'e')
      mark_ref_regs (XEXP (x, i));
    else if (fmt[i] == 'E')
      {
	int j;

	for (j = 0; j < XVECLEN (x, i); j++)
	  mark_ref_regs (XVECEXP (x, i, j));
      }
}
2157 :
/* Calculate register pressure in the loops.  For every loop, records
   the referenced and live registers and the maximal pressure seen for
   each IRA pressure class in the loop's loop_data.  */
static void
calculate_loop_reg_pressure (void)
{
  int i;
  unsigned int j;
  bitmap_iterator bi;
  basic_block bb;
  rtx_insn *insn;
  rtx link;
  class loop *parent;

  /* Allocate per-loop data for any loop that does not have it yet.  */
  for (auto loop : loops_list (cfun, 0))
    if (loop->aux == NULL)
      {
	loop->aux = xcalloc (1, sizeof (class loop_data));
	bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
	bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
      }
  ira_setup_eliminable_regset ();
  bitmap_initialize (&curr_regs_live, &reg_obstack);
  FOR_EACH_BB_FN (bb, cfun)
    {
      curr_loop = bb->loop_father;
      /* Blocks outside any real loop contribute nothing.  */
      if (curr_loop == current_loops->tree_root)
	continue;

      /* Registers live on entry to BB are live in BB's loop and in
	 all enclosing loops.  */
      for (class loop *loop = curr_loop;
	   loop != current_loops->tree_root;
	   loop = loop_outer (loop))
	bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));

      /* Start the pressure simulation from the set of registers live
	 on entry to BB.  */
      bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
      for (i = 0; i < ira_pressure_classes_num; i++)
	curr_reg_pressure[ira_pressure_classes[i]] = 0;
      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
	change_pressure (j, true);

      FOR_BB_INSNS (bb, insn)
	{
	  if (! NONDEBUG_INSN_P (insn))
	    continue;

	  mark_ref_regs (PATTERN (insn));
	  n_regs_set = 0;
	  note_stores (insn, mark_reg_clobber, NULL);

	  /* Mark any registers dead after INSN as dead now.  */

	  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
	    if (REG_NOTE_KIND (link) == REG_DEAD)
	      mark_reg_death (XEXP (link, 0));

	  /* Mark any registers set in INSN as live,
	     and mark them as conflicting with all other live regs.
	     Clobbers are processed again, so they conflict with
	     the registers that are set.  */

	  note_stores (insn, mark_reg_store, NULL);

	  if (AUTO_INC_DEC)
	    for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
	      if (REG_NOTE_KIND (link) == REG_INC)
		mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);

	  /* Registers set by INSN but unused afterwards (REG_UNUSED)
	     die again immediately.  */
	  while (n_regs_set-- > 0)
	    {
	      rtx note = find_regno_note (insn, REG_UNUSED,
					  REGNO (regs_set[n_regs_set]));
	      if (! note)
		continue;

	      mark_reg_death (XEXP (note, 0));
	    }
	}
    }
  bitmap_release (&curr_regs_live);
  /* With region-based allocation, registers that are live through a
     loop but never referenced inside it do not add pressure there
     (they can be spilled around the loop), so discount them.  */
  if (flag_ira_region == IRA_REGION_MIXED
      || flag_ira_region == IRA_REGION_ALL)
    for (auto loop : loops_list (cfun, 0))
      {
	EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
	  if (! bitmap_bit_p (&LOOP_DATA (loop)->regs_ref, j))
	    {
	      enum reg_class pressure_class;
	      int nregs;

	      pressure_class = get_regno_pressure_class (j, &nregs);
	      LOOP_DATA (loop)->max_reg_pressure[pressure_class] -= nregs;
	    }
      }
  if (dump_file == NULL)
    return;
  /* Dump the computed per-loop pressure information.  */
  for (auto loop : loops_list (cfun, 0))
    {
      parent = loop_outer (loop);
      fprintf (dump_file, "\n Loop %d (parent %d, header bb%d, depth %d)\n",
	       loop->num, (parent == NULL ? -1 : parent->num),
	       loop->header->index, loop_depth (loop));
      fprintf (dump_file, "\n ref. regnos:");
      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
	fprintf (dump_file, " %d", j);
      fprintf (dump_file, "\n live regnos:");
      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
	fprintf (dump_file, " %d", j);
      fprintf (dump_file, "\n Pressure:");
      for (i = 0; (int) i < ira_pressure_classes_num; i++)
	{
	  enum reg_class pressure_class;

	  pressure_class = ira_pressure_classes[i];
	  /* Skip classes that never had any pressure in this loop.  */
	  if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
	    continue;
	  fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
		   LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
	}
      fprintf (dump_file, "\n");
    }
}
2277 :
2278 :
2279 :
/* Move the invariants out of the loops.  Entry point of the pass:
   sets up dataflow problems, optionally computes loop register
   pressure for IRA, processes all loops innermost-first, then tears
   the auxiliary data down again.  */

void
move_loop_invariants (void)
{
  /* At -O1 the DF_LIVE problem is not otherwise available; add it so
     liveness information exists for this pass.  */
  if (optimize == 1)
    df_live_add_problem ();
  /* ??? This is a hack.  We should only need to call df_live_set_all_dirty
     for optimize == 1, but can_move_invariant_reg relies on DF_INSN_LUID
     being up-to-date.  That isn't always true (even after df_analyze)
     because df_process_deferred_rescans doesn't necessarily cause
     blocks to be rescanned.  */
  df_live_set_all_dirty ();
  if (flag_ira_loop_pressure)
    {
      df_analyze ();
      regstat_init_n_sets_and_refs ();
      ira_set_pseudo_classes (true, dump_file);
      calculate_loop_reg_pressure ();
      regstat_free_n_sets_and_refs ();
    }
  df_set_flags (DF_EQ_NOTES + DF_DEFER_INSN_RESCAN);
  /* Process the loops, innermost first.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      curr_loop = loop;
      /* move_single_loop_invariants for very large loops is time consuming
	 and might need a lot of memory.  For -O1 only do loop invariant
	 motion for very small loops.  */
      unsigned max_bbs = param_loop_invariant_max_bbs_in_loop;
      if (optimize < 2)
	max_bbs /= 10;
      if (loop->num_nodes <= max_bbs)
	move_single_loop_invariants (loop);
    }

  for (auto loop : loops_list (cfun, 0))
    free_loop_data (loop);

  if (flag_ira_loop_pressure)
    /* There is no sense to keep this info because it was most
       probably outdated by subsequent passes.  */
    free_reg_info ();
  /* Release the global def-index -> invariant table.  */
  free (invariant_table);
  invariant_table = NULL;
  invariant_table_size = 0;

  /* Drop the DF_LIVE problem added above for -O1.  */
  if (optimize == 1)
    df_remove_problem (df_live);

  checking_verify_flow_info ();
}
|