Line data Source code
1 : /* RTL-level loop invariant motion.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This implements the loop invariant motion pass. It is very simple
21 : (no calls, no loads/stores, etc.). This should be sufficient to cleanup
22 : things like address arithmetics -- other more complicated invariants should
23 : be eliminated on GIMPLE either in tree-ssa-loop-im.cc or in tree-ssa-pre.cc.
24 :
25 : We proceed loop by loop -- it is simpler than trying to handle things
26 : globally and should not lose much. First we inspect all sets inside loop
27 : and create a dependency graph on insns (saying "to move this insn, you must
28 : also move the following insns").
29 :
30 : We then need to determine what to move. We estimate the number of registers
31 : used and move as many invariants as possible while we still have enough free
32 : registers. We prefer the expensive invariants.
33 :
   Then we move the selected invariants out of the loop, creating new
   temporaries for them if necessary.  */
36 :
37 : #include "config.h"
38 : #include "system.h"
39 : #include "coretypes.h"
40 : #include "backend.h"
41 : #include "target.h"
42 : #include "rtl.h"
43 : #include "tree.h"
44 : #include "cfghooks.h"
45 : #include "df.h"
46 : #include "memmodel.h"
47 : #include "tm_p.h"
48 : #include "insn-config.h"
49 : #include "regs.h"
50 : #include "ira.h"
51 : #include "recog.h"
52 : #include "cfgrtl.h"
53 : #include "cfgloop.h"
54 : #include "expr.h"
55 : #include "rtl-iter.h"
56 : #include "dumpfile.h"
57 :
58 : /* The data stored for the loop. */
59 :
class loop_data
{
public:
  /* NOTE: an instance of this class is hung off LOOP->aux (see the
     LOOP_DATA macro below) and allocated lazily in find_exits.  */
  class loop *outermost_exit;	/* The outermost exit of the loop.  */
  bool has_call;		/* True if the loop contains a call.  */
  /* Maximal register pressure inside loop for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Loop regs referenced and live pseudo-registers.  */
  bitmap_head regs_ref;
  bitmap_head regs_live;
};
72 :
73 : #define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
74 :
/* The description of a use.  */
76 :
struct use
{
  rtx *pos;			/* Position of the use within its insn.  */
  rtx_insn *insn;		/* The insn in which the use occurs.  */
  unsigned addr_use_p;		/* Whether the use occurs in an address
				   (a MEM load or store).  */
  struct use *next;		/* Next use in the singly-linked list.  */
};
84 :
85 : /* The description of a def. */
86 :
struct def
{
  struct use *uses;		/* The list of uses that are uniquely reached
				   by it.  */
  unsigned n_uses;		/* Number of such uses.  */
  unsigned n_addr_uses;		/* Number of uses in addresses.  */
  unsigned invno;		/* The corresponding invariant (index into
				   the INVARIANTS vector).  */
  bool can_prop_to_addr_uses;	/* True if the corresponding inv can be
				   propagated into its address uses.  */
};
97 :
98 : /* The data stored for each invariant. */
99 :
struct invariant
{
  /* The number of the invariant (its index in the INVARIANTS vector).  */
  unsigned invno;

  /* The number of the invariant with the same value (set by
     merge_identical_invariants; ~0u until then).  */
  unsigned eqto;

  /* The number of invariants which eqto this.  */
  unsigned eqno;

  /* If we moved the invariant out of the loop, the original regno
     that contained its value.  */
  int orig_regno;

  /* If we moved the invariant out of the loop, the register that contains its
     value.  */
  rtx reg;

  /* The definition of the invariant, or NULL when the whole set is not
     hoistable by itself.  */
  struct def *def;

  /* The insn in which it is defined.  */
  rtx_insn *insn;

  /* Whether it is always executed.  */
  bool always_executed;

  /* Whether to move the invariant.  */
  bool move;

  /* Whether the invariant is cheap when used as an address.  */
  bool cheap_address;

  /* Cost of the invariant.  */
  unsigned cost;

  /* Used for detecting already visited invariants during determining
     costs of movements.  */
  unsigned stamp;

  /* The invariants it depends on.  */
  bitmap depends_on;
};
144 :
145 : /* Currently processed loop. */
146 : static class loop *curr_loop;
147 :
148 : /* Table of invariants indexed by the df_ref uid field. */
149 :
150 : static unsigned int invariant_table_size = 0;
151 : static struct invariant ** invariant_table;
152 :
153 : /* Entry for hash table of invariant expressions. */
154 :
struct invariant_expr_entry
{
  /* The invariant.  */
  struct invariant *inv;

  /* Its value (the SET_SRC of its defining insn).  */
  rtx expr;

  /* Its mode.  */
  machine_mode mode;

  /* Its hash, precomputed so the hash table need not recompute it.  */
  hashval_t hash;
};
169 :
170 : /* The actual stamp for marking already visited invariants during determining
171 : costs of movements. */
172 :
173 : static unsigned actual_stamp;
174 :
175 : typedef struct invariant *invariant_p;
176 :
177 :
178 : /* The invariants. */
179 :
180 : static vec<invariant_p> invariants;
181 :
182 : /* Check the size of the invariant table and realloc if necessary. */
183 :
184 : static void
185 24557322 : check_invariant_table_size (void)
186 : {
187 24557322 : if (invariant_table_size < DF_DEFS_TABLE_SIZE ())
188 : {
189 294258 : unsigned int new_size = DF_DEFS_TABLE_SIZE () + (DF_DEFS_TABLE_SIZE () / 4);
190 294258 : invariant_table = XRESIZEVEC (struct invariant *, invariant_table, new_size);
191 294258 : memset (&invariant_table[invariant_table_size], 0,
192 294258 : (new_size - invariant_table_size) * sizeof (struct invariant *));
193 294258 : invariant_table_size = new_size;
194 : }
195 24557322 : }
196 :
197 : /* Test for possibility of invariantness of X. */
198 :
/* Test for possibility of invariantness of X.  Returns true if X contains
   no construct (volatile access, call, non-readonly memory load, ...)
   that would by itself forbid hoisting; register dependencies are
   checked separately by check_dependencies.  */

static bool
check_maybe_invariant (rtx x)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      return true;

    case PC:
    case UNSPEC_VOLATILE:
    case CALL:
      return false;

    case REG:
      return true;

    case MEM:
      /* Load/store motion is done elsewhere.  ??? Perhaps also add it here?
	 It should not be hard, and might be faster than "elsewhere".  */

      /* Just handle the most trivial case where we load from an unchanging
	 location (most importantly, pic tables).  */
      if (MEM_READONLY_P (x) && !MEM_VOLATILE_P (x))
	break;

      return false;

    case ASM_OPERANDS:
      /* Don't mess with insns declared volatile.  */
      if (MEM_VOLATILE_P (x))
	return false;
      break;

    default:
      break;
    }

  /* Recurse on all rtx operands; every subexpression must itself be
     potentially invariant.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  if (!check_maybe_invariant (XEXP (x, i)))
	    return false;
	}
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (x, i); j++)
	    if (!check_maybe_invariant (XVECEXP (x, i, j)))
	      return false;
	}
    }

  return true;
}
261 :
262 : /* Returns the invariant definition for USE, or NULL if USE is not
263 : invariant. */
264 :
/* Returns the invariant definition for USE, or NULL if USE is not
   invariant.  */

static struct invariant *
invariant_for_use (df_ref use)
{
  struct df_link *defs;
  df_ref def;
  basic_block bb = DF_REF_BB (use), def_bb;

  /* A read-write use has no single invariant value.  */
  if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
    return NULL;

  /* The use must be reached by exactly one definition.  */
  defs = DF_REF_CHAIN (use);
  if (!defs || defs->next)
    return NULL;
  def = defs->ref;
  check_invariant_table_size ();
  if (!invariant_table[DF_REF_ID (def)])
    return NULL;

  /* The definition must dominate the use.  */
  def_bb = DF_REF_BB (def);
  if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
    return NULL;
  return invariant_table[DF_REF_ID (def)];
}
288 :
289 : /* Computes hash value for invariant expression X in INSN. */
290 :
/* Computes hash value for invariant expression X in INSN.  Registers that
   are themselves defined by an invariant hash to that invariant's
   canonical number (eqto), so equivalent chains hash alike.  */

static hashval_t
hash_invariant_expr_1 (rtx_insn *insn, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;
  hashval_t val = code;
  int do_not_record_p;
  df_ref use;
  struct invariant *inv;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);

    case REG:
      use = df_find_use (insn, x);
      if (!use)
	return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
      inv = invariant_for_use (use);
      if (!inv)
	return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);

      /* eqto must already have been computed for dependencies (see
	 find_identical_invariants, which recurses on depends_on first).  */
      gcc_assert (inv->eqto != ~0u);
      return inv->eqto;

    default:
      break;
    }

  /* Combine the hashes of all operands into VAL.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	val ^= hash_invariant_expr_1 (insn, XEXP (x, i));
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (x, i); j++)
	    val ^= hash_invariant_expr_1 (insn, XVECEXP (x, i, j));
	}
      else if (fmt[i] == 'i' || fmt[i] == 'n')
	val ^= XINT (x, i);
      else if (fmt[i] == 'L')
	val ^= XLOC (x, i);
      else if (fmt[i] == 'p')
	val ^= constant_lower_bound (SUBREG_BYTE (x));
    }

  return val;
}
345 :
346 : /* Returns true if the invariant expressions E1 and E2 used in insns INSN1
347 : and INSN2 have always the same value. */
348 :
/* Returns true if the invariant expressions E1 and E2 used in insns INSN1
   and INSN2 have always the same value.  Registers compare equal when
   both are defined by invariants with the same canonical number.  */

static bool
invariant_expr_equal_p (rtx_insn *insn1, rtx e1, rtx_insn *insn2, rtx e2)
{
  enum rtx_code code = GET_CODE (e1);
  int i, j;
  const char *fmt;
  df_ref use1, use2;
  struct invariant *inv1 = NULL, *inv2 = NULL;
  rtx sub1, sub2;

  /* If mode of only one of the operands is VOIDmode, it is not equivalent to
     the other one.  If both are VOIDmode, we rely on the caller of this
     function to verify that their modes are the same.  */
  if (code != GET_CODE (e2) || GET_MODE (e1) != GET_MODE (e2))
    return false;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      return rtx_equal_p (e1, e2);

    case REG:
      use1 = df_find_use (insn1, e1);
      use2 = df_find_use (insn2, e2);
      if (use1)
	inv1 = invariant_for_use (use1);
      if (use2)
	inv2 = invariant_for_use (use2);

      /* Neither register carries an invariant: fall back to plain
	 structural equality.  */
      if (!inv1 && !inv2)
	return rtx_equal_p (e1, e2);

      if (!inv1 || !inv2)
	return false;

      gcc_assert (inv1->eqto != ~0u);
      gcc_assert (inv2->eqto != ~0u);
      return inv1->eqto == inv2->eqto;

    default:
      break;
    }

  /* Compare all operands of E1 and E2 pairwise.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  sub1 = XEXP (e1, i);
	  sub2 = XEXP (e2, i);

	  if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
	    return false;
	}

      else if (fmt[i] == 'E')
	{
	  if (XVECLEN (e1, i) != XVECLEN (e2, i))
	    return false;

	  for (j = 0; j < XVECLEN (e1, i); j++)
	    {
	      sub1 = XVECEXP (e1, i, j);
	      sub2 = XVECEXP (e2, i, j);

	      if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
		return false;
	    }
	}
      else if (fmt[i] == 'i' || fmt[i] == 'n')
	{
	  if (XINT (e1, i) != XINT (e2, i))
	    return false;
	}
      else if (fmt[i] == 'p')
	{
	  if (maybe_ne (SUBREG_BYTE (e1), SUBREG_BYTE (e2)))
	    return false;
	}
      /* Unhandled type of subexpression, we fail conservatively.  */
      else
	return false;
    }

  return true;
}
438 :
/* Hash-table traits for invariant_expr_entry; entries are malloc'ed and
   freed by the table (free_ptr_hash).  */

struct invariant_expr_hasher : free_ptr_hash <invariant_expr_entry>
{
  static inline hashval_t hash (const invariant_expr_entry *);
  static inline bool equal (const invariant_expr_entry *,
			    const invariant_expr_entry *);
};
445 :
446 : /* Returns hash value for invariant expression entry ENTRY. */
447 :
448 : inline hashval_t
449 2976568 : invariant_expr_hasher::hash (const invariant_expr_entry *entry)
450 : {
451 2976568 : return entry->hash;
452 : }
453 :
454 : /* Compares invariant expression entries ENTRY1 and ENTRY2. */
455 :
456 : inline bool
457 3448254 : invariant_expr_hasher::equal (const invariant_expr_entry *entry1,
458 : const invariant_expr_entry *entry2)
459 : {
460 3448254 : if (entry1->mode != entry2->mode)
461 : return 0;
462 :
463 2234140 : return invariant_expr_equal_p (entry1->inv->insn, entry1->expr,
464 2234140 : entry2->inv->insn, entry2->expr);
465 : }
466 :
467 : typedef hash_table<invariant_expr_hasher> invariant_htab_type;
468 :
469 : /* Checks whether invariant with value EXPR in machine mode MODE is
470 : recorded in EQ. If this is the case, return the invariant. Otherwise
471 : insert INV to the table for this expression and return INV. */
472 :
/* Checks whether invariant with value EXPR in machine mode MODE is
   recorded in EQ.  If this is the case, return the invariant.  Otherwise
   insert INV to the table for this expression and return INV.  */

static struct invariant *
find_or_insert_inv (invariant_htab_type *eq, rtx expr, machine_mode mode,
		    struct invariant *inv)
{
  hashval_t hash = hash_invariant_expr_1 (inv->insn, expr);
  struct invariant_expr_entry *entry;
  struct invariant_expr_entry pentry;
  invariant_expr_entry **slot;

  /* Probe with a stack-allocated key; only allocate a heap entry when the
     expression is not yet in the table.  */
  pentry.expr = expr;
  pentry.inv = inv;
  pentry.mode = mode;
  slot = eq->find_slot_with_hash (&pentry, hash, INSERT);
  entry = *slot;

  if (entry)
    return entry->inv;

  entry = XNEW (struct invariant_expr_entry);
  entry->inv = inv;
  entry->expr = expr;
  entry->mode = mode;
  entry->hash = hash;
  *slot = entry;

  return inv;
}
500 :
501 : /* Finds invariants identical to INV and records the equivalence. EQ is the
502 : hash table of the invariants. */
503 :
/* Finds invariants identical to INV and records the equivalence.  EQ is the
   hash table of the invariants.  */

static void
find_identical_invariants (invariant_htab_type *eq, struct invariant *inv)
{
  unsigned depno;
  bitmap_iterator bi;
  struct invariant *dep;
  rtx expr, set;
  machine_mode mode;
  struct invariant *tmp;

  /* eqto != ~0u means this invariant was already processed.  */
  if (inv->eqto != ~0u)
    return;

  /* Process dependencies first so their eqto numbers are available when
     hashing/comparing this invariant's expression.  */
  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
    {
      dep = invariants[depno];
      find_identical_invariants (eq, dep);
    }

  set = single_set (inv->insn);
  expr = SET_SRC (set);
  mode = GET_MODE (expr);
  if (mode == VOIDmode)
    mode = GET_MODE (SET_DEST (set));

  tmp = find_or_insert_inv (eq, expr, mode, inv);
  inv->eqto = tmp->invno;

  /* Count this invariant towards its representative's equivalence class
     if it is a distinct, always-executed copy.  */
  if (tmp->invno != inv->invno && inv->always_executed)
    tmp->eqno++;

  if (dump_file && inv->eqto != inv->invno)
    fprintf (dump_file,
	     "Invariant %d is equivalent to invariant %d.\n",
	     inv->invno, inv->eqto);
}
540 :
541 : /* Find invariants with the same value and record the equivalences. */
542 :
/* Find invariants with the same value and record the equivalences.  */

static void
merge_identical_invariants (void)
{
  unsigned i;
  struct invariant *inv;
  /* Size the hash table for the number of invariants we have.  */
  invariant_htab_type eq (invariants.length ());

  FOR_EACH_VEC_ELT (invariants, i, inv)
    find_identical_invariants (&eq, inv);
}
553 :
554 : /* Determines the basic blocks inside LOOP that are always executed and
555 : stores their bitmap to ALWAYS_REACHED. MAY_EXIT is a bitmap of
556 : basic blocks that may either exit the loop, or contain the call that
557 : does not have to return. BODY is body of the loop obtained by
558 : get_loop_body_in_dom_order. */
559 :
560 : static void
561 1317456 : compute_always_reached (class loop *loop, basic_block *body,
562 : bitmap may_exit, bitmap always_reached)
563 : {
564 1317456 : unsigned i;
565 :
566 2497146 : for (i = 0; i < loop->num_nodes; i++)
567 : {
568 2483138 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[i]))
569 1571646 : bitmap_set_bit (always_reached, i);
570 :
571 2483138 : if (bitmap_bit_p (may_exit, i))
572 : return;
573 : }
574 : }
575 :
576 : /* Finds exits out of the LOOP with body BODY. Marks blocks in that we may
577 : exit the loop by cfg edge to HAS_EXIT and MAY_EXIT. In MAY_EXIT
578 : additionally mark blocks that may exit due to a call. */
579 :
/* Finds exits out of the LOOP with body BODY.  Marks blocks in that we may
   exit the loop by cfg edge to HAS_EXIT and MAY_EXIT.  In MAY_EXIT
   additionally mark blocks that may exit due to a call.  */

static void
find_exits (class loop *loop, basic_block *body,
	    bitmap may_exit, bitmap has_exit)
{
  unsigned i;
  edge_iterator ei;
  edge e;
  class loop *outermost_exit = loop, *aexit;
  bool has_call = false;
  rtx_insn *insn;

  for (i = 0; i < loop->num_nodes; i++)
    {
      if (body[i]->loop_father == loop)
	{
	  /* A call that is not known const-or-pure (or that may loop
	     forever) might never return, so treat its block as a
	     possible exit.  */
	  FOR_BB_INSNS (body[i], insn)
	    {
	      if (CALL_P (insn)
		  && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
		      || !RTL_CONST_OR_PURE_CALL_P (insn)))
		{
		  has_call = true;
		  bitmap_set_bit (may_exit, i);
		  break;
		}
	    }

	  FOR_EACH_EDGE (e, ei, body[i]->succs)
	    {
	      if (! flow_bb_inside_loop_p (loop, e->dest))
		{
		  bitmap_set_bit (may_exit, i);
		  bitmap_set_bit (has_exit, i);
		  outermost_exit = find_common_loop (outermost_exit,
						     e->dest->loop_father);
		}
	      /* If we enter a subloop that might never terminate treat
		 it like a possible exit.  */
	      if (flow_loop_nested_p (loop, e->dest->loop_father))
		bitmap_set_bit (may_exit, i);
	    }
	  continue;
	}

      /* Use the data stored for the subloop to decide whether we may exit
	 through it.  It is sufficient to do this for header of the loop,
	 as other basic blocks inside it must be dominated by it.  */
      if (body[i]->loop_father->header != body[i])
	continue;

      if (LOOP_DATA (body[i]->loop_father)->has_call)
	{
	  has_call = true;
	  bitmap_set_bit (may_exit, i);
	}
      aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit;
      if (aexit != loop)
	{
	  bitmap_set_bit (may_exit, i);
	  bitmap_set_bit (has_exit, i);

	  if (flow_loop_nested_p (aexit, outermost_exit))
	    outermost_exit = aexit;
	}
    }

  /* Lazily allocate the per-loop data on first use.  */
  if (loop->aux == NULL)
    {
      loop->aux = xcalloc (1, sizeof (class loop_data));
      bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
      bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
    }
  LOOP_DATA (loop)->outermost_exit = outermost_exit;
  LOOP_DATA (loop)->has_call = has_call;
}
655 :
656 : /* Check whether we may assign a value to X from a register. */
657 :
658 : static bool
659 13326047 : may_assign_reg_p (rtx x)
660 : {
661 13326047 : return (GET_MODE (x) != VOIDmode
662 13324548 : && GET_MODE (x) != BLKmode
663 13307498 : && can_copy_p (GET_MODE (x))
664 : /* Do not mess with the frame pointer adjustments that can
665 : be generated e.g. by expand_builtin_setjmp_receiver. */
666 11341757 : && x != frame_pointer_rtx
667 24667804 : && (!REG_P (x)
668 9584985 : || !HARD_REGISTER_P (x)
669 1644342 : || REGNO_REG_CLASS (REGNO (x)) != NO_REGS));
670 : }
671 :
672 : /* Finds definitions that may correspond to invariants in LOOP with body
673 : BODY. */
674 :
675 : static void
676 658728 : find_defs (class loop *loop)
677 : {
678 658728 : if (dump_file)
679 : {
680 21 : fprintf (dump_file,
681 : "*****starting processing of loop %d ******\n",
682 : loop->num);
683 : }
684 :
685 658728 : df_chain_add_problem (DF_UD_CHAIN);
686 658728 : df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
687 658728 : df_analyze_loop (loop);
688 658728 : check_invariant_table_size ();
689 :
690 658728 : if (dump_file)
691 : {
692 21 : df_dump_region (dump_file);
693 21 : fprintf (dump_file,
694 : "*****ending processing of loop %d ******\n",
695 : loop->num);
696 : }
697 658728 : }
698 :
699 : /* Creates a new invariant for definition DEF in INSN, depending on invariants
700 : in DEPENDS_ON. ALWAYS_EXECUTED is true if the insn is always executed,
701 : unless the program ends due to a function call. The newly created invariant
702 : is returned. */
703 :
/* Creates a new invariant for definition DEF in INSN, depending on invariants
   in DEPENDS_ON.  ALWAYS_EXECUTED is true if the insn is always executed,
   unless the program ends due to a function call.  The newly created invariant
   is returned.  DEF may be NULL when the set as a whole cannot be moved.  */

static struct invariant *
create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
		      bool always_executed)
{
  struct invariant *inv = XNEW (struct invariant);
  rtx set = single_set (insn);
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

  inv->def = def;
  inv->always_executed = always_executed;
  inv->depends_on = depends_on;

  /* If the set is simple, usually by moving it we move the whole store out of
     the loop.  Otherwise we save only cost of the computation.  */
  if (def)
    {
      inv->cost = set_rtx_cost (set, speed);
      /* ??? Try to determine cheapness of address computation.  Unfortunately
	 the address cost is only a relative measure, we can't really compare
	 it with any absolute number, but only with other address costs.
	 But here we don't have any other addresses, so compare with a magic
	 number anyway.  It has to be large enough to not regress PR33928
	 (by avoiding to move reg+8,reg+16,reg+24 invariants), but small
	 enough to not regress 410.bwaves either (by still moving reg+reg
	 invariants).
	 See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html .  */
      if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set))))
	inv->cheap_address = address_cost (SET_SRC (set), word_mode,
					   ADDR_SPACE_GENERIC, speed) < 3;
      else
	inv->cheap_address = false;
    }
  else
    {
      inv->cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)),
				speed);
      inv->cheap_address = false;
    }

  inv->move = false;
  inv->reg = NULL_RTX;
  inv->orig_regno = -1;
  inv->stamp = 0;
  inv->insn = insn;

  /* The invariant's number is its position in the INVARIANTS vector.  */
  inv->invno = invariants.length ();
  inv->eqto = ~0u;

  /* Itself.  */
  inv->eqno = 1;

  if (def)
    def->invno = inv->invno;
  invariants.safe_push (inv);

  if (dump_file)
    {
      fprintf (dump_file,
	       "Set in insn %d is invariant (%d), cost %d, depends on ",
	       INSN_UID (insn), inv->invno, inv->cost);
      dump_bitmap (dump_file, inv->depends_on);
    }

  return inv;
}
769 :
770 : /* Return a canonical version of X for the address, from the point of view,
771 : that all multiplications are represented as MULT instead of the multiply
772 : by a power of 2 being represented as ASHIFT.
773 :
774 : Callers should prepare a copy of X because this function may modify it
775 : in place. */
776 :
/* Return a canonical version of X for the address, from the point of view,
   that all multiplications are represented as MULT instead of the multiply
   by a power of 2 being represented as ASHIFT.

   Callers should prepare a copy of X because this function may modify it
   in place.  */

static void
canonicalize_address_mult (rtx x)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
    {
      rtx sub = *iter;
      scalar_int_mode sub_mode;
      /* Only rewrite (ashift y (const_int k)) with a valid, non-negative
	 shift count in a scalar integer mode.  */
      if (is_a <scalar_int_mode> (GET_MODE (sub), &sub_mode)
	  && GET_CODE (sub) == ASHIFT
	  && CONST_INT_P (XEXP (sub, 1))
	  && INTVAL (XEXP (sub, 1)) < GET_MODE_BITSIZE (sub_mode)
	  && INTVAL (XEXP (sub, 1)) >= 0)
	{
	  HOST_WIDE_INT shift = INTVAL (XEXP (sub, 1));
	  /* Turn the shift into a multiply by 1 << shift, in place.  */
	  PUT_CODE (sub, MULT);
	  XEXP (sub, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, sub_mode);
	  iter.skip_subrtxes ();
	}
    }
}
798 :
799 : /* Maximum number of sub expressions in address. We set it to
800 : a small integer since it's unlikely to have a complicated
801 : address expression. */
802 :
803 : #define MAX_CANON_ADDR_PARTS (5)
804 :
/* Collect sub expressions in address X with PLUS as the separator.
   Sub expressions are stored in vector ADDR_PARTS.  */

static void
collect_address_parts (rtx x, vec<rtx> *addr_parts)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
    {
      rtx sub = *iter;

      /* Record any non-PLUS subexpression whole; only PLUS nodes are
	 descended into, so the result is the list of PLUS operands.  */
      if (GET_CODE (sub) != PLUS)
	{
	  addr_parts->safe_push (sub);
	  iter.skip_subrtxes ();
	}
    }
}
823 :
824 : /* Compare function for sorting sub expressions X and Y based on
825 : precedence defined for communitive operations. */
826 :
827 : static int
828 301700 : compare_address_parts (const void *x, const void *y)
829 : {
830 301700 : const rtx *rx = (const rtx *)x;
831 301700 : const rtx *ry = (const rtx *)y;
832 301700 : int px = commutative_operand_precedence (*rx);
833 301700 : int py = commutative_operand_precedence (*ry);
834 :
835 301700 : return (py - px);
836 : }
837 :
/* Return a canonical version address for X by following steps:
     1) Rewrite ASHIFT into MULT recursively.
     2) Divide address into sub expressions with PLUS as the
	separator.
     3) Sort sub expressions according to precedence defined
	for commutative operations.
     4) Simplify CONST_INT_P sub expressions.
     5) Create new canonicalized address and return.
   Callers should prepare a copy of X because this function may
   modify it in place.  */

static rtx
canonicalize_address (rtx x)
{
  rtx res;
  unsigned int i, j;
  machine_mode mode = GET_MODE (x);
  auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts;

  /* Rewrite ASHIFT into MULT.  */
  canonicalize_address_mult (x);
  /* Divide address into sub expressions.  */
  collect_address_parts (x, &addr_parts);
  /* Unlikely to have very complicated address.  */
  if (addr_parts.length () < 2
      || addr_parts.length () > MAX_CANON_ADDR_PARTS)
    return x;

  /* Sort sub expressions according to canonicalization precedence.
     Constants sort last, so all CONST_INTs end up adjacent at the tail.  */
  addr_parts.qsort (compare_address_parts);

  /* Simplify all constant int summary if possible.  I is left at the
     index of the first CONST_INT part (or at the end if there is none).  */
  for (i = 0; i < addr_parts.length (); i++)
    if (CONST_INT_P (addr_parts[i]))
      break;

  /* Fold all remaining constants into the first one.  */
  for (j = i + 1; j < addr_parts.length (); j++)
    {
      gcc_assert (CONST_INT_P (addr_parts[j]));
      addr_parts[i] = simplify_gen_binary (PLUS, mode,
					   addr_parts[i],
					   addr_parts[j]);
    }

  /* Chain PLUS operators to the left for !CONST_INT_P sub expressions.  */
  res = addr_parts[0];
  for (j = 1; j < i; j++)
    res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]);

  /* Pickup the last CONST_INT_P sub expression.  */
  if (i < addr_parts.length ())
    res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]);

  return res;
}
893 :
894 : /* Given invariant DEF and its address USE, check if the corresponding
895 : invariant expr can be propagated into the use or not. */
896 :
/* Given invariant DEF and its address USE, check if the corresponding
   invariant expr can be propagated into the use or not.  */

static bool
inv_can_prop_to_addr_use (struct def *def, df_ref use)
{
  struct invariant *inv;
  rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set;
  rtx_insn *use_insn = DF_REF_INSN (use);
  rtx_insn *def_insn;
  bool ok;

  inv = invariants[def->invno];
  /* No need to check if address expression is expensive.  */
  if (!inv->cheap_address)
    return false;

  def_insn = inv->insn;
  def_set = single_set (def_insn);
  if (!def_set)
    return false;

  /* Tentatively substitute the invariant's source into the use and see
     whether the result is still recognized.  */
  validate_unshare_change (use_insn, pos, SET_SRC (def_set), true);
  ok = verify_changes (0);
  /* Try harder with canonicalization in address expression.  */
  if (!ok && (use_set = single_set (use_insn)) != NULL_RTX)
    {
      rtx src, dest, mem = NULL_RTX;

      src = SET_SRC (use_set);
      dest = SET_DEST (use_set);
      if (MEM_P (src))
	mem = src;
      else if (MEM_P (dest))
	mem = dest;

      if (mem != NULL_RTX
	  && !memory_address_addr_space_p (GET_MODE (mem),
					   XEXP (mem, 0),
					   MEM_ADDR_SPACE (mem)))
	{
	  /* The raw substituted address is invalid; accept it if its
	     canonical form is a valid address for the mode.  */
	  rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0)));
	  if (memory_address_addr_space_p (GET_MODE (mem),
					   addr, MEM_ADDR_SPACE (mem)))
	    ok = true;
	}
    }
  /* Undo the tentative substitution in either case.  */
  cancel_changes (0);
  return ok;
}
944 :
945 : /* Record USE at DEF. */
946 :
/* Record USE at DEF, prepending it to DEF's use list and updating the
   use counters and address-propagation state.  */

static void
record_use (struct def *def, df_ref use)
{
  struct use *u = XNEW (struct use);

  u->pos = DF_REF_REAL_LOC (use);
  u->insn = DF_REF_INSN (use);
  u->addr_use_p = (DF_REF_TYPE (use) == DF_REF_REG_MEM_LOAD
		   || DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE);
  u->next = def->uses;
  def->uses = u;
  def->n_uses++;
  if (u->addr_use_p)
    {
      /* Initialize propagation information if this is the first addr
	 use of the inv def.  */
      if (def->n_addr_uses == 0)
	def->can_prop_to_addr_uses = true;

      def->n_addr_uses++;
      /* One non-propagatable address use spoils propagation for all;
	 once false, skip the (expensive) re-check.  */
      if (def->can_prop_to_addr_uses && !inv_can_prop_to_addr_use (def, use))
	def->can_prop_to_addr_uses = false;
    }
}
971 :
972 : /* Finds the invariants USE depends on and store them to the DEPENDS_ON
973 : bitmap. Returns true if all dependencies of USE are known to be
974 : loop invariants, false otherwise. */
975 :
/* Finds the invariants USE depends on and store them to the DEPENDS_ON
   bitmap.  Returns true if all dependencies of USE are known to be
   loop invariants, false otherwise.  */

static bool
check_dependency (basic_block bb, df_ref use, bitmap depends_on)
{
  df_ref def;
  basic_block def_bb;
  struct df_link *defs;
  struct def *def_data;
  struct invariant *inv;

  /* A read-write use cannot be invariant.  */
  if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
    return false;

  defs = DF_REF_CHAIN (use);
  if (!defs)
    {
      unsigned int regno = DF_REF_REGNO (use);

      /* If this is the use of an uninitialized argument register that is
	 likely to be spilled, do not move it lest this might extend its
	 lifetime and cause reload to die.  This can occur for a call to
	 a function taking complex number arguments and moving the insns
	 preparing the arguments without moving the call itself wouldn't
	 gain much in practice.  */
      if ((DF_REF_FLAGS (use) & DF_HARD_REG_LIVE)
	  && FUNCTION_ARG_REGNO_P (regno)
	  && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno)))
	return false;

      /* No reaching definition inside the region: the value is constant
	 throughout the loop.  */
      return true;
    }

  /* More than one reaching definition: not invariant.  */
  if (defs->next)
    return false;

  def = defs->ref;
  check_invariant_table_size ();
  inv = invariant_table[DF_REF_ID (def)];
  if (!inv)
    return false;

  def_data = inv->def;
  gcc_assert (def_data != NULL);

  def_bb = DF_REF_BB (def);
  /* Note that in case bb == def_bb, we know that the definition
     dominates insn, because def has invariant_table[DF_REF_ID(def)]
     defined and we process the insns in the basic block bb
     sequentially.  */
  if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
    return false;

  bitmap_set_bit (depends_on, def_data->invno);
  return true;
}
1030 :
1031 :
1032 : /* Finds the invariants INSN depends on and store them to the DEPENDS_ON
1033 : bitmap. Returns true if all dependencies of INSN are known to be
1034 : loop invariants, false otherwise. */
1035 :
1036 : static bool
1037 6752836 : check_dependencies (rtx_insn *insn, bitmap depends_on)
1038 : {
1039 6752836 : struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1040 6752836 : df_ref use;
1041 6752836 : basic_block bb = BLOCK_FOR_INSN (insn);
1042 :
: /* Both operand uses and uses appearing in REG_EQUAL/REG_EQUIV notes
: must be invariant for INSN as a whole to be movable. */
1043 8194377 : FOR_EACH_INSN_INFO_USE (use, insn_info)
1044 6895921 : if (!check_dependency (bb, use, depends_on))
1045 : return false;
1046 1435524 : FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1047 137068 : if (!check_dependency (bb, use, depends_on))
1048 : return false;
1049 :
1050 : return true;
1051 : }
1052 :
1053 : /* Pre-check candidate DEST to skip the one which cannot make a valid insn
1054 : during move_invariant_reg. SIMPLE is to skip HARD_REGISTER. */
1055 : static bool
1056 11341757 : pre_check_invariant_p (bool simple, rtx dest)
1057 : {
: /* Only worth checking for pseudos with more than one definition;
: a single-def pseudo cannot exhibit the problem below. */
1058 11341757 : if (simple && REG_P (dest) && DF_REG_DEF_COUNT (REGNO (dest)) > 1)
1059 : {
1060 2646291 : df_ref use;
1061 2646291 : unsigned int i = REGNO (dest);
1062 2646291 : struct df_insn_info *insn_info;
1063 2646291 : df_ref def_rec;
1064 :
: /* Walk every use of DEST; if some insn both uses and defines DEST,
: renaming DEST in that insn would be invalid. */
1065 11944261 : for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use))
1066 : {
1067 11166430 : rtx_insn *ref = DF_REF_INSN (use);
1068 11166430 : insn_info = DF_INSN_INFO_GET (ref);
1069 :
1070 19633619 : FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
1071 10335649 : if (DF_REF_REGNO (def_rec) == i)
1072 : {
1073 : /* Multi definitions at this stage, most likely are due to
1074 : instruction constraints, which requires both read and write
1075 : on the same register. Since move_invariant_reg is not
1076 : powerful enough to handle such cases, just ignore the INV
1077 : and leave the chance to others. */
1078 : return false;
1079 : }
1080 : }
1081 : }
1082 : return true;
1083 : }
1084 :
1085 : /* Finds invariant in INSN. ALWAYS_REACHED is true if the insn is always
1086 : executed. ALWAYS_EXECUTED is true if the insn is always executed,
1087 : unless the program ends due to a function call. */
1088 :
1089 : static void
1090 15554087 : find_invariant_insn (rtx_insn *insn, bool always_reached, bool always_executed)
1091 : {
1092 15554087 : df_ref ref;
1093 15554087 : struct def *def;
1094 15554087 : bitmap depends_on;
1095 15554087 : rtx set, dest;
1096 15554087 : bool simple = true;
1097 15554087 : struct invariant *inv;
1098 :
1099 : /* Jumps have control flow side-effects. */
1100 15554087 : if (JUMP_P (insn))
1101 : return;
1102 :
1103 13690048 : set = single_set (insn);
1104 13690048 : if (!set)
1105 : return;
1106 13326047 : dest = SET_DEST (set);
1107 :
: /* Only a pseudo-register destination can be renamed to a fresh
: temporary; hard registers and non-REG destinations are "not
: simple" and get no struct def. */
1108 13326047 : if (!REG_P (dest)
1109 13326047 : || HARD_REGISTER_P (dest))
1110 : simple = false;
1111 :
1112 13326047 : if (!may_assign_reg_p (dest)
1113 11341757 : || !pre_check_invariant_p (simple, dest)
1114 22799344 : || !check_maybe_invariant (SET_SRC (set)))
1115 : return;
1116 :
1117 : /* If the insn can throw exception, we cannot move it at all without changing
1118 : cfg. */
1119 7416671 : if (can_throw_internal (insn))
1120 : return;
1121 :
1122 : /* We cannot make trapping insn executed, unless it was executed before. */
1123 7409885 : if (may_trap_or_fault_p (PATTERN (insn)) && !always_reached)
1124 : return;
1125 :
1126 : /* inline-asm that is not always executed cannot be moved
1127 : as it might conditionally trap. */
1128 6752881 : if (!always_reached && asm_noperands (PATTERN (insn)) >= 0)
1129 : return;
1130 :
1131 6752836 : depends_on = BITMAP_ALLOC (NULL);
1132 6752836 : if (!check_dependencies (insn, depends_on))
1133 : {
1134 5454380 : BITMAP_FREE (depends_on);
1135 5454380 : return;
1136 : }
1137 :
1138 1298456 : if (simple)
1139 616034 : def = XCNEW (struct def);
1140 : else
1141 : def = NULL;
1142 :
1143 1298456 : inv = create_new_invariant (def, insn, depends_on, always_executed);
1144 :
1145 1298456 : if (simple)
1146 : {
: /* Map the definition back to the new invariant so that later uses
: of DEST are recognized (see invariant_for_use / record_uses). */
1147 616034 : ref = df_find_def (insn, dest);
1148 616034 : check_invariant_table_size ();
1149 616034 : invariant_table[DF_REF_ID (ref)] = inv;
1150 : }
1151 : }
1152 :
1153 : /* Record registers used in INSN that have a unique invariant definition. */
1154 :
1155 : static void
1156 15554087 : record_uses (rtx_insn *insn)
1157 : {
1158 15554087 : struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1159 15554087 : df_ref use;
1160 15554087 : struct invariant *inv;
1161 :
: /* Record operand uses as well as uses inside REG_EQUAL/REG_EQUIV
: notes; both are needed for correct replacement when moving. */
1162 37636395 : FOR_EACH_INSN_INFO_USE (use, insn_info)
1163 : {
1164 22082308 : inv = invariant_for_use (use);
1165 22082308 : if (inv)
1166 669950 : record_use (inv->def, use);
1167 : }
1168 15994744 : FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1169 : {
1170 440657 : inv = invariant_for_use (use);
1171 440657 : if (inv)
1172 799 : record_use (inv->def, use);
1173 : }
1174 15554087 : }
1175 :
1176 : /* Finds invariants in INSN. ALWAYS_REACHED is true if the insn is always
1177 : executed. ALWAYS_EXECUTED is true if the insn is always executed,
1178 : unless the program ends due to a function call. */
1179 :
1180 : static void
1181 15554087 : find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
1182 : {
: /* First determine whether INSN itself is a new invariant, then record
: INSN's uses of previously discovered invariant definitions. */
1183 0 : find_invariant_insn (insn, always_reached, always_executed);
1184 15554087 : record_uses (insn);
1185 0 : }
1186 :
1187 : /* Finds invariants in basic block BB. ALWAYS_REACHED is true if the
1188 : basic block is always executed. ALWAYS_EXECUTED is true if the basic
1189 : block is always executed, unless the program ends due to a function
1190 : call. */
1191 :
1192 : static void
1193 4090230 : find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
1194 : bool always_executed)
1195 : {
1196 4090230 : rtx_insn *insn;
1197 4090230 : basic_block preheader = loop_preheader_edge (loop)->src;
1198 :
1199 : /* Don't move insn of cold BB out of loop to preheader to reduce calculations
1200 : and register live range in hot loop with cold BB. */
1201 4090230 : if (!always_executed && preheader->count > bb->count)
1202 : {
1203 686520 : if (dump_file)
1204 2 : fprintf (dump_file, "Don't move invariant from bb: %d out of loop %d\n",
1205 : bb->index, loop->num);
1206 686520 : return;
1207 : }
1208 :
1209 38494133 : FOR_BB_INSNS (bb, insn)
1210 : {
1211 35090423 : if (!NONDEBUG_INSN_P (insn))
1212 19536336 : continue;
1213 :
1214 15554087 : find_invariants_insn (insn, always_reached, always_executed);
1215 :
: /* After a call that may not return normally (not const/pure, or a
: const/pure call that may loop forever), the remaining insns of
: this block can no longer be considered always reached. */
1216 15554087 : if (always_reached
1217 4043533 : && CALL_P (insn)
1218 15671434 : && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1219 116628 : || ! RTL_CONST_OR_PURE_CALL_P (insn)))
1220 : always_reached = false;
1221 : }
1222 : }
1223 :
1224 : /* Finds invariants in LOOP with body BODY. ALWAYS_REACHED is the bitmap of
1225 : basic blocks in BODY that are always executed. ALWAYS_EXECUTED is the
1226 : bitmap of basic blocks in BODY that are always executed unless the program
1227 : ends due to a function call. */
1228 :
1229 : static void
1230 658728 : find_invariants_body (class loop *loop, basic_block *body,
1231 : bitmap always_reached, bitmap always_executed)
1232 : {
1233 658728 : unsigned i;
1234 :
: /* BODY is in dominator order, so invariants found in dominating
: blocks are already in the table when dominated blocks are scanned. */
1235 4748958 : for (i = 0; i < loop->num_nodes; i++)
1236 4090230 : find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i),
1237 4090230 : bitmap_bit_p (always_executed, i));
1238 658728 : }
1239 :
1240 : /* Finds invariants in LOOP. */
1241 :
1242 : static void
1243 658728 : find_invariants (class loop *loop)
1244 : {
1245 658728 : auto_bitmap may_exit;
1246 658728 : auto_bitmap always_reached;
1247 658728 : auto_bitmap has_exit;
1248 658728 : auto_bitmap always_executed;
1249 658728 : basic_block *body = get_loop_body_in_dom_order (loop);
1250 :
: /* Compute the sets of blocks executed on every iteration, in the two
: flavors documented for find_invariants_body. */
1251 658728 : find_exits (loop, body, may_exit, has_exit);
1252 658728 : compute_always_reached (loop, body, may_exit, always_reached);
1253 658728 : compute_always_reached (loop, body, has_exit, always_executed);
1254 :
1255 658728 : find_defs (loop);
1256 658728 : find_invariants_body (loop, body, always_reached, always_executed);
: /* Invariants computing the same value are merged so only the
: representative is moved and the rest reuse its register. */
1257 658728 : merge_identical_invariants ();
1258 :
1259 658728 : free (body);
1260 658728 : }
1261 :
1262 : /* Frees a list of uses USE. */
1263 :
1264 : static void
1265 616034 : free_use_list (struct use *use)
1266 : {
1267 616034 : struct use *next;
1268 :
: /* Grab the successor before freeing the current node. */
1269 1286783 : for (; use; use = next)
1270 : {
1271 670749 : next = use->next;
1272 670749 : free (use);
1273 : }
1274 0 : }
1275 :
1276 : /* Return pressure class and number of hard registers (through *NREGS)
1277 : for destination of INSN. */
1278 : static enum reg_class
1279 15 : get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
1280 : {
1281 15 : rtx reg;
1282 15 : enum reg_class pressure_class;
1283 15 : rtx set = single_set (insn);
1284 :
1285 : /* Considered invariant insns have only one set. */
1286 15 : gcc_assert (set != NULL_RTX);
1287 15 : reg = SET_DEST (set);
1288 15 : if (GET_CODE (reg) == SUBREG)
1289 0 : reg = SUBREG_REG (reg);
1290 15 : if (MEM_P (reg))
1291 : {
: /* A store needs no new register, so it adds no pressure. */
1292 0 : *nregs = 0;
1293 0 : pressure_class = NO_REGS;
1294 : }
1295 : else
1296 : {
1297 15 : if (! REG_P (reg))
1298 : reg = NULL_RTX;
1299 15 : if (reg == NULL_RTX)
1300 : pressure_class = GENERAL_REGS;
1301 : else
1302 : {
1303 15 : pressure_class = reg_allocno_class (REGNO (reg));
1304 15 : pressure_class = ira_pressure_class_translate[pressure_class];
1305 : }
: /* Number of hard registers of that class needed to hold a value
: of the source mode. */
1306 15 : *nregs
1307 15 : = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
1308 : }
1309 15 : return pressure_class;
1310 : }
1311 :
1312 : /* Calculates cost and number of registers needed for moving invariant INV
1313 : out of the loop and stores them to *COST and *REGS_NEEDED. *CL will be
1314 : the REG_CLASS of INV. Return
1315 : -1: if INV is invalid.
1316 : 0: if INV and its depends_on have same reg_class
1317 : 1: if INV and its depends_on have different reg_classes. */
1318 :
1319 : static int
1320 2560380 : get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed,
1321 : enum reg_class *cl)
1322 : {
1323 2560380 : int i, acomp_cost;
1324 2560380 : unsigned aregs_needed[N_REG_CLASSES];
1325 2560380 : unsigned depno;
1326 2560380 : struct invariant *dep;
1327 2560380 : bitmap_iterator bi;
1328 2560380 : int ret = 1;
1329 :
1330 : /* Find the representative of the class of the equivalent invariants. */
1331 2560380 : inv = invariants[inv->eqto];
1332 :
1333 2560380 : *comp_cost = 0;
1334 2560380 : if (! flag_ira_loop_pressure)
1335 2560365 : regs_needed[0] = 0;
1336 : else
1337 : {
1338 75 : for (i = 0; i < ira_pressure_classes_num; i++)
1339 60 : regs_needed[ira_pressure_classes[i]] = 0;
1340 : }
1341 :
: /* Already scheduled for moving, or already accounted for during the
: current gain computation (the stamp avoids counting a shared
: dependency twice). */
1342 2560380 : if (inv->move
1343 2557938 : || inv->stamp == actual_stamp)
1344 : return -1;
1345 2556711 : inv->stamp = actual_stamp;
1346 :
1347 2556711 : if (! flag_ira_loop_pressure)
1348 2556696 : regs_needed[0]++;
1349 : else
1350 : {
1351 15 : int nregs;
1352 15 : enum reg_class pressure_class;
1353 :
1354 15 : pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
1355 15 : regs_needed[pressure_class] += nregs;
1356 15 : *cl = pressure_class;
1357 15 : ret = 0;
1358 : }
1359 :
: /* A cheap address computation contributes no execution cost when
: every use is an address use the invariant can be propagated into. */
1360 2556711 : if (!inv->cheap_address
1361 1016397 : || inv->def->n_uses == 0
1362 809007 : || inv->def->n_addr_uses < inv->def->n_uses
1363 : /* Count cost if the inv can't be propagated into address uses. */
1364 70266 : || !inv->def->can_prop_to_addr_uses)
1365 2494981 : (*comp_cost) += inv->cost * inv->eqno;
1366 :
1367 : #ifdef STACK_REGS
1368 2556711 : {
1369 : /* Hoisting constant pool constants into stack regs may cost more than
1370 : just single register. On x87, the balance is affected both by the
1371 : small number of FP registers, and by its register stack organization,
1372 : that forces us to add compensation code in and around the loop to
1373 : shuffle the operands to the top of stack before use, and pop them
1374 : from the stack after the loop finishes.
1375 :
1376 : To model this effect, we increase the number of registers needed for
1377 : stack registers by two: one register push, and one register pop.
1378 : This usually has the effect that FP constant loads from the constant
1379 : pool are not moved out of the loop.
1380 :
1381 : Note that this also means that dependent invariants cannot be moved.
1382 : However, the primary purpose of this pass is to move loop invariant
1383 : address arithmetic out of loops, and address arithmetic that depends
1384 : on floating point constants is unlikely to ever occur. */
1385 2556711 : rtx set = single_set (inv->insn);
1386 2556711 : if (set
1387 2556711 : && IS_STACK_MODE (GET_MODE (SET_SRC (set)))
1388 2568370 : && constant_pool_constant_p (SET_SRC (set)))
1389 : {
1390 7487 : if (flag_ira_loop_pressure)
1391 0 : regs_needed[ira_stack_reg_pressure_class] += 2;
1392 : else
1393 7487 : regs_needed[0] += 2;
1394 : }
1395 : }
1396 : #endif
1397 :
: /* Recursively accumulate costs and register needs of the (not yet
: moved) invariants this one depends on. */
1398 3174665 : EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
1399 : {
1400 617954 : bool check_p;
1401 617954 : enum reg_class dep_cl = ALL_REGS;
1402 617954 : int dep_ret;
1403 :
1404 617954 : dep = invariants[depno];
1405 :
1406 : /* If DEP is moved out of the loop, it is not a depends_on any more. */
1407 617954 : if (dep->move)
1408 95796 : continue;
1409 :
1410 522158 : dep_ret = get_inv_cost (dep, &acomp_cost, aregs_needed, &dep_cl);
1411 :
1412 522158 : if (! flag_ira_loop_pressure)
1413 522158 : check_p = aregs_needed[0] != 0;
1414 : else
1415 : {
1416 0 : for (i = 0; i < ira_pressure_classes_num; i++)
1417 0 : if (aregs_needed[ira_pressure_classes[i]] != 0)
1418 : break;
1419 0 : check_p = i < ira_pressure_classes_num;
1420 :
: /* Mixed register classes among the dependencies degrade the
: result to "different reg_classes" (see function comment). */
1421 0 : if ((dep_ret == 1) || ((dep_ret == 0) && (*cl != dep_cl)))
1422 : {
1423 0 : *cl = ALL_REGS;
1424 0 : ret = 1;
1425 : }
1426 : }
1427 522158 : if (check_p
1428 : /* We need to check always_executed, since if the original value of
1429 : the invariant may be preserved, we may need to keep it in a
1430 : separate register. TODO check whether the register has an
1431 : use outside of the loop. */
1432 518489 : && dep->always_executed
1433 155662 : && !dep->def->uses->next)
1434 : {
1435 : /* If this is a single use, after moving the dependency we will not
1436 : need a new register. */
1437 94311 : if (! flag_ira_loop_pressure)
1438 94311 : aregs_needed[0]--;
1439 : else
1440 : {
1441 0 : int nregs;
1442 0 : enum reg_class pressure_class;
1443 :
1444 0 : pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
1445 0 : aregs_needed[pressure_class] -= nregs;
1446 : }
1447 : }
1448 :
1449 522158 : if (! flag_ira_loop_pressure)
1450 522158 : regs_needed[0] += aregs_needed[0];
1451 : else
1452 : {
1453 0 : for (i = 0; i < ira_pressure_classes_num; i++)
1454 0 : regs_needed[ira_pressure_classes[i]]
1455 0 : += aregs_needed[ira_pressure_classes[i]];
1456 : }
1457 522158 : (*comp_cost) += acomp_cost;
1458 : }
1459 : return ret;
1460 : }
1461 :
1462 : /* Calculates gain for eliminating invariant INV. REGS_USED is the number
1463 : of registers used in the loop, NEW_REGS is the number of new variables
1464 : already added due to the invariant motion. The number of registers needed
1465 : for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed
1466 : through to estimate_reg_pressure_cost. */
1467 :
1468 : static int
1469 2038222 : gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
1470 : unsigned *new_regs, unsigned regs_used,
1471 : bool speed, bool call_p)
1472 : {
1473 2038222 : int comp_cost, size_cost;
1474 : /* Workaround -Wmaybe-uninitialized false positive during
1475 : profiledbootstrap by initializing it. */
1476 2038222 : enum reg_class cl = NO_REGS;
1477 2038222 : int ret;
1478 :
: /* Take a fresh stamp so get_inv_cost counts each dependency once. */
1479 2038222 : actual_stamp++;
1480 :
1481 2038222 : ret = get_inv_cost (inv, &comp_cost, regs_needed, &cl);
1482 :
1483 2038222 : if (! flag_ira_loop_pressure)
1484 : {
: /* Gain is computation cost saved minus the estimated cost of the
: extra register pressure the move introduces. */
1485 2038207 : size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
1486 : regs_used, speed, call_p)
1487 2038207 : - estimate_reg_pressure_cost (new_regs[0],
1488 : regs_used, speed, call_p));
1489 : }
1490 15 : else if (ret < 0)
1491 : return -1;
1492 15 : else if ((ret == 0) && (cl == NO_REGS))
1493 : /* Hoist it anyway since it does not impact register pressure. */
1494 : return 1;
1495 : else
1496 : {
1497 : int i;
1498 : enum reg_class pressure_class;
1499 :
1500 19 : for (i = 0; i < ira_pressure_classes_num; i++)
1501 : {
1502 18 : pressure_class = ira_pressure_classes[i];
1503 :
1504 18 : if (!reg_classes_intersect_p (pressure_class, cl))
1505 3 : continue;
1506 :
1507 15 : if ((int) new_regs[pressure_class]
1508 15 : + (int) regs_needed[pressure_class]
1509 15 : + LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
1510 15 : + param_ira_loop_reserved_regs
1511 15 : > ira_class_hard_regs_num[pressure_class])
1512 : break;
1513 : }
1514 15 : if (i < ira_pressure_classes_num)
1515 : /* There will be register pressure excess and we want not to
1516 : make this loop invariant motion. All loop invariants with
1517 : non-positive gains will be rejected in function
1518 : find_invariants_to_move. Therefore we return the negative
1519 : number here.
1520 :
1521 : One could think that this rejects also expensive loop
1522 : invariant motions and this will hurt code performance.
1523 : However numerous experiments with different heuristics
1524 : taking invariant cost into account did not confirm this
1525 : assumption. There are possible explanations for this
1526 : result:
1527 : o probably all expensive invariants were already moved out
1528 : of the loop by PRE and gimple invariant motion pass.
1529 : o expensive invariant execution will be hidden by insn
1530 : scheduling or OOO processor hardware because usually such
1531 : invariants have a lot of freedom to be executed
1532 : out-of-order.
1533 : Another reason for ignoring invariant cost vs spilling cost
1534 : heuristics is also in difficulties to evaluate accurately
1535 : spill cost at this stage. */
1536 : return -1;
1537 : else
1538 : size_cost = 0;
1539 : }
1540 :
1541 2038208 : return comp_cost - size_cost;
1542 : }
1543 :
1544 : /* Finds invariant with best gain for moving. Returns the gain, stores
1545 : the invariant in *BEST and number of registers needed for it to
1546 : *REGS_NEEDED. REGS_USED is the number of registers used in the loop.
1547 : NEW_REGS is the number of new variables already added due to invariant
1548 : motion. */
1549 :
1550 : static int
1551 488802 : best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
1552 : unsigned *new_regs, unsigned regs_used,
1553 : bool speed, bool call_p)
1554 : {
1555 488802 : struct invariant *inv;
1556 488802 : int i, gain = 0, again;
1557 488802 : unsigned aregs_needed[N_REG_CLASSES], invno;
1558 :
: /* A returned gain of 0 means no profitable invariant was found;
: *BEST is then left untouched. */
1559 4250009 : FOR_EACH_VEC_ELT (invariants, invno, inv)
1560 : {
1561 3761207 : if (inv->move)
1562 631187 : continue;
1563 :
1564 : /* Only consider the "representatives" of equivalent invariants. */
1565 3130020 : if (inv->eqto != inv->invno)
1566 1091798 : continue;
1567 :
1568 2038222 : again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
1569 : speed, call_p);
1570 2038222 : if (again > gain)
1571 : {
1572 346731 : gain = again;
1573 346731 : *best = inv;
1574 346731 : if (! flag_ira_loop_pressure)
1575 346730 : regs_needed[0] = aregs_needed[0];
1576 : else
1577 : {
1578 5 : for (i = 0; i < ira_pressure_classes_num; i++)
1579 4 : regs_needed[ira_pressure_classes[i]]
1580 4 : = aregs_needed[ira_pressure_classes[i]];
1581 : }
1582 : }
1583 : }
1584 :
1585 488802 : return gain;
1586 : }
1587 :
1588 : /* Marks invariant INVNO and all its dependencies for moving. */
1589 :
1590 : static void
1591 332638 : set_move_mark (unsigned invno, int gain)
1592 : {
1593 332638 : struct invariant *inv = invariants[invno];
1594 332638 : bitmap_iterator bi;
1595 :
1596 : /* Find the representative of the class of the equivalent invariants. */
1597 332638 : inv = invariants[inv->eqto];
1598 :
1599 332638 : if (inv->move)
1600 4303 : return;
1601 328335 : inv->move = true;
1602 :
1603 328335 : if (dump_file)
1604 : {
: /* GAIN < 0 marks a move forced by a dependency rather than one
: selected for its own profit. */
1605 3 : if (gain >= 0)
1606 3 : fprintf (dump_file, "Decided to move invariant %d -- gain %d\n",
1607 : invno, gain);
1608 : else
1609 0 : fprintf (dump_file, "Decided to move dependent invariant %d\n",
1610 : invno);
1611 328335 : };
1612 :
: /* All dependencies must be moved along with this invariant. */
1613 410465 : EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, invno, bi)
1614 : {
1615 82130 : set_move_mark (invno, -1);
1616 : }
1617 : }
1618 :
1619 : /* Determines which invariants to move. */
1620 :
1621 : static void
1622 658728 : find_invariants_to_move (bool speed, bool call_p)
1623 : {
1624 658728 : int gain;
1625 658728 : unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
1626 658728 : struct invariant *inv = NULL;
1627 :
1628 658728 : if (!invariants.length ())
1629 420434 : return;
1630 :
1631 238294 : if (flag_ira_loop_pressure)
1632 : /* REGS_USED is actually never used when the flag is on. */
1633 : regs_used = 0;
1634 : else
1635 : /* We do not really do a good job in estimating number of
1636 : registers used; we put some initial bound here to stand for
1637 : induction variables etc. that we do not detect. */
1638 : {
1639 238293 : unsigned int n_regs = DF_REG_SIZE (df);
1640 :
1641 238293 : regs_used = 2;
1642 :
1643 136225035 : for (i = 0; i < n_regs; i++)
1644 : {
1645 135986742 : if (!DF_REGNO_FIRST_DEF (i) && DF_REGNO_LAST_USE (i))
1646 : {
1647 : /* This is a value that is used but not changed inside loop. */
1648 0 : regs_used++;
1649 : }
1650 : }
1651 : }
1652 :
1653 238294 : if (! flag_ira_loop_pressure)
1654 238293 : new_regs[0] = regs_needed[0] = 0;
1655 : else
1656 : {
1657 5 : for (i = 0; (int) i < ira_pressure_classes_num; i++)
1658 4 : new_regs[ira_pressure_classes[i]] = 0;
1659 : }
: /* Greedily pick the most profitable invariant until no move has a
: positive gain, charging each pick's registers against the budget
: of subsequent gain computations. */
1660 977604 : while ((gain = best_gain_for_invariant (&inv, regs_needed,
1661 : new_regs, regs_used,
1662 488802 : speed, call_p)) > 0)
1663 : {
1664 250508 : set_move_mark (inv->invno, gain);
1665 250508 : if (! flag_ira_loop_pressure)
1666 250507 : new_regs[0] += regs_needed[0];
1667 : else
1668 : {
1669 5 : for (i = 0; (int) i < ira_pressure_classes_num; i++)
1670 4 : new_regs[ira_pressure_classes[i]]
1671 4 : += regs_needed[ira_pressure_classes[i]];
1672 : }
1673 : }
1674 : }
1675 :
1676 : /* Replace the uses, reached by the definition of invariant INV, by REG.
1677 :
1678 : IN_GROUP is nonzero if this is part of a group of changes that must be
1679 : performed as a group. In that case, the changes will be stored. The
1680 : function `apply_change_group' will validate and apply the changes. */
1681 :
1682 : static int
1683 244119 : replace_uses (struct invariant *inv, rtx reg, bool in_group)
1684 : {
1685 : /* Replace the uses we know to be dominated. It saves work for copy
1686 : propagation, and also it is necessary so that dependent invariants
1687 : are computed right. */
1688 244119 : if (inv->def)
1689 : {
1690 238064 : struct use *use;
1691 465520 : for (use = inv->def->uses; use; use = use->next)
1692 227456 : validate_change (use->insn, use->pos, reg, true);
1693 :
1694 : /* If we aren't part of a larger group, apply the changes now. */
1695 238064 : if (!in_group)
1696 44228 : return apply_change_group ();
1697 : }
1698 :
: /* Nothing to replace, or changes queued for the caller's group:
: report success. */
1699 : return 1;
1700 : }
1701 :
1702 : /* Whether invariant INV setting REG can be moved out of LOOP, at the end of
1703 : the block preceding its header. */
1704 :
1705 : static bool
1706 328335 : can_move_invariant_reg (class loop *loop, struct invariant *inv, rtx reg)
1707 : {
1708 328335 : df_ref def, use;
1709 328335 : unsigned int dest_regno, defs_in_loop_count = 0;
1710 328335 : rtx_insn *insn = inv->insn;
1711 328335 : basic_block bb = BLOCK_FOR_INSN (inv->insn);
1712 328335 : auto_vec <rtx_insn *, 16> debug_insns_to_reset;
1713 :
1714 : /* We ignore hard register and memory access for cost and complexity reasons.
1715 : Hard register are few at this stage and expensive to consider as they
1716 : require building a separate data flow. Memory access would require using
1717 : df_simulate_* and can_move_insns_across functions and is more complex. */
1718 328335 : if (!REG_P (reg) || HARD_REGISTER_P (reg))
1719 : return false;
1720 :
1721 : /* Check whether the set is always executed. We could omit this condition if
1722 : we know that the register is unused outside of the loop, but it does not
1723 : seem worth finding out. */
1724 327528 : if (!inv->always_executed)
1725 : return false;
1726 :
1727 : /* Check that all uses that would be dominated by def are already dominated
1728 : by it. */
1729 140342 : dest_regno = REGNO (reg);
1730 358168 : for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use))
1731 : {
1732 219057 : rtx_insn *use_insn;
1733 219057 : basic_block use_bb;
1734 :
1735 219057 : use_insn = DF_REF_INSN (use);
1736 219057 : use_bb = BLOCK_FOR_INSN (use_insn);
1737 :
1738 : /* Ignore instruction considered for moving. */
1739 219057 : if (use_insn == insn)
1740 11696 : continue;
1741 :
1742 : /* Don't consider uses outside loop. */
1743 219057 : if (!flow_bb_inside_loop_p (loop, use_bb))
1744 11696 : continue;
1745 :
1746 : /* Don't move if a use is not dominated by def in insn. */
1747 143024 : if ((use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn))
1748 349391 : || !dominated_by_p (CDI_DOMINATORS, use_bb, bb))
1749 : {
: /* A non-dominated debug use does not block the move; collect it
: so its location can be invalidated below. */
1750 1233 : if (!DEBUG_INSN_P (use_insn))
1751 1231 : return false;
1752 2 : debug_insns_to_reset.safe_push (use_insn);
1753 : }
1754 : }
1755 :
1756 : /* Check for other defs. Any other def in the loop might reach a use
1757 : currently reached by the def in insn. */
1758 279032 : for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def))
1759 : {
1760 145340 : basic_block def_bb = DF_REF_BB (def);
1761 :
1762 : /* Defs in exit block cannot reach a use they weren't already. */
1763 145340 : if (single_succ_p (def_bb))
1764 : {
1765 40542 : basic_block def_bb_succ;
1766 :
1767 40542 : def_bb_succ = single_succ (def_bb);
1768 40542 : if (!flow_bb_inside_loop_p (loop, def_bb_succ))
1769 810 : continue;
1770 : }
1771 :
: /* INSN's own def is counted here too, hence the limit of one. */
1772 144530 : if (++defs_in_loop_count > 1)
1773 : return false;
1774 : }
1775 :
1776 : /* Reset debug uses if a use is not dominated by def in insn. */
1777 401078 : for (auto use_insn : debug_insns_to_reset)
1778 : {
1779 2 : INSN_VAR_LOCATION_LOC (use_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
1780 2 : df_insn_rescan (use_insn);
1781 : }
1782 :
1783 : return true;
1784 328335 : }
1785 :
1786 : /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false
1787 : otherwise. */
1788 :
1789 : static bool
1790 1430062 : move_invariant_reg (class loop *loop, unsigned invno)
1791 : {
1792 1430062 : struct invariant *inv = invariants[invno];
1793 1430062 : struct invariant *repr = invariants[inv->eqto];
1794 1430062 : unsigned i;
1795 1430062 : basic_block preheader = loop_preheader_edge (loop)->src;
1796 1430062 : rtx reg, set, dest, note;
1797 1430062 : bitmap_iterator bi;
1798 1430062 : int regno = -1;
1799 :
: /* A non-NULL reg means this invariant was already moved (possibly via
: a recursive call for a dependency). */
1800 1430062 : if (inv->reg)
1801 : return true;
1802 1298456 : if (!repr->move)
1803 : return false;
1804 :
1805 : /* If this is a representative of the class of equivalent invariants,
1806 : really move the invariant. Otherwise just replace its use with
1807 : the register used for the representative. */
1808 377811 : if (inv == repr)
1809 : {
: /* Dependencies must already sit in the preheader before this insn
: can be moved there, so move them first. */
1810 328335 : if (inv->depends_on)
1811 : {
1812 410465 : EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, i, bi)
1813 : {
1814 82130 : if (!move_invariant_reg (loop, i))
1815 0 : goto fail;
1816 : }
1817 : }
1818 :
1819 : /* If possible, just move the set out of the loop. Otherwise, we
1820 : need to create a temporary register. */
1821 328335 : set = single_set (inv->insn);
1822 328335 : reg = dest = SET_DEST (set);
1823 328335 : if (GET_CODE (reg) == SUBREG)
1824 36 : reg = SUBREG_REG (reg);
1825 328335 : if (REG_P (reg))
1826 328204 : regno = REGNO (reg);
1827 :
1828 328335 : if (!can_move_invariant_reg (loop, inv, dest))
1829 : {
1830 194643 : reg = gen_reg_rtx_and_attrs (dest);
1831 :
1832 : /* Try replacing the destination by a new pseudoregister. */
1833 194643 : validate_change (inv->insn, &SET_DEST (set), reg, true);
1834 :
1835 : /* As well as all the dominated uses. */
1836 194643 : replace_uses (inv, reg, true);
1837 :
1838 : /* And validate all the changes. */
1839 194643 : if (!apply_change_group ())
1840 39 : goto fail;
1841 :
: /* Keep a copy into the original destination inside the loop for
: any uses the replacement did not reach. */
1842 194604 : emit_insn_after (gen_move_insn (dest, reg), inv->insn);
1843 : }
1844 133692 : else if (dump_file)
1845 1 : fprintf (dump_file, "Invariant %d moved without introducing a new "
1846 : "temporary register\n", invno);
: /* Moving past a jump at the end of the preheader would be wrong;
: split the edge to get a fresh block instead. */
1847 328296 : if (JUMP_P (BB_END (preheader)))
1848 6 : preheader = split_edge (loop_preheader_edge (loop));
1849 328296 : reorder_insns (inv->insn, inv->insn, BB_END (preheader));
1850 328296 : df_recompute_luids (preheader);
1851 :
1852 : /* If there is a REG_EQUAL note on the insn we just moved, and the
1853 : insn is in a basic block that is not always executed or the note
1854 : contains something for which we don't know the invariant status,
1855 : the note may no longer be valid after we move the insn. Note that
1856 : uses in REG_EQUAL notes are taken into account in the computation
1857 : of invariants, so it is safe to retain the note even if it contains
1858 : register references for which we know the invariant status. */
1859 328296 : if ((note = find_reg_note (inv->insn, REG_EQUAL, NULL_RTX))
1860 328296 : && (!inv->always_executed
1861 35692 : || !check_maybe_invariant (XEXP (note, 0))))
1862 26487 : remove_note (inv->insn, note);
1863 : }
1864 : else
1865 : {
: /* Ensure the representative is moved first, then rewrite this
: duplicate as a copy from the representative's register. */
1866 49476 : if (!move_invariant_reg (loop, repr->invno))
1867 0 : goto fail;
1868 49476 : reg = repr->reg;
1869 49476 : regno = repr->orig_regno;
1870 49476 : if (!replace_uses (inv, reg, false))
1871 0 : goto fail;
1872 49476 : set = single_set (inv->insn);
1873 49476 : emit_insn_after (gen_move_insn (SET_DEST (set), reg), inv->insn);
1874 49476 : delete_insn (inv->insn);
1875 : }
1876 :
1877 377772 : inv->reg = reg;
1878 377772 : inv->orig_regno = regno;
1879 :
1880 377772 : return true;
1881 :
1882 39 : fail:
1883 : /* If we failed, clear move flag, so that we do not try to move inv
1884 : again. */
1885 39 : if (dump_file)
1886 0 : fprintf (dump_file, "Failed to move invariant %d\n", invno);
1887 39 : inv->move = false;
1888 39 : inv->reg = NULL_RTX;
1889 39 : inv->orig_regno = -1;
1890 :
1891 39 : return false;
1892 : }
1893 :
/* Move the selected invariants out of the LOOP, creating new temporary
   registers for them where necessary.  */
1896 :
static void
move_invariants (class loop *loop)
{
  struct invariant *inv;
  unsigned i;

  /* Move each recorded invariant; move_invariant_reg itself decides
     whether the invariant was actually selected for motion.  */
  FOR_EACH_VEC_ELT (invariants, i, inv)
    move_invariant_reg (loop, i);
  /* When tracking register pressure, the pseudos created above need IRA
     class information; copy it from the original registers where known.  */
  if (flag_ira_loop_pressure && resize_reg_info ())
    {
      FOR_EACH_VEC_ELT (invariants, i, inv)
	if (inv->reg != NULL_RTX)
	  {
	    if (inv->orig_regno >= 0)
	      setup_reg_classes (REGNO (inv->reg),
				 reg_preferred_class (inv->orig_regno),
				 reg_alternate_class (inv->orig_regno),
				 reg_allocno_class (inv->orig_regno));
	    else
	      /* No original register to inherit from; use conservative
		 general-register classes.  */
	      setup_reg_classes (REGNO (inv->reg),
				 GENERAL_REGS, NO_REGS, GENERAL_REGS);
	  }
    }
  /* Remove the DF_UD_CHAIN problem added in find_defs before rescanning,
     to save a bit of compile time.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
}
1925 :
1926 : /* Initializes invariant motion data. */
1927 :
1928 : static void
1929 658728 : init_inv_motion_data (void)
1930 : {
1931 658728 : actual_stamp = 1;
1932 :
1933 658728 : invariants.create (100);
1934 658728 : }
1935 :
1936 : /* Frees the data allocated by invariant motion. */
1937 :
static void
free_inv_motion_data (void)
{
  unsigned i;
  struct def *def;
  struct invariant *inv;

  check_invariant_table_size ();
  /* Walk the per-definition invariant table: each non-null entry owns a
     def record and its chain of uses; release both and clear the slot so
     the table is clean for the next loop.  */
  for (i = 0; i < DF_DEFS_TABLE_SIZE (); i++)
    {
      inv = invariant_table[i];
      if (inv)
	{
	  def = inv->def;
	  gcc_assert (def != NULL);

	  free_use_list (def->uses);
	  free (def);
	  invariant_table[i] = NULL;
	}
    }

  /* Release the invariant structures themselves, including each
     invariant's dependency bitmap, then the containing vector.  */
  FOR_EACH_VEC_ELT (invariants, i, inv)
    {
      BITMAP_FREE (inv->depends_on);
      free (inv);
    }
  invariants.release ();
}
1967 :
1968 : /* Move the invariants out of the LOOP. */
1969 :
static void
move_single_loop_invariants (class loop *loop)
{
  init_inv_motion_data ();

  /* Collect the invariants of LOOP, decide which of them are profitable
     to move (taking the speed/size trade-off and the presence of calls
     into account), and perform the motion.  */
  find_invariants (loop);
  find_invariants_to_move (optimize_loop_for_speed_p (loop),
			   LOOP_DATA (loop)->has_call);
  move_invariants (loop);

  free_inv_motion_data ();
}
1982 :
1983 : /* Releases the auxiliary data for LOOP. */
1984 :
1985 : static void
1986 658728 : free_loop_data (class loop *loop)
1987 : {
1988 658728 : class loop_data *data = LOOP_DATA (loop);
1989 658728 : if (!data)
1990 : return;
1991 :
1992 658728 : bitmap_clear (&LOOP_DATA (loop)->regs_ref);
1993 658728 : bitmap_clear (&LOOP_DATA (loop)->regs_live);
1994 658728 : free (data);
1995 658728 : loop->aux = NULL;
1996 : }
1997 :
1998 :
1999 :
/* Registers currently living (scan state of calculate_loop_reg_pressure).  */
static bitmap_head curr_regs_live;

/* Current reg pressure for each pressure class.  */
static int curr_reg_pressure[N_REG_CLASSES];

/* Record all regs that are set in any one insn.  Communication from
   mark_reg_{store,clobber} and global_conflicts.  Asm can refer to
   all hard-registers.  */
static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
		    ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
/* Number of regs stored in the previous array.  */
static int n_regs_set;
2013 :
2014 : /* Return pressure class and number of needed hard registers (through
2015 : *NREGS) of register REGNO. */
2016 : static enum reg_class
2017 137 : get_regno_pressure_class (int regno, int *nregs)
2018 : {
2019 137 : if (regno >= FIRST_PSEUDO_REGISTER)
2020 : {
2021 82 : enum reg_class pressure_class;
2022 :
2023 82 : pressure_class = reg_allocno_class (regno);
2024 82 : pressure_class = ira_pressure_class_translate[pressure_class];
2025 82 : *nregs
2026 82 : = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
2027 82 : return pressure_class;
2028 : }
2029 55 : else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
2030 55 : && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
2031 : {
2032 12 : *nregs = 1;
2033 12 : return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
2034 : }
2035 : else
2036 : {
2037 43 : *nregs = 0;
2038 43 : return NO_REGS;
2039 : }
2040 : }
2041 :
2042 : /* Increase (if INCR_P) or decrease current register pressure for
2043 : register REGNO. */
2044 : static void
2045 134 : change_pressure (int regno, bool incr_p)
2046 : {
2047 134 : int nregs;
2048 134 : enum reg_class pressure_class;
2049 :
2050 134 : pressure_class = get_regno_pressure_class (regno, &nregs);
2051 134 : if (! incr_p)
2052 32 : curr_reg_pressure[pressure_class] -= nregs;
2053 : else
2054 : {
2055 102 : curr_reg_pressure[pressure_class] += nregs;
2056 102 : if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2057 : < curr_reg_pressure[pressure_class])
2058 20 : LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2059 20 : = curr_reg_pressure[pressure_class];
2060 : }
2061 134 : }
2062 :
2063 : /* Mark REGNO birth. */
2064 : static void
2065 48 : mark_regno_live (int regno)
2066 : {
2067 48 : class loop *loop;
2068 :
2069 48 : for (loop = curr_loop;
2070 96 : loop != current_loops->tree_root;
2071 48 : loop = loop_outer (loop))
2072 48 : bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
2073 48 : if (!bitmap_set_bit (&curr_regs_live, regno))
2074 : return;
2075 37 : change_pressure (regno, true);
2076 : }
2077 :
2078 : /* Mark REGNO death. */
2079 : static void
2080 42 : mark_regno_death (int regno)
2081 : {
2082 42 : if (! bitmap_clear_bit (&curr_regs_live, regno))
2083 : return;
2084 32 : change_pressure (regno, false);
2085 : }
2086 :
2087 : /* Mark setting register REG. */
2088 : static void
2089 53 : mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
2090 : void *data ATTRIBUTE_UNUSED)
2091 : {
2092 53 : if (GET_CODE (reg) == SUBREG)
2093 0 : reg = SUBREG_REG (reg);
2094 :
2095 53 : if (! REG_P (reg))
2096 : return;
2097 :
2098 48 : regs_set[n_regs_set++] = reg;
2099 :
2100 48 : unsigned int end_regno = END_REGNO (reg);
2101 96 : for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2102 48 : mark_regno_live (regno);
2103 : }
2104 :
2105 : /* Mark clobbering register REG. */
2106 : static void
2107 43 : mark_reg_clobber (rtx reg, const_rtx setter, void *data)
2108 : {
2109 43 : if (GET_CODE (setter) == CLOBBER)
2110 10 : mark_reg_store (reg, setter, data);
2111 43 : }
2112 :
2113 : /* Mark register REG death. */
2114 : static void
2115 42 : mark_reg_death (rtx reg)
2116 : {
2117 42 : unsigned int end_regno = END_REGNO (reg);
2118 84 : for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2119 42 : mark_regno_death (regno);
2120 42 : }
2121 :
2122 : /* Mark occurrence of registers in X for the current loop. */
2123 : static void
2124 197 : mark_ref_regs (rtx x)
2125 : {
2126 197 : RTX_CODE code;
2127 197 : int i;
2128 197 : const char *fmt;
2129 :
2130 197 : if (!x)
2131 : return;
2132 :
2133 197 : code = GET_CODE (x);
2134 197 : if (code == REG)
2135 : {
2136 82 : class loop *loop;
2137 :
2138 82 : for (loop = curr_loop;
2139 164 : loop != current_loops->tree_root;
2140 82 : loop = loop_outer (loop))
2141 82 : bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
2142 : return;
2143 : }
2144 :
2145 115 : fmt = GET_RTX_FORMAT (code);
2146 305 : for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2147 190 : if (fmt[i] == 'e')
2148 143 : mark_ref_regs (XEXP (x, i));
2149 47 : else if (fmt[i] == 'E')
2150 : {
2151 : int j;
2152 :
2153 30 : for (j = 0; j < XVECLEN (x, i); j++)
2154 20 : mark_ref_regs (XVECEXP (x, i, j));
2155 : }
2156 : }
2157 :
2158 : /* Calculate register pressure in the loops. */
static void
calculate_loop_reg_pressure (void)
{
  int i;
  unsigned int j;
  bitmap_iterator bi;
  basic_block bb;
  rtx_insn *insn;
  rtx link;
  class loop *parent;

  /* Attach fresh loop_data to every loop that does not have it yet.  */
  for (auto loop : loops_list (cfun, 0))
    if (loop->aux == NULL)
      {
	loop->aux = xcalloc (1, sizeof (class loop_data));
	bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
	bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
      }
  ira_setup_eliminable_regset ();
  bitmap_initialize (&curr_regs_live, &reg_obstack);
  FOR_EACH_BB_FN (bb, cfun)
    {
      curr_loop = bb->loop_father;
      /* Blocks outside any loop contribute nothing to loop pressure.  */
      if (curr_loop == current_loops->tree_root)
	continue;

      /* Everything live into BB is live in BB's loop and all loops
	 enclosing it.  */
      for (class loop *loop = curr_loop;
	   loop != current_loops->tree_root;
	   loop = loop_outer (loop))
	bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));

      /* Initialize the scan state from the registers live at BB entry.  */
      bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
      for (i = 0; i < ira_pressure_classes_num; i++)
	curr_reg_pressure[ira_pressure_classes[i]] = 0;
      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
	change_pressure (j, true);

      FOR_BB_INSNS (bb, insn)
	{
	  if (! NONDEBUG_INSN_P (insn))
	    continue;

	  mark_ref_regs (PATTERN (insn));
	  n_regs_set = 0;
	  note_stores (insn, mark_reg_clobber, NULL);

	  /* Mark any registers dead after INSN as dead now.  */

	  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
	    if (REG_NOTE_KIND (link) == REG_DEAD)
	      mark_reg_death (XEXP (link, 0));

	  /* Mark any registers set in INSN as live,
	     and mark them as conflicting with all other live regs.
	     Clobbers are processed again, so they conflict with
	     the registers that are set.  */

	  note_stores (insn, mark_reg_store, NULL);

	  if (AUTO_INC_DEC)
	    for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
	      if (REG_NOTE_KIND (link) == REG_INC)
		mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);

	  /* Registers set but marked REG_UNUSED die immediately after
	     the insn; process them now so pressure does not stay
	     inflated past this point.  */
	  while (n_regs_set-- > 0)
	    {
	      rtx note = find_regno_note (insn, REG_UNUSED,
					  REGNO (regs_set[n_regs_set]));
	      if (! note)
		continue;

	      mark_reg_death (XEXP (note, 0));
	    }
	}
    }
  bitmap_release (&curr_regs_live);
  /* For region-based IRA, discount registers that are live through a
     loop but never referenced inside it — they can live in memory or
     elsewhere across the loop.  */
  if (flag_ira_region == IRA_REGION_MIXED
      || flag_ira_region == IRA_REGION_ALL)
    for (auto loop : loops_list (cfun, 0))
      {
	EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
	  if (! bitmap_bit_p (&LOOP_DATA (loop)->regs_ref, j))
	    {
	      enum reg_class pressure_class;
	      int nregs;

	      pressure_class = get_regno_pressure_class (j, &nregs);
	      LOOP_DATA (loop)->max_reg_pressure[pressure_class] -= nregs;
	    }
      }
  if (dump_file == NULL)
    return;
  /* Dump the per-loop referenced/live regnos and pressure figures.  */
  for (auto loop : loops_list (cfun, 0))
    {
      parent = loop_outer (loop);
      fprintf (dump_file, "\n  Loop %d (parent %d, header bb%d, depth %d)\n",
	       loop->num, (parent == NULL ? -1 : parent->num),
	       loop->header->index, loop_depth (loop));
      fprintf (dump_file, "\n    ref. regnos:");
      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
	fprintf (dump_file, " %d", j);
      fprintf (dump_file, "\n    live regnos:");
      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
	fprintf (dump_file, " %d", j);
      fprintf (dump_file, "\n    Pressure:");
      for (i = 0; (int) i < ira_pressure_classes_num; i++)
	{
	  enum reg_class pressure_class;

	  pressure_class = ira_pressure_classes[i];
	  if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
	    continue;
	  fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
		   LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
	}
      fprintf (dump_file, "\n");
    }
}
2277 :
2278 :
2279 :
2280 : /* Move the invariants out of the loops. */
2281 :
void
move_loop_invariants (void)
{
  /* At -O1 the DF live problem is not otherwise present; add it for the
     duration of this pass (removed again at the end).  */
  if (optimize == 1)
    df_live_add_problem ();
  /* ??? This is a hack.  We should only need to call df_live_set_all_dirty
     for optimize == 1, but can_move_invariant_reg relies on DF_INSN_LUID
     being up-to-date.  That isn't always true (even after df_analyze)
     because df_process_deferred_rescans doesn't necessarily cause
     blocks to be rescanned.  */
  df_live_set_all_dirty ();
  if (flag_ira_loop_pressure)
    {
      /* Register-pressure-aware mode: compute per-loop pressure figures
	 up front so the profitability heuristics can consult them.  */
      df_analyze ();
      regstat_init_n_sets_and_refs ();
      ira_set_pseudo_classes (true, dump_file);
      calculate_loop_reg_pressure ();
      regstat_free_n_sets_and_refs ();
    }
  df_set_flags (DF_EQ_NOTES + DF_DEFER_INSN_RESCAN);
  /* Process the loops, innermost first.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      curr_loop = loop;
      /* move_single_loop_invariants for very large loops is time consuming
	 and might need a lot of memory.  For -O1 only do loop invariant
	 motion for very small loops.  */
      unsigned max_bbs = param_loop_invariant_max_bbs_in_loop;
      if (optimize < 2)
	max_bbs /= 10;
      if (loop->num_nodes <= max_bbs)
	move_single_loop_invariants (loop);
    }

  for (auto loop : loops_list (cfun, 0))
    free_loop_data (loop);

  if (flag_ira_loop_pressure)
    /* There is no sense to keep this info because it was most
       probably outdated by subsequent passes.  */
    free_reg_info ();
  /* Release the global invariant table allocated by the analysis phase.  */
  free (invariant_table);
  invariant_table = NULL;
  invariant_table_size = 0;

  if (optimize == 1)
    df_remove_problem (df_live);

  checking_verify_flow_info ();
}
|