Line data Source code
1 : /* Partial redundancy elimination / Hoisting for RTL.
2 : Copyright (C) 1997-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* TODO
21 : - reordering of memory allocation and freeing to be more space efficient
22 : - calc rough register pressure information and use the info to drive all
23 : kinds of code motion (including code hoisting) in a unified way.
24 : */
25 :
26 : /* References searched while implementing this.
27 :
28 : Compilers Principles, Techniques and Tools
29 : Aho, Sethi, Ullman
30 : Addison-Wesley, 1988
31 :
32 : Global Optimization by Suppression of Partial Redundancies
33 : E. Morel, C. Renvoise
Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
35 :
36 : A Portable Machine-Independent Global Optimizer - Design and Measurements
37 : Frederick Chow
38 : Stanford Ph.D. thesis, Dec. 1983
39 :
40 : A Fast Algorithm for Code Movement Optimization
41 : D.M. Dhamdhere
42 : SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
43 :
44 : A Solution to a Problem with Morel and Renvoise's
45 : Global Optimization by Suppression of Partial Redundancies
46 : K-H Drechsler, M.P. Stadel
47 : ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
48 :
49 : Practical Adaptation of the Global Optimization
50 : Algorithm of Morel and Renvoise
51 : D.M. Dhamdhere
52 : ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
53 :
54 : Efficiently Computing Static Single Assignment Form and the Control
55 : Dependence Graph
56 : R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
57 : ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
58 :
59 : Lazy Code Motion
60 : J. Knoop, O. Ruthing, B. Steffen
61 : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
62 :
63 : What's In a Region? Or Computing Control Dependence Regions in Near-Linear
64 : Time for Reducible Flow Control
65 : Thomas Ball
66 : ACM Letters on Programming Languages and Systems,
67 : Vol. 2, Num. 1-4, Mar-Dec 1993
68 :
69 : An Efficient Representation for Sparse Sets
70 : Preston Briggs, Linda Torczon
71 : ACM Letters on Programming Languages and Systems,
72 : Vol. 2, Num. 1-4, Mar-Dec 1993
73 :
74 : A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
75 : K-H Drechsler, M.P. Stadel
76 : ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
77 :
78 : Partial Dead Code Elimination
79 : J. Knoop, O. Ruthing, B. Steffen
80 : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
81 :
82 : Effective Partial Redundancy Elimination
83 : P. Briggs, K.D. Cooper
84 : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
85 :
86 : The Program Structure Tree: Computing Control Regions in Linear Time
87 : R. Johnson, D. Pearson, K. Pingali
88 : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
89 :
90 : Optimal Code Motion: Theory and Practice
91 : J. Knoop, O. Ruthing, B. Steffen
92 : ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
93 :
94 : The power of assignment motion
95 : J. Knoop, O. Ruthing, B. Steffen
96 : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
97 :
98 : Global code motion / global value numbering
99 : C. Click
100 : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
101 :
102 : Value Driven Redundancy Elimination
103 : L.T. Simpson
104 : Rice University Ph.D. thesis, Apr. 1996
105 :
106 : Value Numbering
107 : L.T. Simpson
108 : Massively Scalar Compiler Project, Rice University, Sep. 1996
109 :
110 : High Performance Compilers for Parallel Computing
111 : Michael Wolfe
112 : Addison-Wesley, 1996
113 :
114 : Advanced Compiler Design and Implementation
115 : Steven Muchnick
116 : Morgan Kaufmann, 1997
117 :
118 : Building an Optimizing Compiler
119 : Robert Morgan
120 : Digital Press, 1998
121 :
122 : People wishing to speed up the code here should read:
123 : Elimination Algorithms for Data Flow Analysis
124 : B.G. Ryder, M.C. Paull
125 : ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
126 :
127 : How to Analyze Large Programs Efficiently and Informatively
128 : D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
129 : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
130 :
131 : People wishing to do something different can find various possibilities
132 : in the above papers and elsewhere.
133 : */
134 :
135 : #include "config.h"
136 : #include "system.h"
137 : #include "coretypes.h"
138 : #include "backend.h"
139 : #include "target.h"
140 : #include "rtl.h"
141 : #include "tree.h"
142 : #include "predict.h"
143 : #include "df.h"
144 : #include "memmodel.h"
145 : #include "tm_p.h"
146 : #include "insn-config.h"
147 : #include "print-rtl.h"
148 : #include "regs.h"
149 : #include "ira.h"
150 : #include "recog.h"
151 : #include "diagnostic-core.h"
152 : #include "cfgrtl.h"
153 : #include "cfganal.h"
154 : #include "lcm.h"
155 : #include "cfgcleanup.h"
156 : #include "expr.h"
157 : #include "intl.h"
158 : #include "tree-pass.h"
159 : #include "dbgcnt.h"
160 : #include "gcse.h"
161 : #include "gcse-common.h"
162 : #include "function-abi.h"
163 :
164 : /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
165 : are a superset of those done by classic GCSE.
166 :
167 : Two passes of copy/constant propagation are done around PRE or hoisting
168 : because the first one enables more GCSE and the second one helps to clean
169 : up the copies that PRE and HOIST create. This is needed more for PRE than
170 : for HOIST because code hoisting will try to use an existing register
171 : containing the common subexpression rather than create a new one. This is
172 : harder to do for PRE because of the code motion (which HOIST doesn't do).
173 :
174 : Expressions we are interested in GCSE-ing are of the form
175 : (set (pseudo-reg) (expression)).
176 : Function want_to_gcse_p says what these are.
177 :
178 : In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
179 : This allows PRE to hoist expressions that are expressed in multiple insns,
180 : such as complex address calculations (e.g. for PIC code, or loads with a
181 : high part and a low part).
182 :
183 : PRE handles moving invariant expressions out of loops (by treating them as
184 : partially redundant).
185 :
186 : **********************
187 :
188 : We used to support multiple passes but there are diminishing returns in
189 : doing so. The first pass usually makes 90% of the changes that are doable.
190 : A second pass can make a few more changes made possible by the first pass.
191 : Experiments show any further passes don't make enough changes to justify
192 : the expense.
193 :
194 : A study of spec92 using an unlimited number of passes:
195 : [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
196 : [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
197 : [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
198 :
199 : It was found doing copy propagation between each pass enables further
200 : substitutions.
201 :
202 : This study was done before expressions in REG_EQUAL notes were added as
203 : candidate expressions for optimization, and before the GIMPLE optimizers
204 : were added. Probably, multiple passes is even less efficient now than
205 : at the time when the study was conducted.
206 :
207 : PRE is quite expensive in complicated functions because the DFA can take
208 : a while to converge. Hence we only perform one pass.
209 :
210 : **********************
211 :
212 : The steps for PRE are:
213 :
214 : 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
215 :
216 : 2) Perform the data flow analysis for PRE.
217 :
218 : 3) Delete the redundant instructions
219 :
220 : 4) Insert the required copies [if any] that make the partially
221 : redundant instructions fully redundant.
222 :
223 : 5) For other reaching expressions, insert an instruction to copy the value
224 : to a newly created pseudo that will reach the redundant instruction.
225 :
226 : The deletion is done first so that when we do insertions we
227 : know which pseudo reg to use.
228 :
229 : Various papers have argued that PRE DFA is expensive (O(n^2)) and others
230 : argue it is not. The number of iterations for the algorithm to converge
231 : is typically 2-4 so I don't view it as that expensive (relatively speaking).
232 :
233 : PRE GCSE depends heavily on the second CPROP pass to clean up the copies
234 : we create. To make an expression reach the place where it's redundant,
235 : the result of the expression is copied to a new register, and the redundant
236 : expression is deleted by replacing it with this new register. Classic GCSE
237 : doesn't have this problem as much as it computes the reaching defs of
238 : each register in each block and thus can try to use an existing
239 : register. */
240 :
241 : /* GCSE global vars. */
242 :
243 : struct target_gcse default_target_gcse;
244 : #if SWITCHABLE_TARGET
245 : struct target_gcse *this_target_gcse = &default_target_gcse;
246 : #endif
247 :
248 : /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
249 : int flag_rerun_cse_after_global_opts;
250 :
251 : /* An obstack for our working variables. */
252 : static struct obstack gcse_obstack;
253 :
254 : /* Hash table of expressions. */
255 :
struct gcse_expr
{
  /* The expression.  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct gcse_expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct gcse_occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct gcse_occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  HOST_WIDE_INT max_distance;
};
286 :
/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct gcse_occr
{
  /* Next occurrence of this expression.  */
  struct gcse_occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.
     Reset to zero while scanning in compute_local_properties.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  Reset to zero in compute_local_properties.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
305 :
306 : typedef struct gcse_occr *occr_t;
307 :
308 : /* Expression hash tables.
309 : Each hash table is an array of buckets.
310 : ??? It is known that if it were an array of entries, structure elements
311 : `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
312 : not clear whether in the final analysis a sufficient amount of memory would
313 : be saved as the size of the available expression bitmaps would be larger
314 : [one could build a mapping table without holes afterwards though].
315 : Someday I'll perform the computation and figure it out. */
316 :
struct gcse_hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' bucket heads; entries that
     collide are chained through their `next_same_hash' field.  */
  struct gcse_expr **table;

  /* Size of the hash table, in elements (buckets).  */
  unsigned int size;

  /* Number of hash table elements actually stored.  */
  unsigned int n_elems;
};
329 :
330 : /* Expression hash table. */
331 : static struct gcse_hash_table_d expr_hash_table;
332 :
333 : /* This is a list of expressions which are MEMs and will be used by load
334 : or store motion.
335 : Load motion tracks MEMs which aren't killed by anything except itself,
336 : i.e. loads and stores to a single location.
337 : We can then allow movement of these MEM refs with a little special
338 : allowance. (all stores copy the same value to the reaching reg used
339 : for the loads). This means all values used to store into memory must have
340 : no side effects so we can re-issue the setter value. */
341 :
struct ls_expr
{
  struct gcse_expr * expr;	/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  vec<rtx_insn *> stores;	/* INSN list of stores seen.  */
  struct ls_expr * next;	/* Next in the singly-linked list.  */
  int invalid;			/* Nonzero if invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};
354 :
355 : /* Head of the list of load/store memory refs. */
356 : static struct ls_expr * pre_ldst_mems = NULL;
357 :
/* Hasher for the load/store expression table.  Entries are compared
   against other ls_expr pointers, and the table does not own them
   (nofree_ptr_hash).  */

struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
{
  typedef value_type compare_type;
  static inline hashval_t hash (const ls_expr *);
  static inline bool equal (const ls_expr *, const ls_expr *);
};
364 :
365 : /* Hashtable helpers. */
366 : inline hashval_t
367 100753357 : pre_ldst_expr_hasher::hash (const ls_expr *x)
368 : {
369 100753357 : int do_not_record_p = 0;
370 100753357 : return
371 100753357 : hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
372 : }
373 :
374 : static bool expr_equiv_p (const_rtx, const_rtx);
375 :
376 : inline bool
377 119847229 : pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
378 : const ls_expr *ptr2)
379 : {
380 119847229 : return expr_equiv_p (ptr1->pattern, ptr2->pattern);
381 : }
382 :
383 : /* Hashtable for the load/store memory refs. */
384 : static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;
385 :
386 : /* Bitmap containing one bit for each register in the program.
387 : Used when performing GCSE to track which registers have been set since
388 : the start of the basic block. */
389 : static regset reg_set_bitmap;
390 :
391 : /* Array, indexed by basic block number for a list of insns which modify
392 : memory within that block. */
393 : static vec<rtx_insn *> *modify_mem_list;
394 : static bitmap modify_mem_list_set;
395 :
396 : /* This array parallels modify_mem_list, except that it stores MEMs
397 : being set and their canonicalized memory addresses. */
398 : static vec<modify_pair> *canon_modify_mem_list;
399 :
400 : /* Bitmap indexed by block numbers to record which blocks contain
401 : function calls. */
402 : static bitmap blocks_with_calls;
403 :
404 : /* Various variables for statistics gathering. */
405 :
406 : /* Memory used in a pass.
407 : This isn't intended to be absolutely precise. Its intent is only
408 : to keep an eye on memory usage. */
409 : static int bytes_used;
410 :
411 : /* GCSE substitutions made. */
412 : static int gcse_subst_count;
413 : /* Number of copy instructions created. */
414 : static int gcse_create_count;
415 :
416 : /* Doing code hoisting. */
417 : static bool doing_code_hoisting_p = false;
418 :
419 : /* Doing hardreg_pre. */
420 : static bool doing_hardreg_pre_p = false;
421 :
422 : inline bool
423 22504787 : do_load_motion ()
424 : {
425 22504745 : return flag_gcse_lm && !doing_hardreg_pre_p;
426 : }
427 :
428 : static unsigned int current_hardreg_regno;
429 :
430 : /* For available exprs */
431 : static sbitmap *ae_kill;
432 :
433 : /* Data stored for each basic block. */
struct bb_data
{
  /* Maximal register pressure inside basic block for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Recorded register pressure of basic block before trying to hoist
     an expression.  Will be used to restore the register pressure
     if the expression should not be hoisted.  */
  int old_pressure;
  /* Recorded register live_in info of basic block during code hoisting
     process.  BACKUP is used to record live_in info before trying to
     hoist an expression, and will be used to restore LIVE_IN if the
     expression should not be hoisted.  */
  bitmap live_in, backup;
};
449 :
450 : #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
451 :
452 : static basic_block curr_bb;
453 :
454 : /* Current register pressure for each pressure class. */
455 : static int curr_reg_pressure[N_REG_CLASSES];
456 :
457 :
458 : static void compute_can_copy (void);
459 : static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
460 : static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
461 : static void *gcse_alloc (unsigned long);
462 : static void alloc_gcse_mem (void);
463 : static void free_gcse_mem (void);
464 : static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
465 : static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
466 : static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
467 : static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
468 : static bool oprs_unchanged_p (const_rtx, const rtx_insn *, bool);
469 : static bool oprs_anticipatable_p (const_rtx, const rtx_insn *);
470 : static bool oprs_available_p (const_rtx, const rtx_insn *);
471 : static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, bool, bool,
472 : HOST_WIDE_INT, struct gcse_hash_table_d *);
473 : static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
474 : static void record_last_reg_set_info (rtx_insn *, int);
475 : static void record_last_mem_set_info (rtx_insn *);
476 : static void record_last_set_info (rtx, const_rtx, void *);
477 : static void compute_hash_table (struct gcse_hash_table_d *);
478 : static void alloc_hash_table (struct gcse_hash_table_d *);
479 : static void free_hash_table (struct gcse_hash_table_d *);
480 : static void compute_hash_table_work (struct gcse_hash_table_d *);
481 : static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
482 : static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
483 : struct gcse_hash_table_d *);
484 : static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
485 : static bool load_killed_in_block_p (const_basic_block, int, const_rtx, bool);
486 : static void alloc_pre_mem (int, int);
487 : static void free_pre_mem (void);
488 : static struct edge_list *compute_pre_data (void);
489 : static bool pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
490 : basic_block);
491 : static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
492 : static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
493 : static void pre_insert_copies (void);
494 : static bool pre_delete (void);
495 : static bool pre_gcse (struct edge_list *);
496 : static bool one_pre_gcse_pass (void);
497 : static void add_label_notes (rtx, rtx_insn *);
498 : static void alloc_code_hoist_mem (int, int);
499 : static void free_code_hoist_mem (void);
500 : static void compute_code_hoist_vbeinout (void);
501 : static void compute_code_hoist_data (void);
502 : static bool should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
503 : basic_block,
504 : sbitmap, HOST_WIDE_INT, int *,
505 : enum reg_class,
506 : int *, bitmap, rtx_insn *);
507 : static bool hoist_code (void);
508 : static enum reg_class get_regno_pressure_class (int regno, int *nregs);
509 : static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
510 : static bool one_code_hoisting_pass (void);
511 : static rtx_insn *process_insert_insn (struct gcse_expr *);
512 : static bool pre_edge_insert (struct edge_list *, struct gcse_expr **);
513 : static bool pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
514 : basic_block, char *);
515 : static struct ls_expr * ldst_entry (rtx);
516 : static void free_ldst_entry (struct ls_expr *);
517 : static void free_ld_motion_mems (void);
518 : static void print_ldst_list (FILE *);
519 : static struct ls_expr * find_rtx_in_ldst (rtx);
520 : static bool simple_mem (const_rtx);
521 : static void invalidate_any_buried_refs (rtx);
522 : static void compute_ld_motion_mems (void);
523 : static void trim_ld_motion_mems (void);
524 : static void update_ld_motion_stores (struct gcse_expr *);
525 : static void clear_modify_mem_tables (void);
526 : static void free_modify_mem_tables (void);
527 :
528 : #define GNEW(T) ((T *) gmalloc (sizeof (T)))
529 : #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
530 :
531 : #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
532 : #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
533 :
534 : #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
535 : #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
536 :
537 : #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
538 : #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
539 :
540 : /* Misc. utilities. */
541 :
542 : #define can_copy \
543 : (this_target_gcse->x_can_copy)
544 : #define can_copy_init_p \
545 : (this_target_gcse->x_can_copy_init_p)
546 :
/* Compute which modes support reg/reg copy operations, filling in the
   per-target CAN_COPY array.  Emitted probe insns are generated inside a
   sequence so nothing leaks into the insn stream.  */

static void
compute_can_copy (void)
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg;
  rtx_insn *insn;
#endif
  memset (can_copy, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	/* The target asked us never to copy CC-mode registers.  */
	can_copy[i] = 0;
#else
	/* Probe the backend: the CC mode is copyable iff a reg-to-reg SET
	   in that mode is recognized.  */
	reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy[i] = 1;
#endif
      }
    else
      /* All non-CC modes are assumed to support reg/reg copies.  */
      can_copy[i] = 1;

  end_sequence ();
}
577 :
578 : /* Returns whether the mode supports reg/reg copy operations. */
579 :
580 : bool
581 100758075 : can_copy_p (machine_mode mode)
582 : {
583 100758075 : if (! can_copy_init_p)
584 : {
585 86091 : compute_can_copy ();
586 86091 : can_copy_init_p = true;
587 : }
588 :
589 100758075 : return can_copy[mode] != 0;
590 : }
591 :
592 : /* Cover function to xmalloc to record bytes allocated. */
593 :
594 : static void *
595 1015224 : gmalloc (size_t size)
596 : {
597 1015224 : bytes_used += size;
598 0 : return xmalloc (size);
599 : }
600 :
601 : /* Cover function to xcalloc to record bytes allocated. */
602 :
603 : static void *
604 1849898 : gcalloc (size_t nelem, size_t elsize)
605 : {
606 1849898 : bytes_used += nelem * elsize;
607 1849898 : return xcalloc (nelem, elsize);
608 : }
609 :
610 : /* Cover function to obstack_alloc. */
611 :
612 : static void *
613 26124184 : gcse_alloc (unsigned long size)
614 : {
615 26124184 : bytes_used += size;
616 26124184 : return obstack_alloc (&gcse_obstack, size);
617 : }
618 :
619 : /* Allocate memory for the reg/memory set tracking tables.
620 : This is called at the start of each pass. */
621 :
622 : static void
623 507612 : alloc_gcse_mem (void)
624 : {
625 : /* Allocate vars to track sets of regs. */
626 507612 : reg_set_bitmap = ALLOC_REG_SET (NULL);
627 :
628 : /* Allocate array to keep a list of insns which modify memory in each
629 : basic block. The two typedefs are needed to work around the
630 : pre-processor limitation with template types in macro arguments. */
631 507612 : typedef vec<rtx_insn *> vec_rtx_heap;
632 507612 : typedef vec<modify_pair> vec_modify_pair_heap;
633 507612 : modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
634 507612 : canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
635 : last_basic_block_for_fn (cfun));
636 507612 : modify_mem_list_set = BITMAP_ALLOC (NULL);
637 507612 : blocks_with_calls = BITMAP_ALLOC (NULL);
638 507612 : }
639 :
640 : /* Free memory allocated by alloc_gcse_mem. */
641 :
642 : static void
643 507612 : free_gcse_mem (void)
644 : {
645 507612 : FREE_REG_SET (reg_set_bitmap);
646 :
647 507612 : free_modify_mem_tables ();
648 507612 : BITMAP_FREE (modify_mem_list_set);
649 507612 : BITMAP_FREE (blocks_with_calls);
650 507612 : }
651 :
652 : /* Compute the local properties of each recorded expression.
653 :
654 : Local properties are those that are defined by the block, irrespective of
655 : other blocks.
656 :
657 : An expression is transparent in a block if its operands are not modified
658 : in the block.
659 :
660 : An expression is computed (locally available) in a block if it is computed
661 : at least once and expression would contain the same value if the
662 : computation was moved to the end of the block.
663 :
664 : An expression is locally anticipatable in a block if it is computed at
665 : least once and expression would contain the same value if the computation
666 : was moved to the beginning of the block.
667 :
668 : We call this routine for pre and code hoisting. They all compute
669 : basically the same information and thus can easily share this code.
670 :
671 : TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
672 : properties. If NULL, then it is not necessary to compute or record that
673 : particular property.
674 :
675 : TABLE controls which hash table to look at. */
676 :
static void
compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
			  struct gcse_hash_table_d *table)
{
  unsigned int i;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      /* Start from "transparent in every block"; bits are cleared below
	 for blocks that kill an expression.  */
      bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
    }

  if (comp)
    bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
  if (antloc)
    bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));

  /* Walk every bucket of the hash table and every expression chained
     within it.  */
  for (i = 0; i < table->size; i++)
    {
      struct gcse_expr *expr;

      for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct gcse_occr *occr;

	  /* In most cases, the expression is transparent in the block if it is
	     not killed.  The exception to this is during hardreg PRE, in which
	     uses of the hardreg prevent transparency but do not kill the
	     expression.

	     We start by assuming all expressions are transparent [none are
	     killed], and then reset the bits for those that are.  */
	  if (transp)
	    {
	      compute_transp (expr->expr, indx, transp,
			      blocks_with_calls,
			      modify_mem_list_set,
			      canon_modify_mem_list);

	      if (doing_hardreg_pre_p)
		{
		  /* We also need to check whether the destination hardreg is
		     set or call-clobbered in each BB.  We'll check for hardreg
		     uses later.  */
		  df_ref def;
		  for (def = DF_REG_DEF_CHAIN (current_hardreg_regno);
		       def;
		       def = DF_REF_NEXT_REG (def))
		    bitmap_clear_bit (transp[DF_REF_BB (def)->index], indx);
		}
	    }

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to nonzero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to nonzero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
760 :
761 : /* A hardreg set is not transparent in a block if there are any uses of that
762 : hardreg. This filters the results of compute_local_properties, after the
763 : result of that function has been used to define the kills bitmap.
764 :
765 : TRANSP is the destination sbitmap to be updated.
766 :
767 : TABLE controls which hash table to look at. */
768 :
769 : static void
770 0 : prune_hardreg_uses (sbitmap *transp, struct gcse_hash_table_d *table)
771 : {
772 0 : unsigned int i;
773 0 : gcc_assert (doing_hardreg_pre_p);
774 :
775 0 : for (i = 0; i < table->size; i++)
776 : {
777 0 : struct gcse_expr *expr;
778 :
779 0 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
780 : {
781 0 : int indx = expr->bitmap_index;
782 0 : df_ref def;
783 :
784 0 : for (def = DF_REG_USE_CHAIN (current_hardreg_regno);
785 0 : def;
786 0 : def = DF_REF_NEXT_REG (def))
787 0 : bitmap_clear_bit (transp[DF_REF_BB (def)->index], indx);
788 : }
789 : }
790 0 : }
791 :
792 : /* Hash table support. */
793 :
/* Per-register record of where the register was set, used while building
   the expression hash table.  */

struct reg_avail_info
{
  /* Last basic block in which the register was seen set.  */
  basic_block last_bb;
  /* Ordering indices of the first and last sets within LAST_BB
     (presumably insn LUIDs — confirm against record_last_reg_set_info).  */
  int first_set;
  int last_set;
};
800 :
801 : static struct reg_avail_info *reg_avail_info;
802 : static basic_block current_bb;
803 :
/* See whether X, the source of a set, is something we want to consider for
   GCSE.  MODE is used for cost estimation and for checking whether X can
   be assigned to a pseudo register.  If code hoisting is active and
   MAX_DISTANCE_PTR is non-null, *MAX_DISTANCE_PTR is set to the maximum
   distance (0 = unlimited) the expression may travel.  */

static bool
want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use set_src_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constant leads to increased register pressure.

     RA can deal with this by rematerialing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.

     For hardreg PRE, register pressure is not a concern, and we also want to
     apply GCSE to simple moves.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      /* Plain register copies are candidates only for hardreg PRE.  */
      return doing_hardreg_pre_p;

    case CALL:
      /* Calls are never GCSE candidates.  */
      return false;

    CASE_CONST_ANY:
      if (doing_hardreg_pre_p)
	return true;
      else if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return false;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* PRE doesn't implement max_distance restriction.  */
	{
	  int cost;
	  HOST_WIDE_INT max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = set_src_cost (x, mode, 0);

	  if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
	    {
	      /* A cheap expression may only travel a distance proportional
		 to its cost; a resulting distance of zero means it is too
		 cheap to be worth hoisting at all.  */
	      max_distance
		= ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10;
	      if (max_distance == 0)
		return false;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    /* Expensive enough to hoist without a distance limit.  */
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      return can_assign_to_reg_without_clobbers_p (x, mode);
    }
}
887 :
/* Used internally by can_assign_to_reg_without_clobbers_p.  */

static GTY(()) rtx_insn *test_insn;

/* Return true if we can assign X to a pseudo register of mode MODE
   such that the resulting insn does not result in clobbering a hard
   register as a side-effect.

   Additionally, if the target requires it, check that the resulting insn
   can be copied.  If it cannot, this means that X is special and probably
   has hidden side-effects we don't want to mess with.

   This function is typically used by code motion passes, to verify
   that it is safe to insert an insn without worrying about clobbering
   maybe live hard regs.  */

bool
can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
{
  int num_clobbers = 0;
  int icode;
  bool can_assign = false;

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, mode))
    return true;
  else if (GET_MODE (x) == VOIDmode)
    return false;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      /* The destination is a fake pseudo well past any real register
	 number; only the mode of the SET matters for recognition.  */
      test_insn
	= make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
      SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
      INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
  SET_SRC (PATTERN (test_insn)) = x;

  icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);

  /* If the test insn is valid and doesn't need clobbers, and the target also
     has no objections, we're good.  */
  if (icode >= 0
      && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
      && ! (targetm.cannot_copy_insn_p
	    && targetm.cannot_copy_insn_p (test_insn)))
    can_assign = true;

  /* Make sure test_insn doesn't have any pointers into GC space.  */
  SET_SRC (PATTERN (test_insn)) = NULL_RTX;

  return can_assign;
}
949 :
950 : /* Return true if the operands of expression X are unchanged from the
951 : start of INSN's basic block up to but not including INSN
952 : (if AVAIL_P == false), or from INSN to the end of INSN's basic block
953 : (if AVAIL_P == true). */
954 :
955 : static bool
956 72974124 : oprs_unchanged_p (const_rtx x, const rtx_insn *insn, bool avail_p)
957 : {
958 72974124 : int i, j;
959 72974124 : enum rtx_code code;
960 72974124 : const char *fmt;
961 :
962 72974124 : if (x == 0)
963 : return true;
964 :
965 72974124 : code = GET_CODE (x);
966 72974124 : switch (code)
967 : {
968 22715211 : case REG:
969 22715211 : {
970 22715211 : struct reg_avail_info *info = ®_avail_info[REGNO (x)];
971 :
972 22715211 : if (info->last_bb != current_bb)
973 : return true;
974 11051626 : if (avail_p)
975 5854358 : return info->last_set < DF_INSN_LUID (insn);
976 : else
977 5197268 : return info->first_set >= DF_INSN_LUID (insn);
978 : }
979 :
980 11897483 : case MEM:
981 11897483 : if (! do_load_motion ()
982 11897461 : || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
983 : x, avail_p))
984 3632730 : return false;
985 : else
986 8264753 : return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
987 :
988 : case PRE_DEC:
989 : case PRE_INC:
990 : case POST_DEC:
991 : case POST_INC:
992 : case PRE_MODIFY:
993 : case POST_MODIFY:
994 : return false;
995 :
996 : case PC:
997 : case CONST:
998 : CASE_CONST_ANY:
999 : case SYMBOL_REF:
1000 : case LABEL_REF:
1001 : case ADDR_VEC:
1002 : case ADDR_DIFF_VEC:
1003 : return true;
1004 :
1005 21241684 : default:
1006 21241684 : break;
1007 : }
1008 :
1009 38962266 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1010 : {
1011 38507700 : if (fmt[i] == 'e')
1012 : {
1013 : /* If we are about to do the last recursive call needed at this
1014 : level, change it into iteration. This function is called enough
1015 : to be worth it. */
1016 37301166 : if (i == 0)
1017 19240250 : return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1018 :
1019 18060916 : else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1020 : return false;
1021 : }
1022 1206534 : else if (fmt[i] == 'E')
1023 2066123 : for (j = 0; j < XVECLEN (x, i); j++)
1024 1611616 : if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1025 : return false;
1026 : }
1027 :
1028 : return true;
1029 : }
1030 :
/* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */

struct mem_conflict_info
{
  /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
     see if a memory store conflicts with this memory load.  */
  const_rtx mem;

  /* True if mems_conflict_for_gcse_p finds a conflict between two memory
     references.  Only ever set to true, never reset, so a single conflict
     anywhere in the scan is sticky.  */
  bool conflict;
};
1043 :
/* DEST is the output of an instruction.  If it is a memory reference and
   possibly conflicts with the load found in DATA, then communicate this
   information back through DATA.  Intended as a note_stores callback.  */

static void
mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
			  void *data)
{
  struct mem_conflict_info *mci = (struct mem_conflict_info *) data;

  /* Strip wrappers to get at the actual destination.  */
  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  /* If we are setting a MEM in our list of specially recognized MEMs,
     don't mark as killed this time.  */
  if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
    {
      if (!find_rtx_in_ldst (dest))
	mci->conflict = true;
      return;
    }

  if (true_dependence (dest, GET_MODE (dest), mci->mem))
    mci->conflict = true;
}
1077 :
/* Return true if the expression in X (a memory reference) is killed
   in block BB before or after the insn with the LUID in UID_LIMIT.
   AVAIL_P is true for kills after UID_LIMIT, and false for kills
   before UID_LIMIT.

   To check the entire block, set UID_LIMIT to max_uid + 1 and
   AVAIL_P to false.  */

static bool
load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
			bool avail_p)
{
  vec<rtx_insn *> list = modify_mem_list[bb->index];
  rtx_insn *setter;
  unsigned ix;

  /* If this is a readonly then we aren't going to be changing it.  */
  if (MEM_READONLY_P (x))
    return false;

  /* Walk the memory-modifying insns recorded for this block.  */
  FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
    {
      struct mem_conflict_info mci;

      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
	   && DF_INSN_LUID (setter) < uid_limit)
	  || (! avail_p
	      && DF_INSN_LUID (setter) > uid_limit))
	continue;

      /* If SETTER is a call everything is clobbered.  Note that calls
	 to pure functions are never put on the list, so we need not
	 worry about them.  */
      if (CALL_P (setter))
	return true;

      /* SETTER must be an INSN of some kind that sets memory.  Call
	 note_stores to examine each hunk of memory that is modified.  */
      mci.mem = x;
      mci.conflict = false;
      note_stores (setter, mems_conflict_for_gcse_p, &mci);
      if (mci.conflict)
	return true;
    }
  return false;
}
1125 :
/* Return true if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.
   Thin wrapper over oprs_unchanged_p with AVAIL_P == false.  */

static bool
oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
{
  return oprs_unchanged_p (x, insn, false);
}
1134 :
/* Return true if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.
   Thin wrapper over oprs_unchanged_p with AVAIL_P == true.  */

static bool
oprs_available_p (const_rtx x, const rtx_insn *insn)
{
  return oprs_unchanged_p (x, insn, true);
}
1143 :
1144 : /* Hash expression X.
1145 :
1146 : MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1147 : indicating if a volatile operand is found or if the expression contains
1148 : something we don't want to insert in the table. HASH_TABLE_SIZE is
1149 : the current size of the hash table to be probed. */
1150 :
1151 : static unsigned int
1152 12898349 : hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p,
1153 : int hash_table_size)
1154 : {
1155 12898349 : unsigned int hash;
1156 :
1157 12898349 : *do_not_record_p = 0;
1158 :
1159 12898349 : hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1160 12898349 : return hash % hash_table_size;
1161 : }
1162 :
/* Return true if exp1 is equivalent to exp2.  Delegates to exp_equiv_p
   with validate == 0 and for_gcse == true.  */

static bool
expr_equiv_p (const_rtx x, const_rtx y)
{
  return exp_equiv_p (x, y, 0, true);
}
1170 :
/* Insert expression X in INSN in the hash TABLE.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is true if X is an anticipatable expression.
   AVAIL_P is true if X is an available expression.

   MAX_DISTANCE is the maximum distance in instructions this expression can
   be moved.  */

static void
insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
		      bool antic_p, bool avail_p, HOST_WIDE_INT max_distance,
		      struct gcse_hash_table_d *table)
{
  bool found;
  int do_not_record_p;
  unsigned int hash;
  struct gcse_expr *cur_expr, *last_expr = NULL;
  struct gcse_occr *antic_occr, *avail_occr;

  hash = hash_expr (x, mode, &do_not_record_p, table->size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  /* Search the bucket's chain for an equivalent expression.  */
  cur_expr = table->table[hash];
  found = false;

  while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0)
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = GOBNEW (struct gcse_expr);
      bytes_used += sizeof (struct gcse_expr);
      if (table->table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	table->table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      gcc_assert (max_distance >= 0);
      cur_expr->max_distance = max_distance;
    }
  else
    /* Equivalent expressions must always be given the same max distance,
       since it is derived from the expression itself.  */
    gcc_assert (cur_expr->max_distance == max_distance);

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* A recorded occurrence from a different block is irrelevant here.  */
      if (antic_occr
	  && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
	antic_occr = NULL;

      if (antic_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer the currently recorded one.  We want the first one in the
	   block and the block is scanned from start to end.  */
	; /* nothing to do */
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  antic_occr->insn = insn;
	  antic_occr->next = cur_expr->antic_occr;
	  antic_occr->deleted_p = 0;
	  cur_expr->antic_occr = antic_occr;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      if (avail_occr
	  && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer this occurrence to the currently recorded one.  We want
	     the last one in the block and the block is scanned from start
	     to end.  */
	  avail_occr->insn = insn;
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  avail_occr->insn = insn;
	  avail_occr->next = cur_expr->avail_occr;
	  avail_occr->deleted_p = 0;
	  cur_expr->avail_occr = avail_occr;
	}
    }
}
1288 :
/* Scan SET present in INSN and add an entry to the hash TABLE.  */

static void
hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
{
  rtx src = SET_SRC (set);
  rtx dest = SET_DEST (set);
  rtx note;

  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn, table);

  else if (REG_P (dest))
    {
      unsigned int regno = REGNO (dest);
      HOST_WIDE_INT max_distance = 0;

      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.

	 This allows us to do a single GCSE pass and still eliminate
	 redundant constants, addresses or other expressions that are
	 constructed with multiple instructions.

	 However, keep the original SRC if INSN is a simple reg-reg move.
	 In this case, there will almost always be a REG_EQUAL note on the
	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
	 for INSN, we miss copy propagation opportunities and we perform the
	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
	 do more than one PRE GCSE pass.

	 Note that this does not impede profitable constant propagations.  We
	 "look through" reg-reg sets in lookup_avail_set.  */
      note = find_reg_equal_equiv_note (insn);
      if (note != 0
	  && REG_NOTE_KIND (note) == REG_EQUAL
	  && !REG_P (src)
	  && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
	src = XEXP (note, 0), set = gen_rtx_SET (dest, src);

      /* Only record sets of pseudo-regs in the hash table, unless we're
	 currently doing hardreg switching.  */
      if ((doing_hardreg_pre_p ? regno == current_hardreg_regno
	   : regno >= FIRST_PSEUDO_REGISTER)
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (dest))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  /* ??? We can now easily create new EH landing pads at the
	     gimple level, for splitting edges; there's no reason we
	     can't do the same thing at the rtl level.  */
	  && !can_throw_internal (insn)
	  /* Is SET_SRC something we want to gcse?  */
	  && want_to_gcse_p (src, GET_MODE (dest), &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
	{
	  /* An expression is not anticipatable if its operands are
	     modified before this insn or if this is not the only SET in
	     this insn.  The latter condition does not have to mean that
	     SRC itself is not anticipatable, but we just will not be
	     able to handle code motion of insns with multiple sets.  */
	  bool antic_p = (oprs_anticipatable_p (src, insn)
			  && !multiple_sets (insn));
	  if (doing_hardreg_pre_p)
	    {
	      /* A hardreg assignment is anticipatable only if the hardreg is
		 neither set nor used prior to this assignment.  */
	      auto info = reg_avail_info[current_hardreg_regno];
	      if ((info.last_bb == current_bb
		   && info.first_set < DF_INSN_LUID (insn))
		  || bitmap_bit_p (DF_LR_IN (current_bb),
				   current_hardreg_regno))
		antic_p = false;
	    }

	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  bool avail_p = (oprs_available_p (src, insn)
			  && ! JUMP_P (insn));
	  if (doing_hardreg_pre_p)
	    {
	      /* A hardreg assignment is only available if the hardreg is
		 not set later in the BB.  Uses of the hardreg are allowed.  */
	      auto info = reg_avail_info[current_hardreg_regno];
	      if (info.last_bb == current_bb
		  && info.last_set > DF_INSN_LUID (insn))
		avail_p = false;
	    }

	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
				max_distance, table);
	}
    }
  /* In case of store we want to consider the memory value as available in
     the REG stored in that memory.  This makes it possible to remove
     redundant loads due to stores to the same location.  */
  else if (flag_gcse_las
	   && !doing_hardreg_pre_p
	   && REG_P (src)
	   && MEM_P (dest))
    {
      unsigned int regno = REGNO (src);
      HOST_WIDE_INT max_distance = 0;

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (src))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  && !can_throw_internal (insn)
	  /* Is SET_DEST something we want to gcse?  */
	  && want_to_gcse_p (dest, GET_MODE (dest), &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
	      || ! MEM_P (XEXP (note, 0))))
	{
	  /* Stores are never anticipatable.  */
	  bool antic_p = 0;
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  bool avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn);

	  /* Record the memory expression (DEST) in the hash table.  */
	  insert_expr_in_table (dest, GET_MODE (dest), insn,
				antic_p, avail_p, max_distance, table);
	}
    }
}
1434 :
/* Process a CLOBBER pattern X found in INSN.  Placeholder hook kept so
   hash_scan_insn dispatches uniformly over SET/CLOBBER/CALL.  */

static void
hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
		   struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}
1441 :
/* Process a CALL pattern X found in INSN.  Placeholder hook kept so
   hash_scan_insn dispatches uniformly over SET/CLOBBER/CALL.  */

static void
hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
		struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}
1448 :
1449 : /* Process INSN and add hash table entries as appropriate. */
1450 :
1451 : static void
1452 49709398 : hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
1453 : {
1454 49709398 : rtx pat = PATTERN (insn);
1455 49709398 : int i;
1456 :
1457 : /* Pick out the sets of INSN and for other forms of instructions record
1458 : what's been modified. */
1459 :
1460 49709398 : if (GET_CODE (pat) == SET)
1461 39129930 : hash_scan_set (pat, insn, table);
1462 :
1463 10579468 : else if (GET_CODE (pat) == CLOBBER)
1464 : hash_scan_clobber (pat, insn, table);
1465 :
1466 10566027 : else if (GET_CODE (pat) == CALL)
1467 : hash_scan_call (pat, insn, table);
1468 :
1469 8462446 : else if (GET_CODE (pat) == PARALLEL)
1470 24440048 : for (i = 0; i < XVECLEN (pat, 0); i++)
1471 : {
1472 16393162 : rtx x = XVECEXP (pat, 0, i);
1473 :
1474 16393162 : if (GET_CODE (x) == SET)
1475 8254984 : hash_scan_set (x, insn, table);
1476 : else if (GET_CODE (x) == CLOBBER)
1477 : hash_scan_clobber (x, insn, table);
1478 : else if (GET_CODE (x) == CALL)
1479 : hash_scan_call (x, insn, table);
1480 : }
1481 49709398 : }
1482 :
/* Dump the hash table TABLE to file FILE under the name NAME.  */

static void
dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
{
  int i;
  /* Flattened out table, so it's printed in proper order.  */
  struct gcse_expr **flat_table;
  unsigned int *hash_val;
  struct gcse_expr *expr;

  flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
  hash_val = XNEWVEC (unsigned int, table->n_elems);

  /* Re-index every expression by its bitmap index, remembering the
     bucket it came from.  */
  for (i = 0; i < (int) table->size; i++)
    for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	flat_table[expr->bitmap_index] = expr;
	hash_val[expr->bitmap_index] = i;
      }

  fprintf (file, "%s hash table (%d buckets, %d entries)\n",
	   name, table->size, table->n_elems);

  for (i = 0; i < (int) table->n_elems; i++)
    if (flat_table[i] != 0)
      {
	expr = flat_table[i];
	fprintf (file, "Index %d (hash value %d; max distance "
		 HOST_WIDE_INT_PRINT_DEC ")\n  ",
		 expr->bitmap_index, hash_val[i], expr->max_distance);
	print_rtl (file, expr->expr);
	fprintf (file, "\n");
      }

  fprintf (file, "\n");

  free (flat_table);
  free (hash_val);
}
1523 :
1524 : /* Record register first/last/block set information for REGNO in INSN.
1525 :
1526 : first_set records the first place in the block where the register
1527 : is set and is used to compute "anticipatability".
1528 :
1529 : last_set records the last place in the block where the register
1530 : is set and is used to compute "availability".
1531 :
1532 : last_bb records the block for which first_set and last_set are
1533 : valid, as a quick test to invalidate them. */
1534 :
1535 : static void
1536 356038527 : record_last_reg_set_info (rtx_insn *insn, int regno)
1537 : {
1538 356038527 : struct reg_avail_info *info = ®_avail_info[regno];
1539 356038527 : int luid = DF_INSN_LUID (insn);
1540 :
1541 356038527 : info->last_set = luid;
1542 356038527 : if (info->last_bb != current_bb)
1543 : {
1544 267127188 : info->last_bb = current_bb;
1545 267127188 : info->first_set = luid;
1546 : }
1547 356038527 : }
1548 :
/* Record memory modification information for INSN.  We do not actually care
   about the memory location(s) that are set, or even how they are set (consider
   a CALL_INSN).  We merely need to record which insns modify memory.  */

static void
record_last_mem_set_info (rtx_insn *insn)
{
  /* Nothing to record when load motion is disabled.  */
  if (! do_load_motion ())
    return;

  record_last_mem_set_info_common (insn, modify_mem_list,
				   canon_modify_mem_list,
				   modify_mem_list_set,
				   blocks_with_calls);
}
1564 :
/* Called from compute_hash_table via note_stores to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */

static void
record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
{
  rtx_insn *last_set_insn = (rtx_insn *) data;

  /* Look through SUBREGs to the underlying register.  */
  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (REG_P (dest))
    record_last_reg_set_info (last_set_insn, REGNO (dest));
  else if (MEM_P (dest)
	   /* Ignore pushes, they clobber nothing.  */
	   && ! push_operand (dest, GET_MODE (dest)))
    record_last_mem_set_info (last_set_insn);
}
1584 :
1585 : /* Top level function to create an expression hash table.
1586 :
1587 : Expression entries are placed in the hash table if
1588 : - they are of the form (set (pseudo-reg) src),
1589 : - src is something we want to perform GCSE on,
1590 : - none of the operands are subsequently modified in the block
1591 :
1592 : Currently src must be a pseudo-reg or a const_int.
1593 :
1594 : TABLE is the table computed. */
1595 :
1596 : static void
1597 507612 : compute_hash_table_work (struct gcse_hash_table_d *table)
1598 : {
1599 507612 : int i;
1600 :
1601 : /* re-Cache any INSN_LIST nodes we have allocated. */
1602 507612 : clear_modify_mem_tables ();
1603 : /* Some working arrays used to track first and last set in each block. */
1604 507612 : reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1605 :
1606 81449390 : for (i = 0; i < max_reg_num (); ++i)
1607 80941778 : reg_avail_info[i].last_bb = NULL;
1608 :
1609 10253287 : FOR_EACH_BB_FN (current_bb, cfun)
1610 : {
1611 9745675 : rtx_insn *insn;
1612 9745675 : unsigned int regno;
1613 :
1614 : /* First pass over the instructions records information used to
1615 : determine when registers and memory are first and last set. */
1616 121220580 : FOR_BB_INSNS (current_bb, insn)
1617 : {
1618 111474905 : if (!NONDEBUG_INSN_P (insn))
1619 61765507 : continue;
1620 :
1621 49709398 : if (CALL_P (insn))
1622 : {
1623 3837048 : hard_reg_set_iterator hrsi;
1624 :
1625 : /* We don't track modes of hard registers, so we need
1626 : to be conservative and assume that partial kills
1627 : are full kills. */
1628 3837048 : HARD_REG_SET callee_clobbers
1629 3837048 : = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
1630 316255068 : EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
1631 312418020 : record_last_reg_set_info (insn, regno);
1632 :
1633 7536956 : if (! RTL_CONST_OR_PURE_CALL_P (insn)
1634 394654 : || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1635 4207365 : || can_throw_external (insn))
1636 3468462 : record_last_mem_set_info (insn);
1637 : }
1638 :
1639 49709398 : note_stores (insn, record_last_set_info, insn);
1640 : }
1641 :
1642 : /* The next pass builds the hash table. */
1643 121220580 : FOR_BB_INSNS (current_bb, insn)
1644 111474905 : if (NONDEBUG_INSN_P (insn))
1645 49709398 : hash_scan_insn (insn, table);
1646 : }
1647 :
1648 507612 : free (reg_avail_info);
1649 507612 : reg_avail_info = NULL;
1650 507612 : }
1651 :
/* Allocate space for the set/expr hash TABLE.
   It is used to determine the number of buckets to use.  */

static void
alloc_hash_table (struct gcse_hash_table_d *table)
{
  int n;

  n = get_max_insn_count ();

  /* Roughly one bucket per four insns, with a small floor.  */
  table->size = n / 4;
  if (table->size < 11)
    table->size = 11;

  /* Attempt to maintain efficient use of hash table.
     Making it an odd number is simplest for now.
     ??? Later take some measurements.  */
  table->size |= 1;
  n = table->size * sizeof (struct gcse_expr *);
  table->table = GNEWVAR (struct gcse_expr *, n);
}
1673 :
/* Free things allocated by alloc_hash_table.  */

static void
free_hash_table (struct gcse_hash_table_d *table)
{
  /* The gcse_expr/gcse_occr nodes themselves live in obstack storage and
     are not freed here; only the bucket array is.  */
  free (table->table);
}
1681 :
/* Compute the expression hash table TABLE.  Clears any previous contents
   before delegating to compute_hash_table_work.  */

static void
compute_hash_table (struct gcse_hash_table_d *table)
{
  /* Initialize count of number of entries in hash table.  */
  table->n_elems = 0;
  memset (table->table, 0, table->size * sizeof (struct gcse_expr *));

  compute_hash_table_work (table);
}
1693 :
/* Expression tracking support.  */

/* Clear canon_modify_mem_list and modify_mem_list tables.  */
static void
clear_modify_mem_tables (void)
{
  unsigned i;
  bitmap_iterator bi;

  /* Only blocks flagged in modify_mem_list_set have non-empty vectors.  */
  EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
    {
      modify_mem_list[i].release ();
      canon_modify_mem_list[i].release ();
    }
  bitmap_clear (modify_mem_list_set);
  bitmap_clear (blocks_with_calls);
}
1711 :
1712 : /* Release memory used by modify_mem_list_set. */
1713 :
1714 : static void
1715 507612 : free_modify_mem_tables (void)
1716 : {
1717 507612 : clear_modify_mem_tables ();
1718 507612 : free (modify_mem_list);
1719 507612 : free (canon_modify_mem_list);
1720 507612 : modify_mem_list = 0;
1721 507612 : canon_modify_mem_list = 0;
1722 507612 : }
1723 :
/* Compute PRE+LCM working variables.  */

/* Local properties of expressions: one bitmap row per basic block,
   one bit per expression (indexed by bitmap_index).  */

/* Nonzero for expressions that are transparent in the block.  */
static sbitmap *transp;

/* Nonzero for expressions that are computed (available) in the block.  */
static sbitmap *comp;

/* Nonzero for expressions that are locally anticipatable in the block.  */
static sbitmap *antloc;

/* Nonzero for expressions where this block is an optimal computation
   point.  */
static sbitmap *pre_optimal;

/* Nonzero for expressions which are redundant in a particular block.  */
static sbitmap *pre_redundant;

/* Nonzero for expressions which should be inserted on a specific edge.
   Filled in by pre_edge_lcm (see compute_pre_data).  */
static sbitmap *pre_insert_map;

/* Nonzero for expressions which should be deleted in a specific block.
   Filled in by pre_edge_lcm (see compute_pre_data).  */
static sbitmap *pre_delete_map;
1749 :
1750 : /* Allocate vars used for PRE analysis. */
1751 :
1752 : static void
1753 417337 : alloc_pre_mem (int n_blocks, int n_exprs)
1754 : {
1755 417337 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1756 417337 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1757 417337 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1758 :
1759 417337 : pre_optimal = NULL;
1760 417337 : pre_redundant = NULL;
1761 417337 : pre_insert_map = NULL;
1762 417337 : pre_delete_map = NULL;
1763 417337 : ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 :
1765 : /* pre_insert and pre_delete are allocated later. */
1766 417337 : }
1767 :
1768 : /* Free vars used for PRE analysis. */
1769 :
1770 : static void
1771 417337 : free_pre_mem (void)
1772 : {
1773 417337 : sbitmap_vector_free (transp);
1774 417337 : sbitmap_vector_free (comp);
1775 :
1776 : /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1777 :
1778 417337 : if (pre_optimal)
1779 0 : sbitmap_vector_free (pre_optimal);
1780 417337 : if (pre_redundant)
1781 0 : sbitmap_vector_free (pre_redundant);
1782 417337 : if (pre_insert_map)
1783 417337 : sbitmap_vector_free (pre_insert_map);
1784 417337 : if (pre_delete_map)
1785 417337 : sbitmap_vector_free (pre_delete_map);
1786 :
1787 417337 : transp = comp = NULL;
1788 417337 : pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1789 417337 : }
1790 :
/* Remove certain expressions from anticipatable and transparent
   sets of basic blocks that have incoming abnormal edge.
   For PRE remove potentially trapping expressions to avoid placing
   them on abnormal edges.  For hoisting remove memory references that
   can be clobbered by calls.  */

static void
prune_expressions (bool pre_p)
{
  struct gcse_expr *expr;
  unsigned int ui;
  basic_block bb;

  /* One bit per expression (bitmap_index) that must be pruned.  */
  auto_sbitmap prune_exprs (expr_hash_table.n_elems);
  bitmap_clear (prune_exprs);
  for (ui = 0; ui < expr_hash_table.size; ui++)
    {
      for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
	{
	  /* Note potentially trapping expressions.  */
	  if (may_trap_p (expr->expr))
	    {
	      bitmap_set_bit (prune_exprs, expr->bitmap_index);
	      continue;
	    }

	  if (!pre_p && contains_mem_rtx_p (expr->expr))
	    /* Note memory references that can be clobbered by a call.
	       We do not split abnormal edges in hoisting, so would
	       a memory reference get hoisted along an abnormal edge,
	       it would be placed /before/ the call.  Therefore, only
	       constant memory references can be hoisted along abnormal
	       edges.  */
	    {
	      rtx x = expr->expr;

	      /* Common cases where we might find the MEM which may allow us
		 to avoid pruning the expression.  */
	      while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* If we found the MEM, go ahead and look at it to see if it has
		 properties that allow us to avoid pruning its expression out
		 of the tables.  */
	      if (MEM_P (x))
		{
		  /* A constant-pool load cannot be clobbered by any call.  */
		  if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
		    continue;

		  if (MEM_READONLY_P (x)
		      && !MEM_VOLATILE_P (x)
		      && MEM_NOTRAP_P (x))
		    /* Constant memory reference, e.g., a PIC address.  */
		    continue;
		}

	      /* ??? Optimally, we would use interprocedural alias
		 analysis to determine if this mem is actually killed
		 by this call.  */

	      bitmap_set_bit (prune_exprs, expr->bitmap_index);
	    }
	}
    }

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping (for PRE) and memory (for hoist) expressions
	 because we won't be able to properly place the instruction on
	 the edge.  So make them neither anticipatable nor transparent.
	 This is fairly conservative.

	 ??? For hoisting it may be necessary to check for set-and-jump
	 instructions here, not just for abnormal edges.  The general problem
	 is that when an expression cannot be placed right at the end of
	 a basic block we should account for any side-effects of a subsequent
	 jump instructions that could clobber the expression.  It would
	 be best to implement this check along the lines of
	 should_hoist_expr_to_dom where the target block is already known
	 and, hence, there's no need to conservatively prune expressions on
	 "intermediate" set-and-jump instructions.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if ((e->flags & EDGE_ABNORMAL)
	    && (pre_p || CALL_P (BB_END (e->src))))
	  {
	    bitmap_and_compl (antloc[bb->index],
			      antloc[bb->index], prune_exprs);
	    bitmap_and_compl (transp[bb->index],
			      transp[bb->index], prune_exprs);
	    break;
	  }
    }
}
1889 :
/* It may be necessary to insert a large number of insns on edges to
   make the existing occurrences of expressions fully redundant.  This
   routine examines the set of insertions and deletions and if the ratio
   of insertions to deletions is too high for a particular expression, then
   the expression is removed from the insertion/deletion sets.

   N_ELEMS is the number of elements in the hash table.  */

static void
prune_insertions_deletions (int n_elems)
{
  sbitmap_iterator sbi;

  /* We always use I to iterate over blocks/edges and J to iterate over
     expressions.  */
  unsigned int i, j;

  /* Counts for the number of times an expression needs to be inserted and
     number of times an expression can be removed as a result.  */
  int *insertions = GCNEWVEC (int, n_elems);
  int *deletions = GCNEWVEC (int, n_elems);

  /* Set of expressions which require too many insertions relative to
     the number of deletions achieved.  We will prune these out of the
     insertion/deletion sets.  */
  auto_sbitmap prune_exprs (n_elems);
  bitmap_clear (prune_exprs);

  /* Iterate over the edges counting the number of times each expression
     needs to be inserted.  */
  for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
    {
      EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
	insertions[j]++;
    }

  /* Similarly for deletions, but those occur in blocks rather than on
     edges.  */
  for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
    {
      EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
	deletions[j]++;
    }

  /* Now that we have accurate counts, iterate over the elements in the
     hash table and see if any need too many insertions relative to the
     number of evaluations that can be removed.  If so, mark them in
     PRUNE_EXPRS.  Note the integer division: expressions with zero
     deletions are never pruned here (nothing would be gained anyway).  */
  for (j = 0; j < (unsigned) n_elems; j++)
    if (deletions[j]
	&& (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio)
      bitmap_set_bit (prune_exprs, j);

  /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
  EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
    {
      for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
	bitmap_clear_bit (pre_insert_map[i], j);

      for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
	bitmap_clear_bit (pre_delete_map[i], j);
    }

  free (insertions);
  free (deletions);
}
1956 :
/* Top level routine to do the dataflow analysis needed by PRE.
   Returns the edge list built by pre_edge_lcm; ownership passes to the
   caller (presumably freed after insertion -- confirm at the call site).
   On return ANTLOC and AE_KILL have been freed; TRANSP and COMP remain
   live for the reachability queries done during insertion/deletion.  */

static struct edge_list *
compute_pre_data (void)
{
  struct edge_list *edge_list;
  basic_block bb;

  compute_local_properties (transp, comp, antloc, &expr_hash_table);
  prune_expressions (true);
  bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));

  /* Compute ae_kill for each basic block using:

     ~(TRANSP | COMP)
  */

  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
      bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
    }

  /* Hardreg PRE additionally treats uses of the hard register as
     non-transparent.  */
  if (doing_hardreg_pre_p)
    prune_hardreg_uses (transp, &expr_hash_table);

  edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
			    ae_kill, &pre_insert_map, &pre_delete_map);
  sbitmap_vector_free (antloc);
  antloc = NULL;
  sbitmap_vector_free (ae_kill);
  ae_kill = NULL;

  prune_insertions_deletions (expr_hash_table.n_elems);

  return edge_list;
}
1994 :
1995 : /* PRE utilities */
1996 :
1997 : /* Return true if an occurrence of expression EXPR in OCCR_BB would reach
1998 : block BB.
1999 :
2000 : VISITED is a pointer to a working buffer for tracking which BB's have
2001 : been visited. It is NULL for the top-level call.
2002 :
2003 : We treat reaching expressions that go through blocks containing the same
2004 : reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2005 : 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2006 : 2 as not reaching. The intent is to improve the probability of finding
2007 : only one reaching expression and to reduce register lifetimes by picking
2008 : the closest such expression. */
2009 :
2010 : static bool
2011 19019008 : pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
2012 : basic_block bb, char *visited)
2013 : {
2014 19019008 : edge pred;
2015 19019008 : edge_iterator ei;
2016 :
2017 40481762 : FOR_EACH_EDGE (pred, ei, bb->preds)
2018 : {
2019 24071621 : basic_block pred_bb = pred->src;
2020 :
2021 24071621 : if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
2022 : /* Has predecessor has already been visited? */
2023 23749641 : || visited[pred_bb->index])
2024 : ;/* Nothing to do. */
2025 :
2026 : /* Does this predecessor generate this expression? */
2027 20075198 : else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
2028 : {
2029 : /* Is this the occurrence we're looking for?
2030 : Note that there's only one generating occurrence per block
2031 : so we just need to check the block number. */
2032 2367275 : if (occr_bb == pred_bb)
2033 : return true;
2034 :
2035 2073483 : visited[pred_bb->index] = 1;
2036 : }
2037 : /* Ignore this predecessor if it kills the expression.
2038 :
2039 : If this were used for hardreg pre, then it would need to use the kills
2040 : bitmap. */
2041 17707923 : else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
2042 259089 : visited[pred_bb->index] = 1;
2043 :
2044 : /* Neither gen nor kill. */
2045 : else
2046 : {
2047 17448834 : visited[pred_bb->index] = 1;
2048 17448834 : if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2049 : return true;
2050 : }
2051 : }
2052 :
2053 : /* All paths have been checked. */
2054 : return false;
2055 : }
2056 :
2057 : /* The wrapper for pre_expr_reaches_here_work that ensures that any
2058 : memory allocated for that function is returned. */
2059 :
2060 : static bool
2061 1570174 : pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr,
2062 : basic_block bb)
2063 : {
2064 1570174 : bool rval;
2065 1570174 : char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
2066 :
2067 1570174 : rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2068 :
2069 1570174 : free (visited);
2070 1570174 : return rval;
2071 : }
2072 :
2073 : /* Generate RTL to copy an EXP to REG and return it. */
2074 :
2075 : rtx_insn *
2076 316706 : prepare_copy_insn (rtx reg, rtx exp)
2077 : {
2078 316706 : rtx_insn *pat;
2079 :
2080 316706 : start_sequence ();
2081 :
2082 : /* If the expression is something that's an operand, like a constant,
2083 : just copy it to a register. */
2084 316706 : if (general_operand (exp, GET_MODE (reg)))
2085 113734 : emit_move_insn (reg, exp);
2086 :
2087 : /* Otherwise, make a new insn to compute this expression and make sure the
2088 : insn will be recognized (this also adds any needed CLOBBERs). */
2089 : else
2090 : {
2091 202972 : rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));
2092 :
2093 202972 : if (insn_invalid_p (insn, false))
2094 0 : gcc_unreachable ();
2095 : }
2096 :
2097 316706 : pat = end_sequence ();
2098 :
2099 316706 : return pat;
2100 : }
2101 :
2102 : /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2103 :
2104 : static rtx_insn *
2105 316692 : process_insert_insn (struct gcse_expr *expr)
2106 : {
2107 316692 : rtx reg = expr->reaching_reg;
2108 : /* Copy the expression to make sure we don't have any sharing issues. */
2109 316692 : rtx exp = copy_rtx (expr->expr);
2110 :
2111 316692 : return prepare_copy_insn (reg, exp);
2112 : }
2113 :
/* Return the INSN which is added at the end of the block BB with
   same instruction pattern with PAT.  */

rtx_insn *
insert_insn_end_basic_block (rtx_insn *pat, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);
  rtx_insn *new_insn;
  rtx_insn *pat_end;

  gcc_assert (pat && INSN_P (pat));

  /* PAT may be a chain of insns; remember its last one so the note-fixup
     loop below knows where to stop.  */
  pat_end = pat;
  while (NEXT_INSN (pat_end) != NULL_RTX)
    pat_end = NEXT_INSN (pat_end);

  /* If the last insn is a jump, insert EXPR in front.  Similarly we need to
     take care of trapping instructions in presence of non-call exceptions.  */

  if (JUMP_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (!single_succ_p (bb)
	      || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
    {
      /* FIXME: What if something in jump uses value set in new insn?  */
      new_insn = emit_insn_before_noloc (pat, insn, bb);
    }

  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
  else if (CALL_P (insn)
	   && (!single_succ_p (bb)
	       || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
    {
      /* Keeping in mind targets with small register classes and parameters
	 in registers, we search backward and place the instructions before
	 the first parameter is loaded.  Do this for everyone for consistency
	 and a presumption that we'll get better code elsewhere as well.  */

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing.  Collect the set of all
	 parameter registers.  */
      insn = find_first_parameter_load (insn, BB_HEAD (bb));

      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the insn in the wrong basic block.  In that case, put the insn
	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
      while (LABEL_P (insn)
	     || NOTE_INSN_BASIC_BLOCK_P (insn))
	insn = NEXT_INSN (insn);

      new_insn = emit_insn_before_noloc (pat, insn, bb);
    }
  else
    new_insn = emit_insn_after_noloc (pat, insn, bb);

  /* Add REG_LABEL_OPERAND notes for any labels referenced by the
     inserted insns.  */
  while (1)
    {
      if (INSN_P (pat))
	add_label_notes (PATTERN (pat), new_insn);
      if (pat == pat_end)
	break;
      pat = NEXT_INSN (pat);
    }
  return new_insn;
}
2185 :
2186 : /* Add EXPR to the end of basic block BB.
2187 :
2188 : This is used by both the PRE and code hoisting. */
2189 :
2190 : static void
2191 69429 : insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
2192 : {
2193 69429 : rtx reg = expr->reaching_reg;
2194 69429 : int regno = REGNO (reg);
2195 :
2196 69429 : rtx_insn *insn = process_insert_insn (expr);
2197 69429 : rtx_insn *new_insn = insert_insn_end_basic_block (insn, bb);
2198 :
2199 69429 : gcse_create_count++;
2200 :
2201 69429 : if (dump_file)
2202 : {
2203 3 : fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2204 3 : bb->index, INSN_UID (new_insn));
2205 3 : fprintf (dump_file, "copying expression %d to reg %d\n",
2206 : expr->bitmap_index, regno);
2207 : }
2208 69429 : }
2209 :
2210 : /* Return the INSN which is added at the start of the block BB with
2211 : same instruction pattern with PAT. */
2212 :
2213 : rtx_insn *
2214 0 : insert_insn_start_basic_block (rtx_insn *pat, basic_block bb)
2215 : {
2216 0 : rtx_insn *insn = BB_HEAD (bb);
2217 0 : rtx_insn *next_insn;
2218 :
2219 0 : gcc_assert (pat && INSN_P (pat));
2220 :
2221 : /* Insert after the last initial CODE_LABEL or NOTE_INSN_BASIC_BLOCK, before
2222 : any other instructions. */
2223 0 : while ((next_insn = NEXT_INSN (insn))
2224 0 : && (LABEL_P (next_insn) || NOTE_INSN_BASIC_BLOCK_P (insn)))
2225 : insn = next_insn;
2226 :
2227 0 : rtx_insn *new_insn = emit_insn_after_noloc (pat, insn, bb);
2228 :
2229 0 : while (pat != NULL_RTX)
2230 : {
2231 0 : if (INSN_P (pat))
2232 0 : add_label_notes (PATTERN (pat), new_insn);
2233 0 : pat = NEXT_INSN (pat);
2234 : }
2235 :
2236 0 : return new_insn;
2237 : }
2238 :
2239 : /* Add EXPR to the start of basic block BB.
2240 :
2241 : This is used by hardreg PRE. */
2242 :
2243 : static void
2244 0 : insert_insn_start_basic_block (struct gcse_expr *expr, basic_block bb)
2245 : {
2246 0 : rtx reg = expr->reaching_reg;
2247 0 : int regno = REGNO (reg);
2248 :
2249 0 : rtx_insn *insn = process_insert_insn (expr);
2250 0 : rtx_insn *new_insn = insert_insn_start_basic_block (insn, bb);
2251 :
2252 0 : gcse_create_count++;
2253 :
2254 0 : if (dump_file)
2255 : {
2256 0 : fprintf (dump_file, "hardreg PRE: start of bb %d, insn %d, ",
2257 0 : bb->index, INSN_UID (new_insn));
2258 0 : fprintf (dump_file, "copying expression %d to reg %d\n",
2259 : expr->bitmap_index, regno);
2260 : }
2261 0 : }
2262 :
/* Insert partially redundant expressions on edges in the CFG to make
   the expressions fully redundant.  EDGE_LIST is the list built by
   pre_edge_lcm; INDEX_MAP maps bitmap indices back to expressions.
   Returns true if any insertion was made.  */

static bool
pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
{
  int e, i, j, num_edges, set_size;
  bool did_insert = false;
  sbitmap *inserted;

  /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
     if it reaches any of the deleted expressions.  */

  set_size = pre_insert_map[0]->size;
  num_edges = NUM_EDGES (edge_list);
  /* INSERTED tracks, per edge, which expressions were already emitted
     there so the same expression is not inserted twice on one edge.  */
  inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
  bitmap_vector_clear (inserted, num_edges);

  for (e = 0; e < num_edges; e++)
    {
      int indx;
      basic_block pred_bb = INDEX_EDGE_PRED_BB (edge_list, e);
      basic_block succ_bb = INDEX_EDGE_SUCC_BB (edge_list, e);

      /* Walk the insert map one SBITMAP element at a time; INDX is the
	 bit number corresponding to the first bit of element I.  */
      for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
	{
	  SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];

	  for (j = indx;
	       insert && j < (int) expr_hash_table.n_elems;
	       j++, insert >>= 1)
	    if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
	      {
		struct gcse_expr *expr = index_map[j];
		struct gcse_occr *occr;

		/* Now look at each deleted occurrence of this expression.  */
		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
		  {
		    if (! occr->deleted_p)
		      continue;

		    /* Insert this expression on this edge if it would
		       reach the deleted occurrence in BB.  */
		    if (!bitmap_bit_p (inserted[e], j))
		      {
			rtx_insn *insn;
			edge eg = INDEX_EDGE (edge_list, e);

			/* We can't insert anything on an abnormal and
			   critical edge, so we insert the insn at the end of
			   the previous block.  There are several alternatives
			   detailed in Morgans book P277 (sec 10.5) for
			   handling this situation.  This one is easiest for
			   now.

			   For hardreg PRE this would add an unwanted clobber
			   of the hardreg, so we instead insert in the
			   successor block.  This may be partially redundant,
			   but it is at least correct.  */
			if (eg->flags & EDGE_ABNORMAL)
			  {
			    if (doing_hardreg_pre_p)
			      insert_insn_start_basic_block (index_map[j],
							     succ_bb);
			    else
			      insert_insn_end_basic_block (index_map[j],
							   pred_bb);
			  }
			else
			  {
			    insn = process_insert_insn (index_map[j]);
			    insert_insn_on_edge (insn, eg);
			  }

			if (dump_file)
			  {
			    fprintf (dump_file, "PRE: edge (%d,%d), ",
				     pred_bb->index,
				     succ_bb->index);
			    fprintf (dump_file, "copy expression %d\n",
				     expr->bitmap_index);
			  }

			update_ld_motion_stores (expr);
			bitmap_set_bit (inserted[e], j);
			did_insert = true;
			gcse_create_count++;
		      }
		  }
	      }
	}
    }

  sbitmap_vector_free (inserted);
  return did_insert;
}
2360 :
/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
   Given "old_reg <- expr" (INSN), instead of adding after it
     reaching_reg <- old_reg
   it's better to do the following:
     reaching_reg <- expr
     old_reg <- reaching_reg
   because this way copy propagation can discover additional PRE
   opportunities.  But if this fails, we try the old way.
   When "expr" is a store, i.e.
   given "MEM <- old_reg", instead of adding after it
     reaching_reg <- old_reg
   it's better to add it before as follows:
     reaching_reg <- old_reg
     MEM <- reaching_reg.  */

static void
pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
  rtx pat = PATTERN (insn);
  rtx set, first_set;
  rtx_insn *new_insn;
  rtx old_reg;
  int i;

  /* This block matches the logic in hash_scan_insn.  */
  switch (GET_CODE (pat))
    {
    case SET:
      set = pat;
      break;

    case PARALLEL:
      /* Search through the parallel looking for the set whose
	 source was the expression that we're interested in.  */
      first_set = NULL_RTX;
      set = NULL_RTX;
      for (i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx x = XVECEXP (pat, 0, i);
	  if (GET_CODE (x) == SET)
	    {
	      /* If the source was a REG_EQUAL or REG_EQUIV note, we
		 may not find an equivalent expression, but in this
		 case the PARALLEL will have a single set.  */
	      if (first_set == NULL_RTX)
		first_set = x;
	      if (expr_equiv_p (SET_SRC (x), expr->expr))
		{
		  set = x;
		  break;
		}
	    }
	}

      gcc_assert (first_set);
      if (set == NULL_RTX)
	set = first_set;
      break;

    default:
      gcc_unreachable ();
    }

  if (REG_P (SET_DEST (set)))
    {
      old_reg = SET_DEST (set);
      /* Check if we can modify the set destination in the original insn
	 (the preferred "reaching_reg <- expr; old_reg <- reaching_reg"
	 form described above).  */
      if (validate_change (insn, &SET_DEST (set), reg, 0))
	{
	  new_insn = gen_move_insn (old_reg, reg);
	  new_insn = emit_insn_after (new_insn, insn);
	}
      else
	{
	  /* Fall back to the plain "reaching_reg <- old_reg" copy.  */
	  new_insn = gen_move_insn (reg, old_reg);
	  new_insn = emit_insn_after (new_insn, insn);
	}
    }
  else /* This is possible only in case of a store to memory.  */
    {
      old_reg = SET_SRC (set);
      new_insn = gen_move_insn (reg, old_reg);

      /* Check if we can modify the set source in the original insn.  */
      if (validate_change (insn, &SET_SRC (set), reg, 0))
	new_insn = emit_insn_before (new_insn, insn);
      else
	new_insn = emit_insn_after (new_insn, insn);
    }

  gcse_create_count++;

  if (dump_file)
    fprintf (dump_file,
	     "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
	     BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
	     INSN_UID (insn), regno);
}
2462 :
/* Copy available expressions that reach the redundant expression
   to `reaching_reg'.  */

static void
pre_insert_copies (void)
{
  unsigned int i;
  bool added_copy;
  struct gcse_expr *expr;
  struct gcse_occr *occr;
  struct gcse_occr *avail;

  /* For each available expression in the table, copy the result to
     `reaching_reg' if the expression reaches a deleted one.

     ??? The current algorithm is rather brute force.
     Need to do some profiling.  */

  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      {
	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
	   we don't want to insert a copy here because the expression may not
	   really be redundant.  So only insert an insn if the expression was
	   deleted.  This test also avoids further processing if the
	   expression wasn't deleted anywhere.  */
	if (expr->reaching_reg == NULL)
	  continue;

	/* Set when we add a copy for that expression.  */
	added_copy = false;

	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    if (! occr->deleted_p)
	      continue;

	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
	      {
		rtx_insn *insn = avail->insn;

		/* No need to handle this one if handled already.  */
		if (avail->copied_p)
		  continue;

		/* Don't handle this one if it's a redundant one.  */
		if (insn->deleted ())
		  continue;

		/* Or if the expression doesn't reach the deleted one.  */
		if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
					       expr,
					       BLOCK_FOR_INSN (occr->insn)))
		  continue;

		added_copy = true;

		/* Copy the result of avail to reaching_reg.  */
		pre_insert_copy_insn (expr, insn);
		avail->copied_p = 1;
	      }
	  }

	if (added_copy)
	  update_ld_motion_stores (expr);
      }
}
2530 :
/* Accumulator for record_set_data / single_set_gcse.  */
struct set_data
{
  /* Insn being examined; used to look up its REG_UNUSED notes.  */
  rtx_insn *insn;
  /* The single live SET recorded so far.  */
  const_rtx set;
  /* Number of live (non-REG_UNUSED) sets seen.  */
  int nsets;
};
2537 :
/* Increment number of sets and record set in DATA.
   Callback for note_pattern_stores; DEST is the store destination,
   SET the enclosing SET (or CLOBBER, which is ignored).  */

static void
record_set_data (rtx dest, const_rtx set, void *data)
{
  struct set_data *s = (struct set_data *)data;

  if (GET_CODE (set) == SET)
    {
      /* We allow insns having multiple sets, where all but one are
	 dead as single set insns.  In the common case only a single
	 set is present, so we want to avoid checking for REG_UNUSED
	 notes unless necessary.  */
      if (s->nsets == 1
	  && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
	  && !side_effects_p (s->set))
	/* The previously recorded set turned out to be dead; forget it
	   so the current one can take its place.  */
	s->nsets = 0;

      if (!s->nsets)
	{
	  /* Record this set.  */
	  s->nsets += 1;
	  s->set = set;
	}
      else if (!find_reg_note (s->insn, REG_UNUSED, dest)
	       || side_effects_p (set))
	/* A second live set; the insn is not single-set.  */
	s->nsets += 1;
    }
}
2567 :
2568 : static const_rtx
2569 1705484 : single_set_gcse (rtx_insn *insn)
2570 : {
2571 1705484 : struct set_data s;
2572 1705484 : rtx pattern;
2573 :
2574 1705484 : gcc_assert (INSN_P (insn));
2575 :
2576 : /* Optimize common case. */
2577 1705484 : pattern = PATTERN (insn);
2578 1705484 : if (GET_CODE (pattern) == SET)
2579 : return pattern;
2580 :
2581 856261 : s.insn = insn;
2582 856261 : s.nsets = 0;
2583 856261 : note_pattern_stores (pattern, record_set_data, &s);
2584 :
2585 : /* Considered invariant insns have exactly one set. */
2586 856261 : gcc_assert (s.nsets == 1);
2587 856261 : return s.set;
2588 : }
2589 :
/* Emit move from SRC to DEST noting the equivalence with expression computed
   in INSN.  Returns the new move insn.  */

static rtx_insn *
gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
{
  rtx_insn *new_rtx;
  const_rtx set = single_set_gcse (insn);
  rtx set2;
  rtx note;
  rtx eqv = NULL_RTX;

  /* This should never fail since we're creating a reg->reg copy
     we've verified to be valid.  */

  new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);

  /* Note the equivalence for local CSE pass.  Take the note from the old
     set if there was one.  Otherwise record the SET_SRC from the old set
     unless DEST is also an operand of the SET_SRC.  */
  set2 = single_set (new_rtx);
  if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
    /* The emitted move is not the plain single set we expected; do not
       attach a possibly wrong REG_EQUAL note.  */
    return new_rtx;
  if ((note = find_reg_equal_equiv_note (insn)))
    eqv = XEXP (note, 0);
  else if (! REG_P (dest)
	   || ! reg_mentioned_p (dest, SET_SRC (set)))
    eqv = SET_SRC (set);

  if (eqv != NULL_RTX)
    set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));

  return new_rtx;
}
2624 :
/* Delete redundant computations.
   Deletion is done by changing the insn to copy the `reaching_reg' of
   the expression into the result of the SET.  It is left to later passes
   to propagate the copy or eliminate it.

   Return true if a change is made.  */

static bool
pre_delete (void)
{
  unsigned int i;
  bool changed = false;
  struct gcse_expr *expr;
  struct gcse_occr *occr;

  /* Walk every expression in the hash table, visiting each hash chain.  */
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      {
	int indx = expr->bitmap_index;

	/* We only need to search antic_occr since we require ANTLOC != 0.  */
	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    rtx_insn *insn = occr->insn;
	    rtx set;
	    basic_block bb = BLOCK_FOR_INSN (insn);

	    /* We only delete insns that have a single_set.
	       Note the short-circuit order: dbg_cnt is only consulted (and
	       its counter only advanced) for occurrences that are actually
	       marked deletable and have a single set.  */
	    if (bitmap_bit_p (pre_delete_map[bb->index], indx)
		&& (set = single_set (insn)) != 0
		&& dbg_cnt (pre_insn))
	      {
	        rtx dest = SET_DEST (set);
		/* The reaching register is allocated lazily, once per
		   expression, on the first deleted occurrence.  */
		if (expr->reaching_reg == NULL)
		  {
		    if (doing_hardreg_pre_p)
		      /* Use the hardreg as the reaching register.  The
			 deleted sets will be replaced with noop moves.

			 This may change the value of the hardreg in some debug
			 instructions, so we will need to reset any debug uses
			 of the hardreg.  */
		      expr->reaching_reg = dest;
		    else
		      /* Create a pseudo-reg to store the result of reaching
			 expressions into.  Get the mode for the new pseudo from
			 the mode of the original destination pseudo.  */
		      expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
		  }

		/* Replace the redundant computation with a copy from the
		   reaching register, then delete the original insn.  */
		gcse_emit_move_after (dest, expr->reaching_reg, insn);
		delete_insn (insn);
		/* Mark the occurrence so later processing (e.g. hoisting or
		   copy insertion) knows it is gone.  */
		occr->deleted_p = 1;
		changed = true;
		gcse_subst_count++;

		if (dump_file)
		  {
		    fprintf (dump_file,
			     "PRE: redundant insn %d (expression %d) in ",
			     INSN_UID (insn), indx);
		    fprintf (dump_file, "bb %d, reaching reg is %d\n",
			     bb->index, REGNO (expr->reaching_reg));
		  }
	      }
	  }
      }

  return changed;
}
2695 :
2696 : /* Since hardreg PRE reuses the hardreg as the reaching register, we need to
2697 : eliminate any existing uses in debug insns. This is overly conservative,
2698 : but there's currently no benefit to preserving the debug insns, so there's
2699 : no point doing the work to retain them. */
2700 :
2701 : static void
2702 0 : reset_hardreg_debug_uses ()
2703 : {
2704 0 : df_ref def;
2705 0 : for (def = DF_REG_USE_CHAIN (current_hardreg_regno);
2706 0 : def;
2707 0 : def = DF_REF_NEXT_REG (def))
2708 : {
2709 0 : rtx_insn *insn = DF_REF_INSN (def);
2710 0 : if (DEBUG_INSN_P (insn))
2711 0 : delete_insn (insn);
2712 : }
2713 0 : }
2714 :
2715 : /* Perform GCSE optimizations using PRE.
2716 : This is called by one_pre_gcse_pass after all the dataflow analysis
2717 : has been done.
2718 :
2719 : This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2720 : lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2721 : Compiler Design and Implementation.
2722 :
2723 : ??? A new pseudo reg is created to hold the reaching expression. The nice
2724 : thing about the classical approach is that it would try to use an existing
2725 : reg. If the register can't be adequately optimized [i.e. we introduce
2726 : reload problems], one could add a pass here to propagate the new register
2727 : through the block.
2728 :
2729 : ??? We don't handle single sets in PARALLELs because we're [currently] not
2730 : able to copy the rest of the parallel when we insert copies to create full
2731 : redundancies from partial redundancies. However, there's no reason why we
2732 : can't handle PARALLELs in the cases where there are no partial
2733 : redundancies. */
2734 :
2735 : static bool
2736 417337 : pre_gcse (struct edge_list *edge_list)
2737 : {
2738 417337 : unsigned int i;
2739 417337 : bool did_insert, changed;
2740 417337 : struct gcse_expr **index_map;
2741 417337 : struct gcse_expr *expr;
2742 :
2743 : /* Compute a mapping from expression number (`bitmap_index') to
2744 : hash table entry. */
2745 :
2746 417337 : index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
2747 20702255 : for (i = 0; i < expr_hash_table.size; i++)
2748 29546797 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2749 9679216 : index_map[expr->bitmap_index] = expr;
2750 :
2751 : /* Delete the redundant insns first so that
2752 : - we know what register to use for the new insns and for the other
2753 : ones with reaching expressions
2754 : - we know which insns are redundant when we go to create copies */
2755 :
2756 417337 : changed = pre_delete ();
2757 417337 : did_insert = pre_edge_insert (edge_list, index_map);
2758 : /* In other places with reaching expressions, copy the expression to the
2759 : specially allocated pseudo-reg that reaches the redundant expr. This
2760 : isn't needed for hardreg PRE. */
2761 417337 : if (!doing_hardreg_pre_p)
2762 417337 : pre_insert_copies ();
2763 :
2764 417337 : if (did_insert)
2765 : {
2766 72958 : if (doing_hardreg_pre_p)
2767 0 : reset_hardreg_debug_uses ();
2768 72958 : commit_edge_insertions ();
2769 72958 : changed = true;
2770 : }
2771 :
2772 417337 : free (index_map);
2773 417337 : return changed;
2774 : }
2775 :
/* Top level routine to perform one PRE GCSE pass.

   Return true if a change was made.  */

static bool
one_pre_gcse_pass (void)
{
  bool changed = false;

  /* Statistics reported in the dump at the end of the pass.  */
  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
      || gcse_or_cprop_is_too_expensive (_("PRE disabled")))
    return false;

  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  /* Build the expression hash table; fake exit edges are added so that
     the dataflow problems treat noreturn calls correctly.  */
  alloc_hash_table (&expr_hash_table);
  add_noreturn_fake_exit_edges ();
  if (do_load_motion ())
    compute_ld_motion_mems ();

  compute_hash_table (&expr_hash_table);
  if (do_load_motion ())
    trim_ld_motion_mems ();
  if (dump_file)
    dump_hash_table (dump_file, "Expression", &expr_hash_table);

  /* Only run the PRE dataflow problems and transformation when there is
     at least one candidate expression.  */
  if (expr_hash_table.n_elems > 0)
    {
      struct edge_list *edge_list;
      alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
      edge_list = compute_pre_data ();
      if (pre_gcse (edge_list))
	changed = true;
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  /* Tear everything down in the reverse order it was set up.  */
  if (do_load_motion ())
    free_ld_motion_mems ();
  remove_fake_exit_edges ();
  free_hash_table (&expr_hash_table);

  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks_for_fn (cfun),
	       bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }

  return changed;
}
2844 :
2845 : /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2846 : to INSN. If such notes are added to an insn which references a
2847 : CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2848 : that note, because the following loop optimization pass requires
2849 : them. */
2850 :
2851 : /* ??? If there was a jump optimization pass after gcse and before loop,
2852 : then we would not need to do this here, because jump would add the
2853 : necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2854 :
2855 : static void
2856 346289 : add_label_notes (rtx x, rtx_insn *insn)
2857 : {
2858 346289 : enum rtx_code code = GET_CODE (x);
2859 346289 : int i, j;
2860 346289 : const char *fmt;
2861 :
2862 346289 : if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2863 : {
2864 : /* This code used to ignore labels that referred to dispatch tables to
2865 : avoid flow generating (slightly) worse code.
2866 :
2867 : We no longer ignore such label references (see LABEL_REF handling in
2868 : mark_jump_label for additional information). */
2869 :
2870 : /* There's no reason for current users to emit jump-insns with
2871 : such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2872 : notes. */
2873 0 : gcc_assert (!JUMP_P (insn));
2874 0 : add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
2875 :
2876 0 : if (LABEL_P (label_ref_label (x)))
2877 0 : LABEL_NUSES (label_ref_label (x))++;
2878 :
2879 0 : return;
2880 : }
2881 :
2882 838676 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2883 : {
2884 492387 : if (fmt[i] == 'e')
2885 276266 : add_label_notes (XEXP (x, i), insn);
2886 216121 : else if (fmt[i] == 'E')
2887 79 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2888 53 : add_label_notes (XVECEXP (x, i, j), insn);
2889 : }
2890 : }
2891 :
2892 : /* Code Hoisting variables and subroutines. */
2893 :
2894 : /* Very busy expressions. */
2895 : static sbitmap *hoist_vbein;
2896 : static sbitmap *hoist_vbeout;
2897 :
2898 : /* ??? We could compute post dominators and run this algorithm in
2899 : reverse to perform tail merging, doing so would probably be
2900 : more effective than the tail merging code in jump.cc.
2901 :
2902 : It's unclear if tail merging could be run in parallel with
2903 : code hoisting. It would be nice. */
2904 :
2905 : /* Allocate vars used for code hoisting analysis. */
2906 :
2907 : static void
2908 25894 : alloc_code_hoist_mem (int n_blocks, int n_exprs)
2909 : {
2910 25894 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2911 25894 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2912 25894 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2913 :
2914 25894 : hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2915 25894 : hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2916 25894 : }
2917 :
2918 : /* Free vars used for code hoisting analysis. */
2919 :
2920 : static void
2921 25894 : free_code_hoist_mem (void)
2922 : {
2923 25894 : sbitmap_vector_free (antloc);
2924 25894 : sbitmap_vector_free (transp);
2925 25894 : sbitmap_vector_free (comp);
2926 :
2927 25894 : sbitmap_vector_free (hoist_vbein);
2928 25894 : sbitmap_vector_free (hoist_vbeout);
2929 :
2930 25894 : free_dominance_info (CDI_DOMINATORS);
2931 25894 : }
2932 :
/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.  This is a backward dataflow problem solved
   iteratively to a fixed point:

     VBEout(bb) = intersection of VBEin over bb's successors, plus COMP(bb)
     VBEin(bb)  = ANTLOC(bb) union (VBEout(bb) intersect TRANSP(bb))  */

static void
compute_code_hoist_vbeinout (void)
{
  int changed, passes;
  basic_block bb;

  bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));

  passes = 0;
  changed = 1;

  /* Iterate until no VBEin set changes.  */
  while (changed)
    {
      changed = 0;

      /* We scan the blocks in the reverse order to speed up
	 the convergence.  */
      FOR_EACH_BB_REVERSE_FN (bb, cfun)
	{
	  /* The block just before the EXIT block keeps its (empty) VBEout:
	     nothing is very busy at function exit.  */
	  if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      bitmap_intersection_of_succs (hoist_vbeout[bb->index],
					    hoist_vbein, bb);

	      /* Include expressions in VBEout that are calculated
		 in BB and available at its end.  */
	      bitmap_ior (hoist_vbeout[bb->index],
			  hoist_vbeout[bb->index], comp[bb->index]);
	    }

	  /* VBEin = ANTLOC | (VBEout & TRANSP); bitmap_or_and returns
	     whether the destination changed.  */
	  changed |= bitmap_or_and (hoist_vbein[bb->index],
				    antloc[bb->index],
				    hoist_vbeout[bb->index],
				    transp[bb->index]);
	}

      passes++;
    }

  if (dump_file)
    {
      fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  fprintf (dump_file, "vbein (%d): ", bb->index);
	  dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
	  fprintf (dump_file, "vbeout(%d): ", bb->index);
	  dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
	}
    }
}
2991 :
/* Top level routine to do the dataflow analysis needed by code hoisting.
   Order matters: local properties feed the pruning, the pruned sets feed
   the VBE in/out computation, and the dominator info is needed afterwards
   by hoist_code.  */

static void
compute_code_hoist_data (void)
{
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
  prune_expressions (false);
  compute_code_hoist_vbeinout ();
  calculate_dominance_info (CDI_DOMINATORS);
  if (dump_file)
    fprintf (dump_file, "\n");
}
3004 :
/* Update register pressure for BB when hoisting an expression from
   instruction FROM, if live ranges of inputs are shrunk.  Also
   maintain live_in information if live range of register referred
   in FROM is shrunk.

   Return 0 if register pressure doesn't change, otherwise return
   the number by which register pressure is decreased.

   NOTE: Register pressure won't be increased in this function.  */

static int
update_bb_reg_pressure (basic_block bb, rtx_insn *from)
{
  rtx dreg;
  rtx_insn *insn;
  basic_block succ_bb;
  df_ref use, op_ref;
  edge succ;
  edge_iterator ei;
  int decreased_pressure = 0;
  int nregs;
  enum reg_class pressure_class;

  /* Check each register used by FROM.  */
  FOR_EACH_INSN_USE (use, from)
    {
      dreg = DF_REF_REAL_REG (use);
      /* The live range of register is shrunk only if it isn't:
	 1. referred on any path from the end of this block to EXIT, or
	 2. referred by insns other than FROM in this block.  */
      FOR_EACH_EDGE (succ, ei, bb->succs)
	{
	  succ_bb = succ->dest;
	  if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
	    break;
	}
      /* SUCC is non-NULL here iff the walk above broke out, i.e. the
	 register is live into some successor (condition 1 fails).  */
      if (succ != NULL)
	continue;

      /* Scan all df uses of DREG looking for another use in this block
	 by an insn other than FROM (condition 2).  */
      op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
      for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
	{
	  /* Artificial refs have no insn info; skip them.  */
	  if (!DF_REF_INSN_INFO (op_ref))
	    continue;

	  insn = DF_REF_INSN (op_ref);
	  if (BLOCK_FOR_INSN (insn) == bb
	      && NONDEBUG_INSN_P (insn) && insn != from)
	    break;
	}

      pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
      /* Decrease register pressure and update live_in information for
	 this block.  OP_REF is NULL here iff no other use was found, so
	 the live range of DREG really ends at FROM.  */
      if (!op_ref && pressure_class != NO_REGS)
	{
	  decreased_pressure += nregs;
	  BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
	  bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
	}
    }
  return decreased_pressure;
}
3070 :
/* Determine if the expression EXPR should be hoisted to EXPR_BB up in
   flow graph, if it can reach BB unimpared.  Stop the search if the
   expression would need to be moved more than DISTANCE instructions.

   DISTANCE is the number of instructions through which EXPR can be
   hoisted up in flow graph.

   BB_SIZE points to an array which contains the number of instructions
   for each basic block.

   PRESSURE_CLASS and NREGS are register class and number of hard registers
   for storing EXPR.

   HOISTED_BBS points to a bitmap indicating basic blocks through which
   EXPR is hoisted.

   FROM is the instruction from which EXPR is hoisted.

   It's unclear exactly what Muchnick meant by "unimpared".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.  */

static bool
should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
			  basic_block bb, sbitmap visited,
			  HOST_WIDE_INT distance,
			  int *bb_size, enum reg_class pressure_class,
			  int *nregs, bitmap hoisted_bbs, rtx_insn *from)
{
  unsigned int i;
  edge pred;
  edge_iterator ei;
  sbitmap_iterator sbi;
  bool visited_allocated_locally = false;
  int decreased_pressure = 0;

  if (flag_ira_hoist_pressure)
    {
      /* Record old information of basic block BB when it is visited
	 at the first time.  The backup is used to restore state in
	 hoist_code if the hoist is eventually abandoned.  */
      if (!bitmap_bit_p (hoisted_bbs, bb->index))
	{
	  struct bb_data *data = BB_DATA (bb);
	  bitmap_copy (data->backup, data->live_in);
	  data->old_pressure = data->max_reg_pressure[pressure_class];
	}
      decreased_pressure = update_bb_reg_pressure (bb, from);
    }
  /* Terminate the search if distance, for which EXPR is allowed to move,
     is exhausted.  */
  if (distance > 0)
    {
      if (flag_ira_hoist_pressure)
	{
	  /* Prefer to hoist EXPR if register pressure is decreased.  */
	  if (decreased_pressure > *nregs)
	    distance += bb_size[bb->index];
	  /* Let EXPR be hoisted through basic block at no cost if one
	     of following conditions is satisfied:

	     1. The basic block has low register pressure.
	     2. Register pressure won't be increases after hoisting EXPR.

	     Constant expressions is handled conservatively, because
	     hoisting constant expression aggressively results in worse
	     code.  This decision is made by the observation of CSiBE
	     on ARM target, while it has no obvious effect on other
	     targets like x86, x86_64, mips and powerpc.  */
	  else if (CONST_INT_P (expr->expr)
		   || (BB_DATA (bb)->max_reg_pressure[pressure_class]
			 >= ira_class_hard_regs_num[pressure_class]
		       && decreased_pressure < *nregs))
	    distance -= bb_size[bb->index];
	}
      else
	distance -= bb_size[bb->index];

      if (distance <= 0)
	return 0;
    }
  else
    /* A zero DISTANCE means the expression may move any number of
       instructions; a negative one would be a caller bug.  */
    gcc_assert (distance == 0);

  /* VISITED is allocated lazily on the outermost call (NULL from
     hoist_code) and shared by the recursive calls below.  */
  if (visited == NULL)
    {
      visited_allocated_locally = true;
      visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (visited);
    }

  /* Walk all predecessors; EXPR is hoistable to EXPR_BB only if every
     path back to EXPR_BB is transparent for it.  A `break' below leaves
     PRED non-NULL, which signals failure at the end.  */
  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	break;
      else if (pred_bb == expr_bb)
	continue;
      else if (bitmap_bit_p (visited, pred_bb->index))
	continue;
      else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
	break;
      /* Not killed.  */
      else
	{
	  bitmap_set_bit (visited, pred_bb->index);
	  if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
					  visited, distance, bb_size,
					  pressure_class, nregs,
					  hoisted_bbs, from))
	    break;
	}
    }
  if (visited_allocated_locally)
    {
      /* If EXPR can be hoisted to expr_bb, record basic blocks through
	 which EXPR is hoisted in hoisted_bbs.  */
      if (flag_ira_hoist_pressure && !pred)
	{
	  /* Record the basic block from which EXPR is hoisted.  */
	  bitmap_set_bit (visited, bb->index);
	  EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
	    bitmap_set_bit (hoisted_bbs, i);
	}
      sbitmap_free (visited);
    }

  /* PRED is NULL iff the predecessor walk completed without hitting a
     blocking block, i.e. EXPR reaches BB unimpared on all paths.  */
  return (pred == NULL);
}
3206 :
3207 : /* Find occurrence in BB. */
3208 :
3209 : static struct gcse_occr *
3210 1242066 : find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
3211 : {
3212 : /* Find the right occurrence of this expression. */
3213 16484971 : while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3214 15242905 : occr = occr->next;
3215 :
3216 1242066 : return occr;
3217 : }
3218 :
3219 : /* Actually perform code hoisting.
3220 :
3221 : The code hoisting pass can hoist multiple computations of the same
3222 : expression along dominated path to a dominating basic block, like
3223 : from b2/b3 to b1 as depicted below:
3224 :
3225 : b1 ------
3226 : /\ |
3227 : / \ |
3228 : bx by distance
3229 : / \ |
3230 : / \ |
3231 : b2 b3 ------
3232 :
3233 : Unfortunately code hoisting generally extends the live range of an
3234 : output pseudo register, which increases register pressure and hurts
3235 : register allocation. To address this issue, an attribute MAX_DISTANCE
3236 : is computed and attached to each expression. The attribute is computed
3237 : from rtx cost of the corresponding expression and it's used to control
3238 : how long the expression can be hoisted up in flow graph. As the
3239 : expression is hoisted up in flow graph, GCC decreases its DISTANCE
3240 : and stops the hoist if DISTANCE reaches 0. Code hoisting can decrease
3241 : register pressure if live ranges of inputs are shrunk.
3242 :
3243 : Option "-fira-hoist-pressure" implements register pressure directed
3244 : hoist based on upper method. The rationale is:
3245 : 1. Calculate register pressure for each basic block by reusing IRA
3246 : facility.
3247 : 2. When expression is hoisted through one basic block, GCC checks
3248 : the change of live ranges for inputs/output. The basic block's
3249 : register pressure will be increased because of extended live
3250 : range of output. However, register pressure will be decreased
3251 : if the live ranges of inputs are shrunk.
3252 : 3. After knowing how hoisting affects register pressure, GCC prefers
3253 : to hoist the expression if it can decrease register pressure, by
3254 : increasing DISTANCE of the corresponding expression.
3255 : 4. If hoisting the expression increases register pressure, GCC checks
3256 : register pressure of the basic block and decrease DISTANCE only if
3257 : the register pressure is high. In other words, expression will be
3258 : hoisted through at no cost if the basic block has low register
3259 : pressure.
3260 : 5. Update register pressure information for basic blocks through
3261 : which expression is hoisted. */
3262 :
3263 : static bool
3264 25894 : hoist_code (void)
3265 : {
3266 25894 : basic_block bb, dominated;
3267 25894 : unsigned int dom_tree_walk_index;
3268 25894 : unsigned int i, j, k;
3269 25894 : struct gcse_expr **index_map;
3270 25894 : struct gcse_expr *expr;
3271 25894 : int *to_bb_head;
3272 25894 : int *bb_size;
3273 25894 : bool changed = false;
3274 25894 : struct bb_data *data;
3275 : /* Basic blocks that have occurrences reachable from BB. */
3276 25894 : bitmap from_bbs;
3277 : /* Basic blocks through which expr is hoisted. */
3278 25894 : bitmap hoisted_bbs = NULL;
3279 25894 : bitmap_iterator bi;
3280 :
3281 : /* Compute a mapping from expression number (`bitmap_index') to
3282 : hash table entry. */
3283 :
3284 25894 : index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
3285 869770 : for (i = 0; i < expr_hash_table.size; i++)
3286 1120590 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3287 302608 : index_map[expr->bitmap_index] = expr;
3288 :
3289 : /* Calculate sizes of basic blocks and note how far
3290 : each instruction is from the start of its block. We then use this
3291 : data to restrict distance an expression can travel. */
3292 :
3293 25894 : to_bb_head = XCNEWVEC (int, get_max_uid ());
3294 25894 : bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));
3295 :
3296 351345 : FOR_EACH_BB_FN (bb, cfun)
3297 : {
3298 325451 : rtx_insn *insn;
3299 325451 : int to_head;
3300 :
3301 325451 : to_head = 0;
3302 2805084 : FOR_BB_INSNS (bb, insn)
3303 : {
3304 : /* Don't count debug instructions to avoid them affecting
3305 : decision choices. */
3306 2479633 : if (NONDEBUG_INSN_P (insn))
3307 1858357 : to_bb_head[INSN_UID (insn)] = to_head++;
3308 : }
3309 :
3310 325451 : bb_size[bb->index] = to_head;
3311 : }
3312 :
3313 25894 : gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
3314 : && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
3315 : == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));
3316 :
3317 25894 : from_bbs = BITMAP_ALLOC (NULL);
3318 25894 : if (flag_ira_hoist_pressure)
3319 25894 : hoisted_bbs = BITMAP_ALLOC (NULL);
3320 :
3321 25894 : auto_vec<basic_block> dom_tree_walk
3322 : = get_all_dominated_blocks (CDI_DOMINATORS,
3323 25894 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
3324 :
3325 : /* Walk over each basic block looking for potentially hoistable
3326 : expressions, nothing gets hoisted from the entry block. */
3327 351345 : FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3328 : {
3329 325451 : auto_vec<basic_block> domby
3330 325451 : = get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth);
3331 :
3332 325451 : if (domby.length () == 0)
3333 0 : continue;
3334 :
3335 : /* Examine each expression that is very busy at the exit of this
3336 : block. These are the potentially hoistable expressions. */
3337 48760682 : for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3338 : {
3339 48435231 : if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3340 : {
3341 3657453 : int nregs = 0;
3342 3657453 : enum reg_class pressure_class = NO_REGS;
3343 : /* Current expression. */
3344 3657453 : struct gcse_expr *expr = index_map[i];
3345 : /* Number of occurrences of EXPR that can be hoisted to BB. */
3346 3657453 : int hoistable = 0;
3347 : /* Occurrences reachable from BB. */
3348 3657453 : vec<occr_t> occrs_to_hoist = vNULL;
3349 : /* We want to insert the expression into BB only once, so
3350 : note when we've inserted it. */
3351 3657453 : int insn_inserted_p;
3352 3657453 : occr_t occr;
3353 :
3354 : /* If an expression is computed in BB and is available at end of
3355 : BB, hoist all occurrences dominated by BB to BB. */
3356 3657453 : if (bitmap_bit_p (comp[bb->index], i))
3357 : {
3358 267942 : occr = find_occr_in_bb (expr->antic_occr, bb);
3359 :
3360 267942 : if (occr)
3361 : {
3362 : /* An occurrence might've been already deleted
3363 : while processing a dominator of BB. */
3364 157634 : if (!occr->deleted_p)
3365 : {
3366 124626 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3367 : hoistable++;
3368 : }
3369 : }
3370 : else
3371 : hoistable++;
3372 : }
3373 :
3374 : /* We've found a potentially hoistable expression, now
3375 : we look at every block BB dominates to see if it
3376 : computes the expression. */
3377 46783924 : FOR_EACH_VEC_ELT (domby, j, dominated)
3378 : {
3379 43126471 : HOST_WIDE_INT max_distance;
3380 :
3381 : /* Ignore self dominance. */
3382 43126471 : if (bb == dominated)
3383 3657453 : continue;
3384 : /* We've found a dominated block, now see if it computes
3385 : the busy expression and whether or not moving that
3386 : expression to the "beginning" of that block is safe. */
3387 39469018 : if (!bitmap_bit_p (antloc[dominated->index], i))
3388 38494894 : continue;
3389 :
3390 974124 : occr = find_occr_in_bb (expr->antic_occr, dominated);
3391 974124 : gcc_assert (occr);
3392 :
3393 : /* An occurrence might've been already deleted
3394 : while processing a dominator of BB. */
3395 974124 : if (occr->deleted_p)
3396 274671 : continue;
3397 699453 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3398 :
3399 699453 : max_distance = expr->max_distance;
3400 699453 : if (max_distance > 0)
3401 : /* Adjust MAX_DISTANCE to account for the fact that
3402 : OCCR won't have to travel all of DOMINATED, but
3403 : only part of it. */
3404 1389148 : max_distance += (bb_size[dominated->index]
3405 694574 : - to_bb_head[INSN_UID (occr->insn)]);
3406 :
3407 699453 : pressure_class = get_pressure_class_and_nregs (occr->insn,
3408 : &nregs);
3409 :
3410 : /* Note if the expression should be hoisted from the dominated
3411 : block to BB if it can reach DOMINATED unimpared.
3412 :
3413 : Keep track of how many times this expression is hoistable
3414 : from a dominated block into BB. */
3415 699453 : if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3416 : max_distance, bb_size,
3417 : pressure_class, &nregs,
3418 : hoisted_bbs, occr->insn))
3419 : {
3420 461870 : hoistable++;
3421 461870 : occrs_to_hoist.safe_push (occr);
3422 461870 : bitmap_set_bit (from_bbs, dominated->index);
3423 : }
3424 : }
3425 :
3426 : /* If we found more than one hoistable occurrence of this
3427 : expression, then note it in the vector of expressions to
3428 : hoist. It makes no sense to hoist things which are computed
3429 : in only one BB, and doing so tends to pessimize register
3430 : allocation. One could increase this value to try harder
3431 : to avoid any possible code expansion due to register
3432 : allocation issues; however experiments have shown that
3433 : the vast majority of hoistable expressions are only movable
3434 : from two successors, so raising this threshold is likely
3435 : to nullify any benefit we get from code hoisting. */
3436 3657453 : if (hoistable > 1 && dbg_cnt (hoist_insn))
3437 : {
3438 : /* If (hoistable != vec::length), then there is
3439 : an occurrence of EXPR in BB itself. Don't waste
3440 : time looking for LCA in this case. */
3441 193488 : if ((unsigned) hoistable == occrs_to_hoist.length ())
3442 : {
3443 84957 : basic_block lca;
3444 :
3445 84957 : lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3446 : from_bbs);
3447 84957 : if (lca != bb)
3448 : /* Punt, it's better to hoist these occurrences to
3449 : LCA. */
3450 82727 : occrs_to_hoist.release ();
3451 : }
3452 : }
3453 : else
3454 : /* Punt, no point hoisting a single occurrence. */
3455 3560709 : occrs_to_hoist.release ();
3456 :
3457 3657453 : if (flag_ira_hoist_pressure
3458 3657453 : && !occrs_to_hoist.is_empty ())
3459 : {
3460 : /* Increase register pressure of basic blocks to which
3461 : expr is hoisted because of extended live range of
3462 : output. */
3463 14017 : data = BB_DATA (bb);
3464 14017 : data->max_reg_pressure[pressure_class] += nregs;
3465 291787 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3466 : {
3467 277770 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3468 277770 : data->max_reg_pressure[pressure_class] += nregs;
3469 : }
3470 : }
3471 3643436 : else if (flag_ira_hoist_pressure)
3472 : {
3473 : /* Restore register pressure and live_in info for basic
3474 : blocks recorded in hoisted_bbs when expr will not be
3475 : hoisted. */
3476 8938237 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3477 : {
3478 5294801 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3479 5294801 : bitmap_copy (data->live_in, data->backup);
3480 5294801 : data->max_reg_pressure[pressure_class]
3481 5294801 : = data->old_pressure;
3482 : }
3483 : }
3484 :
3485 3657453 : if (flag_ira_hoist_pressure)
3486 3657453 : bitmap_clear (hoisted_bbs);
3487 :
3488 : insn_inserted_p = 0;
3489 :
3490 : /* Walk through occurrences of I'th expressions we want
3491 : to hoist to BB and make the transformations. */
3492 3693636 : FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3493 : {
3494 36183 : rtx_insn *insn;
3495 36183 : const_rtx set;
3496 :
3497 36183 : gcc_assert (!occr->deleted_p);
3498 :
3499 36183 : insn = occr->insn;
3500 36183 : set = single_set_gcse (insn);
3501 :
3502 : /* Create a pseudo-reg to store the result of reaching
3503 : expressions into. Get the mode for the new pseudo
3504 : from the mode of the original destination pseudo.
3505 :
3506 : It is important to use new pseudos whenever we
3507 : emit a set. This will allow reload to use
3508 : rematerialization for such registers. */
3509 36183 : if (!insn_inserted_p)
3510 14017 : expr->reaching_reg
3511 14017 : = gen_reg_rtx_and_attrs (SET_DEST (set));
3512 :
3513 36183 : gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3514 : insn);
3515 36183 : delete_insn (insn);
3516 36183 : occr->deleted_p = 1;
3517 36183 : changed = true;
3518 36183 : gcse_subst_count++;
3519 :
3520 36183 : if (!insn_inserted_p)
3521 : {
3522 14017 : insert_insn_end_basic_block (expr, bb);
3523 14017 : insn_inserted_p = 1;
3524 : }
3525 : }
3526 :
3527 3657453 : occrs_to_hoist.release ();
3528 3657453 : bitmap_clear (from_bbs);
3529 : }
3530 : }
3531 325451 : }
3532 :
3533 25894 : BITMAP_FREE (from_bbs);
3534 25894 : if (flag_ira_hoist_pressure)
3535 25894 : BITMAP_FREE (hoisted_bbs);
3536 :
3537 25894 : free (bb_size);
3538 25894 : free (to_bb_head);
3539 25894 : free (index_map);
3540 :
3541 25894 : return changed;
3542 25894 : }
3543 :
3544 : /* Return pressure class and number of needed hard registers (through
3545 : *NREGS) of register REGNO. */
3546 : static enum reg_class
3547 5917264 : get_regno_pressure_class (int regno, int *nregs)
3548 : {
3549 5917264 : if (regno >= FIRST_PSEUDO_REGISTER)
3550 : {
3551 3315271 : enum reg_class pressure_class;
3552 :
3553 3315271 : pressure_class = reg_allocno_class (regno);
3554 3315271 : pressure_class = ira_pressure_class_translate[pressure_class];
3555 3315271 : *nregs
3556 3315271 : = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3557 3315271 : return pressure_class;
3558 : }
3559 2601993 : else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3560 2601993 : && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3561 : {
3562 785020 : *nregs = 1;
3563 785020 : return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3564 : }
3565 : else
3566 : {
3567 1816973 : *nregs = 0;
3568 1816973 : return NO_REGS;
3569 : }
3570 : }
3571 :
3572 : /* Return pressure class and number of hard registers (through *NREGS)
3573 : for destination of INSN. */
3574 : static enum reg_class
3575 699453 : get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
3576 : {
3577 699453 : rtx reg;
3578 699453 : enum reg_class pressure_class;
3579 699453 : const_rtx set = single_set_gcse (insn);
3580 :
3581 699453 : reg = SET_DEST (set);
3582 699453 : if (GET_CODE (reg) == SUBREG)
3583 0 : reg = SUBREG_REG (reg);
3584 699453 : if (MEM_P (reg))
3585 : {
3586 0 : *nregs = 0;
3587 0 : pressure_class = NO_REGS;
3588 : }
3589 : else
3590 : {
3591 699453 : gcc_assert (REG_P (reg));
3592 699453 : pressure_class = reg_allocno_class (REGNO (reg));
3593 699453 : pressure_class = ira_pressure_class_translate[pressure_class];
3594 699453 : *nregs
3595 699453 : = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3596 : }
3597 699453 : return pressure_class;
3598 : }
3599 :
3600 : /* Increase (if INCR_P) or decrease current register pressure for
3601 : register REGNO. */
3602 : static void
3603 5357214 : change_pressure (int regno, bool incr_p)
3604 : {
3605 5357214 : int nregs;
3606 5357214 : enum reg_class pressure_class;
3607 :
3608 5357214 : pressure_class = get_regno_pressure_class (regno, &nregs);
3609 5357214 : if (! incr_p)
3610 1291154 : curr_reg_pressure[pressure_class] -= nregs;
3611 : else
3612 : {
3613 4066060 : curr_reg_pressure[pressure_class] += nregs;
3614 4066060 : if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3615 : < curr_reg_pressure[pressure_class])
3616 1876129 : BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3617 1876129 : = curr_reg_pressure[pressure_class];
3618 : }
3619 5357214 : }
3620 :
3621 : /* Calculate register pressure for each basic block by walking insns
3622 : from last to first. */
3623 : static void
3624 29357 : calculate_bb_reg_pressure (void)
3625 : {
3626 29357 : int i;
3627 29357 : unsigned int j;
3628 29357 : rtx_insn *insn;
3629 29357 : basic_block bb;
3630 29357 : bitmap curr_regs_live;
3631 29357 : bitmap_iterator bi;
3632 :
3633 :
3634 29357 : ira_setup_eliminable_regset ();
3635 29357 : curr_regs_live = BITMAP_ALLOC (®_obstack);
3636 371123 : FOR_EACH_BB_FN (bb, cfun)
3637 : {
3638 341766 : curr_bb = bb;
3639 341766 : BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3640 341766 : BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3641 341766 : bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3642 341766 : bitmap_copy (curr_regs_live, df_get_live_out (bb));
3643 2052729 : for (i = 0; i < ira_pressure_classes_num; i++)
3644 1369197 : curr_reg_pressure[ira_pressure_classes[i]] = 0;
3645 3155154 : EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3646 2813388 : change_pressure (j, true);
3647 :
3648 2906114 : FOR_BB_INSNS_REVERSE (bb, insn)
3649 : {
3650 2564348 : rtx dreg;
3651 2564348 : int regno;
3652 2564348 : df_ref def, use;
3653 :
3654 2564348 : if (! NONDEBUG_INSN_P (insn))
3655 651894 : continue;
3656 :
3657 16346550 : FOR_EACH_INSN_DEF (def, insn)
3658 : {
3659 14434096 : dreg = DF_REF_REAL_REG (def);
3660 14434096 : gcc_assert (REG_P (dreg));
3661 14434096 : regno = REGNO (dreg);
3662 14434096 : if (!(DF_REF_FLAGS (def)
3663 : & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3664 : {
3665 14431787 : if (bitmap_clear_bit (curr_regs_live, regno))
3666 1291154 : change_pressure (regno, false);
3667 : }
3668 : }
3669 :
3670 4164225 : FOR_EACH_INSN_USE (use, insn)
3671 : {
3672 2251771 : dreg = DF_REF_REAL_REG (use);
3673 2251771 : gcc_assert (REG_P (dreg));
3674 2251771 : regno = REGNO (dreg);
3675 2251771 : if (bitmap_set_bit (curr_regs_live, regno))
3676 1252672 : change_pressure (regno, true);
3677 : }
3678 : }
3679 : }
3680 29357 : BITMAP_FREE (curr_regs_live);
3681 :
3682 29357 : if (dump_file == NULL)
3683 29350 : return;
3684 :
3685 7 : fprintf (dump_file, "\nRegister Pressure: \n");
3686 35 : FOR_EACH_BB_FN (bb, cfun)
3687 : {
3688 28 : fprintf (dump_file, " Basic block %d: \n", bb->index);
3689 140 : for (i = 0; (int) i < ira_pressure_classes_num; i++)
3690 : {
3691 112 : enum reg_class pressure_class;
3692 :
3693 112 : pressure_class = ira_pressure_classes[i];
3694 112 : if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3695 84 : continue;
3696 :
3697 28 : fprintf (dump_file, " %s=%d\n", reg_class_names[pressure_class],
3698 : BB_DATA (bb)->max_reg_pressure[pressure_class]);
3699 : }
3700 : }
3701 7 : fprintf (dump_file, "\n");
3702 : }
3703 :
3704 : /* Top level routine to perform one code hoisting (aka unification) pass
3705 :
3706 : Return true if a change was made. */
3707 :
3708 : static bool
3709 63999 : one_code_hoisting_pass (void)
3710 : {
3711 63999 : bool changed = false;
3712 :
3713 63999 : gcse_subst_count = 0;
3714 63999 : gcse_create_count = 0;
3715 :
3716 : /* Return if there's nothing to do, or it is too expensive. */
3717 63999 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
3718 63999 : || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
3719 34642 : return false;
3720 :
3721 29357 : doing_code_hoisting_p = true;
3722 :
3723 : /* Calculate register pressure for each basic block. */
3724 29357 : if (flag_ira_hoist_pressure)
3725 : {
3726 29357 : regstat_init_n_sets_and_refs ();
3727 29357 : ira_set_pseudo_classes (false, dump_file);
3728 29357 : alloc_aux_for_blocks (sizeof (struct bb_data));
3729 29357 : calculate_bb_reg_pressure ();
3730 29357 : regstat_free_n_sets_and_refs ();
3731 : }
3732 :
3733 : /* We need alias. */
3734 29357 : init_alias_analysis ();
3735 :
3736 29357 : bytes_used = 0;
3737 29357 : gcc_obstack_init (&gcse_obstack);
3738 29357 : alloc_gcse_mem ();
3739 :
3740 29357 : alloc_hash_table (&expr_hash_table);
3741 29357 : compute_hash_table (&expr_hash_table);
3742 29357 : if (dump_file)
3743 7 : dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3744 :
3745 29357 : if (expr_hash_table.n_elems > 0)
3746 : {
3747 25894 : alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
3748 : expr_hash_table.n_elems);
3749 25894 : compute_code_hoist_data ();
3750 25894 : changed = hoist_code ();
3751 25894 : free_code_hoist_mem ();
3752 : }
3753 :
3754 29357 : if (flag_ira_hoist_pressure)
3755 : {
3756 29357 : free_aux_for_blocks ();
3757 29357 : free_reg_info ();
3758 : }
3759 29357 : free_hash_table (&expr_hash_table);
3760 29357 : free_gcse_mem ();
3761 29357 : obstack_free (&gcse_obstack, NULL);
3762 :
3763 : /* We are finished with alias. */
3764 29357 : end_alias_analysis ();
3765 :
3766 29357 : if (dump_file)
3767 : {
3768 7 : fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3769 7 : current_function_name (), n_basic_blocks_for_fn (cfun),
3770 : bytes_used);
3771 7 : fprintf (dump_file, "%d substs, %d insns created\n",
3772 : gcse_subst_count, gcse_create_count);
3773 : }
3774 :
3775 29357 : doing_code_hoisting_p = false;
3776 :
3777 29357 : return changed;
3778 : }
3779 :
3780 : /* Here we provide the things required to do store motion towards the exit.
3781 : In order for this to be effective, gcse also needed to be taught how to
3782 : move a load when it is killed only by a store to itself.
3783 :
3784 : int i;
3785 : float a[10];
3786 :
3787 : void foo(float scale)
3788 : {
3789 : for (i=0; i<10; i++)
3790 : a[i] *= scale;
3791 : }
3792 :
3793 : 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3794 : the load out since it's live around the loop, and stored at the bottom
3795 : of the loop.
3796 :
3797 : The 'Load Motion' referred to and implemented in this file is
3798 : an enhancement to gcse which when using edge based LCM, recognizes
3799 : this situation and allows gcse to move the load out of the loop.
3800 :
3801 : Once gcse has hoisted the load, store motion can then push this
3802 : load towards the exit, and we end up with no loads or stores of 'i'
3803 : in the loop. */
3804 :
3805 : /* This will search the ldst list for a matching expression. If it
3806 : doesn't find one, we create one and initialize it. */
3807 :
3808 : static struct ls_expr *
3809 14309174 : ldst_entry (rtx x)
3810 : {
3811 14309174 : int do_not_record_p = 0;
3812 14309174 : struct ls_expr * ptr;
3813 14309174 : unsigned int hash;
3814 14309174 : ls_expr **slot;
3815 14309174 : struct ls_expr e;
3816 :
3817 14309174 : hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3818 : NULL, /*have_reg_qty=*/false);
3819 :
3820 14309174 : e.pattern = x;
3821 14309174 : slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
3822 14309174 : if (*slot)
3823 : return *slot;
3824 :
3825 9508350 : ptr = XNEW (struct ls_expr);
3826 :
3827 9508350 : ptr->next = pre_ldst_mems;
3828 9508350 : ptr->expr = NULL;
3829 9508350 : ptr->pattern = x;
3830 9508350 : ptr->pattern_regs = NULL_RTX;
3831 9508350 : ptr->stores.create (0);
3832 9508350 : ptr->reaching_reg = NULL_RTX;
3833 9508350 : ptr->invalid = 0;
3834 9508350 : ptr->index = 0;
3835 9508350 : ptr->hash_index = hash;
3836 9508350 : pre_ldst_mems = ptr;
3837 9508350 : *slot = ptr;
3838 :
3839 9508350 : return ptr;
3840 : }
3841 :
3842 : /* Free up an individual ldst entry. */
3843 :
static void
free_ldst_entry (struct ls_expr * ptr)
{
  /* Release the vector of store insns before freeing the node itself.  */
  ptr->stores.release ();

  free (ptr);
}
3851 :
3852 : /* Free up all memory associated with the ldst list. */
3853 :
3854 : static void
3855 478253 : free_ld_motion_mems (void)
3856 : {
3857 478253 : delete pre_ldst_table;
3858 478253 : pre_ldst_table = NULL;
3859 :
3860 3653521 : while (pre_ldst_mems)
3861 : {
3862 3175268 : struct ls_expr * tmp = pre_ldst_mems;
3863 :
3864 3175268 : pre_ldst_mems = pre_ldst_mems->next;
3865 :
3866 3175268 : free_ldst_entry (tmp);
3867 : }
3868 :
3869 478253 : pre_ldst_mems = NULL;
3870 478253 : }
3871 :
3872 : /* Dump debugging info about the ldst list. */
3873 :
3874 : static void
3875 16 : print_ldst_list (FILE * file)
3876 : {
3877 16 : struct ls_expr * ptr;
3878 :
3879 16 : fprintf (file, "LDST list: \n");
3880 :
3881 56 : for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3882 : {
3883 40 : fprintf (file, " Pattern (%3d): ", ptr->index);
3884 :
3885 40 : print_rtl (file, ptr->pattern);
3886 :
3887 40 : fprintf (file, "\n Stores : ");
3888 40 : print_rtx_insn_vec (file, ptr->stores);
3889 :
3890 40 : fprintf (file, "\n\n");
3891 : }
3892 :
3893 16 : fprintf (file, "\n");
3894 16 : }
3895 :
3896 : /* Returns 1 if X is in the list of ldst only expressions. */
3897 :
3898 : static struct ls_expr *
3899 650913 : find_rtx_in_ldst (rtx x)
3900 : {
3901 650913 : struct ls_expr e;
3902 650913 : ls_expr **slot;
3903 650913 : if (!pre_ldst_table)
3904 : return NULL;
3905 650913 : e.pattern = x;
3906 650913 : slot = pre_ldst_table->find_slot (&e, NO_INSERT);
3907 650913 : if (!slot || (*slot)->invalid)
3908 : return NULL;
3909 : return *slot;
3910 : }
3911 :
3912 : /* Load Motion for loads which only kill themselves. */
3913 :
3914 : /* Return true if x, a MEM, is a simple access with no side effects.
3915 : These are the types of loads we consider for the ld_motion list,
3916 : otherwise we let the usual aliasing take care of it. */
3917 :
3918 : static bool
3919 19064484 : simple_mem (const_rtx x)
3920 : {
3921 19064484 : if (MEM_VOLATILE_P (x))
3922 : return false;
3923 :
3924 18199910 : if (GET_MODE (x) == BLKmode)
3925 : return false;
3926 :
3927 : /* If we are handling exceptions, we must be careful with memory references
3928 : that may trap. If we are not, the behavior is undefined, so we may just
3929 : continue. */
3930 18162186 : if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3931 : return false;
3932 :
3933 15989186 : if (side_effects_p (x))
3934 : return false;
3935 :
3936 : /* Do not consider function arguments passed on stack. */
3937 14552063 : if (reg_mentioned_p (stack_pointer_rtx, x))
3938 : return false;
3939 :
3940 14311161 : if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3941 : return false;
3942 :
3943 : return true;
3944 : }
3945 :
3946 : /* Make sure there isn't a buried reference in this pattern anywhere.
3947 : If there is, invalidate the entry for it since we're not capable
3948 : of fixing it up just yet.. We have to be sure we know about ALL
3949 : loads since the aliasing code will allow all entries in the
3950 : ld_motion list to not-alias itself. If we miss a load, we will get
3951 : the wrong value since gcse might common it and we won't know to
3952 : fix it up. */
3953 :
3954 : static void
3955 164036511 : invalidate_any_buried_refs (rtx x)
3956 : {
3957 164036511 : const char * fmt;
3958 164036511 : int i, j;
3959 164036511 : struct ls_expr * ptr;
3960 :
3961 : /* Invalidate it in the list. */
3962 164036511 : if (MEM_P (x) && simple_mem (x))
3963 : {
3964 4954065 : ptr = ldst_entry (x);
3965 4954065 : ptr->invalid = 1;
3966 : }
3967 :
3968 : /* Recursively process the insn. */
3969 164036511 : fmt = GET_RTX_FORMAT (GET_CODE (x));
3970 :
3971 383927830 : for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3972 : {
3973 219891319 : if (fmt[i] == 'e')
3974 98684957 : invalidate_any_buried_refs (XEXP (x, i));
3975 121206362 : else if (fmt[i] == 'E')
3976 29133481 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3977 19819316 : invalidate_any_buried_refs (XVECEXP (x, i, j));
3978 : }
3979 164036511 : }
3980 :
3981 : /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3982 : being defined as MEM loads and stores to symbols, with no side effects
3983 : and no registers in the expression. For a MEM destination, we also
3984 : check that the insn is still valid if we replace the destination with a
3985 : REG, as is done in update_ld_motion_stores. If there are any uses/defs
3986 : which don't match this criteria, they are invalidated and trimmed out
3987 : later. */
3988 :
static void
compute_ld_motion_mems (void)
{
  struct ls_expr * ptr;
  basic_block bb;
  rtx_insn *insn;

  pre_ldst_mems = NULL;
  pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (NONDEBUG_INSN_P (insn))
	    {
	      if (GET_CODE (PATTERN (insn)) == SET)
		{
		  rtx src = SET_SRC (PATTERN (insn));
		  rtx dest = SET_DEST (PATTERN (insn));

		  /* Check for a simple load.  */
		  if (MEM_P (src) && simple_mem (src))
		    {
		      ptr = ldst_entry (src);
		      /* A load into anything but a REG cannot be fixed
			 up later, so invalidate the entry.  */
		      if (!REG_P (dest))
			ptr->invalid = 1;
		    }
		  else
		    {
		      /* Make sure there isn't a buried load somewhere.  */
		      invalidate_any_buried_refs (src);
		    }

		  /* Check for a simple load through a REG_EQUAL note.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0))
		      && !(MEM_P (src_eq) && simple_mem (src_eq)))
		    invalidate_any_buried_refs (src_eq);

		  /* Check for stores. Don't worry about aliased ones, they
		     will block any movement we might do later. We only care
		     about this exact pattern since those are the only
		     circumstance that we will ignore the aliasing info.  */
		  if (MEM_P (dest) && simple_mem (dest))
		    {
		      ptr = ldst_entry (dest);
		      machine_mode src_mode = GET_MODE (src);
		      /* Only record stores whose source could later be
			 replaced by a REG (see update_ld_motion_stores).  */
		      if (! MEM_P (src)
			  && GET_CODE (src) != ASM_OPERANDS
			  /* Check for REG manually since want_to_gcse_p
			     returns 0 for all REGs.  */
			  && can_assign_to_reg_without_clobbers_p (src,
								   src_mode))
			ptr->stores.safe_push (insn);
		      else
			ptr->invalid = 1;
		    }
		}
	      else
		{
		  /* Invalidate all MEMs in the pattern and...  */
		  invalidate_any_buried_refs (PATTERN (insn));

		  /* ...in REG_EQUAL notes for PARALLELs with single SET.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0)))
		    invalidate_any_buried_refs (src_eq);
		}
	    }
	}
    }
}
4066 :
4067 : /* Remove any references that have been either invalidated or are not in the
4068 : expression list for pre gcse. */
4069 :
static void
trim_ld_motion_mems (void)
{
  /* LAST points at the link through which PTR was reached, so that
     unlinking PTR is a single pointer store.  */
  struct ls_expr * * last = & pre_ldst_mems;
  struct ls_expr * ptr = pre_ldst_mems;

  while (ptr != NULL)
    {
      struct gcse_expr * expr;

      /* Delete if entry has been made invalid.  */
      if (! ptr->invalid)
	{
	  /* Delete if we cannot find this mem in the expression list.  */
	  unsigned int hash = ptr->hash_index % expr_hash_table.size;

	  for (expr = expr_hash_table.table[hash];
	       expr != NULL;
	       expr = expr->next_same_hash)
	    if (expr_equiv_p (expr->expr, ptr->pattern))
	      break;
	}
      else
	expr = (struct gcse_expr *) 0;

      if (expr)
	{
	  /* Set the expression field if we are keeping it.  */
	  ptr->expr = expr;
	  last = & ptr->next;
	  ptr = ptr->next;
	}
      else
	{
	  /* Unlink PTR from the list, drop it from the hash table and
	     release its storage; LAST stays put.  */
	  *last = ptr->next;
	  pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
	  free_ldst_entry (ptr);
	  ptr = * last;
	}
    }

  /* Show the world what we've found.  */
  if (dump_file && pre_ldst_mems != NULL)
    print_ldst_list (dump_file);
}
4115 :
4116 : /* This routine will take an expression which we are replacing with
4117 : a reaching register, and update any stores that are needed if
4118 : that expression is in the ld_motion list. Stores are updated by
4119 : copying their SRC to the reaching register, and then storing
4120 : the reaching register into the store location. These keeps the
4121 : correct value in the reaching register for the loads. */
4122 :
static void
update_ld_motion_stores (struct gcse_expr * expr)
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We can try to find just the REACHED stores, but it shouldn't
	 matter to set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */

      /* We replace (set mem expr) with (set reg expr) (set mem reg)
	 where reg is the reaching reg used in the load.  We checked in
	 compute_ld_motion_mems that we can replace (set mem expr) with
	 (set reg expr) in that insn.  */
      rtx_insn *insn;
      unsigned int i;
      FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
	{
	  rtx pat = PATTERN (insn);
	  rtx src = SET_SRC (pat);
	  rtx reg = expr->reaching_reg;

	  /* If we've already copied it, continue.  */
	  if (expr->reaching_reg == src)
	    continue;

	  if (dump_file)
	    {
	      fprintf (dump_file, "PRE: store updated with reaching reg ");
	      print_rtl (dump_file, reg);
	      fprintf (dump_file, ":\n ");
	      print_inline_rtx (dump_file, insn, 8);
	      fprintf (dump_file, "\n");
	    }

	  /* Emit "reg = src" just before the store, then make the store
	     read from REG.  This keeps the correct value in the reaching
	     register for the loads.  */
	  rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
	  emit_insn_before (copy, insn);
	  SET_SRC (pat) = reg;
	  df_insn_rescan (insn);

	  /* un-recognize this pattern since it's probably different now.  */
	  INSN_CODE (insn) = -1;
	  gcse_create_count++;
	}
    }
}
4170 :
4171 : /* Return true if the graph is too expensive to optimize. PASS is the
4172 : optimization about to be performed. */
4173 :
4174 : bool
4175 2112255 : gcse_or_cprop_is_too_expensive (const char *pass)
4176 : {
4177 2112255 : unsigned HOST_WIDE_INT memory_request
4178 2112255 : = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun)
4179 2112255 : * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE));
4180 :
4181 : /* Trying to perform global optimizations on flow graphs which have
4182 : a high connectivity will take a long time and is unlikely to be
4183 : particularly useful.
4184 :
4185 : In normal circumstances a cfg should have about twice as many
4186 : edges as blocks. But we do not want to punish small functions
4187 : which have a couple switch statements. Rather than simply
4188 : threshold the number of blocks, uses something with a more
4189 : graceful degradation. */
4190 2112255 : if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
4191 : {
4192 0 : warning (OPT_Wdisabled_optimization,
4193 : "%s: %d basic blocks and %d edges/basic block",
4194 : pass, n_basic_blocks_for_fn (cfun),
4195 : n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
4196 :
4197 0 : return true;
4198 : }
4199 :
4200 : /* If allocating memory for the dataflow bitmaps would take up too much
4201 : storage it's better just to disable the optimization. */
4202 2112255 : if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory)
4203 : {
4204 0 : warning (OPT_Wdisabled_optimization,
4205 : "%s: %d basic blocks and %d registers; "
4206 : "increase %<--param max-gcse-memory%> above %wu",
4207 0 : pass, n_basic_blocks_for_fn (cfun), max_reg_num (),
4208 : memory_request / 1024);
4209 :
4210 0 : return true;
4211 : }
4212 :
4213 : return false;
4214 : }
4215 :
4216 : static unsigned int
4217 899223 : execute_rtl_pre (void)
4218 : {
4219 899223 : int changed;
4220 899223 : delete_unreachable_blocks ();
4221 899223 : df_analyze ();
4222 899223 : changed = one_pre_gcse_pass ();
4223 899223 : flag_rerun_cse_after_global_opts |= changed;
4224 899223 : if (changed)
4225 114371 : cleanup_cfg (0);
4226 899223 : return 0;
4227 : }
4228 :
static unsigned int
execute_hardreg_pre (void)
{
  /* Only built for targets defining the set of hard registers to
     optimize (HARDREG_PRE_REGNOS); otherwise this is a no-op.  */
#ifdef HARDREG_PRE_REGNOS
  doing_hardreg_pre_p = true;
  unsigned int regnos[] = HARDREG_PRE_REGNOS;
  /* It's possible to avoid this loop, but it isn't worth doing so until
     hardreg PRE is used for multiple hardregs.  */
  for (int i = 0; regnos[i] != 0; i++)
    {
      int changed;
      current_hardreg_regno = regnos[i];
      /* Skip registers the function never uses.  */
      if (!df_regs_ever_live_p (current_hardreg_regno))
	{
	  if (dump_file)
	    fprintf (dump_file, "Skipping hardreg PRE for regno %d, which is never live\n",
		     current_hardreg_regno);
	  continue;
	}
      if (dump_file)
	fprintf (dump_file, "Entering hardreg PRE for regno %d\n",
		 current_hardreg_regno);
      delete_unreachable_blocks ();
      df_analyze ();
      changed = one_pre_gcse_pass ();
      if (changed)
	cleanup_cfg (0);
    }
  doing_hardreg_pre_p = false;
#endif
  return 0;
}
4261 :
4262 : static unsigned int
4263 63999 : execute_rtl_hoist (void)
4264 : {
4265 63999 : int changed;
4266 63999 : delete_unreachable_blocks ();
4267 63999 : df_analyze ();
4268 63999 : changed = one_code_hoisting_pass ();
4269 63999 : flag_rerun_cse_after_global_opts |= changed;
4270 63999 : if (changed)
4271 3109 : cleanup_cfg (0);
4272 63999 : return 0;
4273 : }
4274 :
namespace {

/* Pass metadata for the RTL partial redundancy elimination pass.  */
const pass_data pass_data_rtl_pre =
{
  RTL_PASS, /* type */
  "rtl pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_PRE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_pre : public rtl_opt_pass
{
public:
  pass_rtl_pre (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_pre, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
  {
    return execute_rtl_pre ();
  }

}; // class pass_rtl_pre

/* We do not construct an accurate cfg in functions which call
   setjmp, so none of these passes runs if the function calls
   setjmp.
   FIXME: Should just handle setjmp via REG_SETJMP notes.  */

bool
pass_rtl_pre::gate (function *fun)
{
  /* Run only when optimizing for speed with GCSE enabled.  */
  return optimize > 0 && flag_gcse
    && !fun->calls_setjmp
    && optimize_function_for_speed_p (fun)
    && dbg_cnt (pre);
}

} // anon namespace
4321 :
/* Build a new instance of the RTL PRE pass for the pass manager.  */

rtl_opt_pass *
make_pass_rtl_pre (gcc::context *ctxt)
{
  return new pass_rtl_pre (ctxt);
}
4327 :
namespace {

/* Pass metadata for hardreg PRE (PRE restricted to particular hard
   registers on targets that define HARDREG_PRE_REGNOS).  */
const pass_data pass_data_hardreg_pre =
{
  RTL_PASS, /* type */
  "hardreg_pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_PRE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_hardreg_pre : public rtl_opt_pass
{
public:
  pass_hardreg_pre (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_hardreg_pre, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
  {
    return execute_hardreg_pre ();
  }

}; // class pass_hardreg_pre

bool
pass_hardreg_pre::gate (function * ARG_UNUSED (fun))
{
  /* Enabled only on targets that define HARDREG_PRE_REGNOS.  */
#ifdef HARDREG_PRE_REGNOS
  return optimize > 0
    && !fun->calls_setjmp;
#else
  return false;
#endif
}

} // anon namespace
4371 :
/* Build a new instance of the hardreg PRE pass for the pass manager.  */

rtl_opt_pass *
make_pass_hardreg_pre (gcc::context *ctxt)
{
  return new pass_hardreg_pre (ctxt);
}
4377 :
namespace {

/* Pass metadata for the RTL code hoisting pass.  */
const pass_data pass_data_rtl_hoist =
{
  RTL_PASS, /* type */
  "hoist", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_HOIST, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_hoist : public rtl_opt_pass
{
public:
  pass_rtl_hoist (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
  {
    return execute_rtl_hoist ();
  }

}; // class pass_rtl_hoist

bool
pass_rtl_hoist::gate (function *)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    /* It does not make sense to run code hoisting unless we are optimizing
       for code size -- it rarely makes programs faster, and can make them
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
    && optimize_function_for_size_p (cfun)
    && dbg_cnt (hoist);
}

} // anon namespace
4422 :
/* Build a new instance of the RTL code hoisting pass for the pass
   manager.  */

rtl_opt_pass *
make_pass_rtl_hoist (gcc::context *ctxt)
{
  return new pass_rtl_hoist (ctxt);
}
4428 :
4429 : /* Reset all state within gcse.cc so that we can rerun the compiler
4430 : within the same process. For use by toplev::finalize. */
4431 :
void
gcse_cc_finalize (void)
{
  /* Drop the cached scratch insn (a file-static defined earlier in this
     file) so that a subsequent in-process compilation rebuilds it
     instead of reusing garbage-collected state.  */
  test_insn = NULL;
}
4437 :
4438 : #include "gt-gcse.h"
|