Branch data Line data Source code
1 : : /* Partial redundancy elimination / Hoisting for RTL.
2 : : Copyright (C) 1997-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : /* TODO
21 : : - reordering of memory allocation and freeing to be more space efficient
22 : : - calc rough register pressure information and use the info to drive all
23 : : kinds of code motion (including code hoisting) in a unified way.
24 : : */
25 : :
26 : : /* References searched while implementing this.
27 : :
28 : : Compilers Principles, Techniques and Tools
29 : : Aho, Sethi, Ullman
30 : : Addison-Wesley, 1988
31 : :
32 : : Global Optimization by Suppression of Partial Redundancies
33 : : E. Morel, C. Renvoise
34 : : communications of the acm, Vol. 22, Num. 2, Feb. 1979
35 : :
36 : : A Portable Machine-Independent Global Optimizer - Design and Measurements
37 : : Frederick Chow
38 : : Stanford Ph.D. thesis, Dec. 1983
39 : :
40 : : A Fast Algorithm for Code Movement Optimization
41 : : D.M. Dhamdhere
42 : : SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
43 : :
44 : : A Solution to a Problem with Morel and Renvoise's
45 : : Global Optimization by Suppression of Partial Redundancies
46 : : K-H Drechsler, M.P. Stadel
47 : : ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
48 : :
49 : : Practical Adaptation of the Global Optimization
50 : : Algorithm of Morel and Renvoise
51 : : D.M. Dhamdhere
52 : : ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
53 : :
54 : : Efficiently Computing Static Single Assignment Form and the Control
55 : : Dependence Graph
56 : : R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
57 : : ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
58 : :
59 : : Lazy Code Motion
60 : : J. Knoop, O. Ruthing, B. Steffen
61 : : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
62 : :
63 : : What's In a Region? Or Computing Control Dependence Regions in Near-Linear
64 : : Time for Reducible Flow Control
65 : : Thomas Ball
66 : : ACM Letters on Programming Languages and Systems,
67 : : Vol. 2, Num. 1-4, Mar-Dec 1993
68 : :
69 : : An Efficient Representation for Sparse Sets
70 : : Preston Briggs, Linda Torczon
71 : : ACM Letters on Programming Languages and Systems,
72 : : Vol. 2, Num. 1-4, Mar-Dec 1993
73 : :
74 : : A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
75 : : K-H Drechsler, M.P. Stadel
76 : : ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
77 : :
78 : : Partial Dead Code Elimination
79 : : J. Knoop, O. Ruthing, B. Steffen
80 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
81 : :
82 : : Effective Partial Redundancy Elimination
83 : : P. Briggs, K.D. Cooper
84 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
85 : :
86 : : The Program Structure Tree: Computing Control Regions in Linear Time
87 : : R. Johnson, D. Pearson, K. Pingali
88 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
89 : :
90 : : Optimal Code Motion: Theory and Practice
91 : : J. Knoop, O. Ruthing, B. Steffen
92 : : ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
93 : :
94 : : The power of assignment motion
95 : : J. Knoop, O. Ruthing, B. Steffen
96 : : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
97 : :
98 : : Global code motion / global value numbering
99 : : C. Click
100 : : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
101 : :
102 : : Value Driven Redundancy Elimination
103 : : L.T. Simpson
104 : : Rice University Ph.D. thesis, Apr. 1996
105 : :
106 : : Value Numbering
107 : : L.T. Simpson
108 : : Massively Scalar Compiler Project, Rice University, Sep. 1996
109 : :
110 : : High Performance Compilers for Parallel Computing
111 : : Michael Wolfe
112 : : Addison-Wesley, 1996
113 : :
114 : : Advanced Compiler Design and Implementation
115 : : Steven Muchnick
116 : : Morgan Kaufmann, 1997
117 : :
118 : : Building an Optimizing Compiler
119 : : Robert Morgan
120 : : Digital Press, 1998
121 : :
122 : : People wishing to speed up the code here should read:
123 : : Elimination Algorithms for Data Flow Analysis
124 : : B.G. Ryder, M.C. Paull
125 : : ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
126 : :
127 : : How to Analyze Large Programs Efficiently and Informatively
128 : : D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
129 : : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
130 : :
131 : : People wishing to do something different can find various possibilities
132 : : in the above papers and elsewhere.
133 : : */
134 : :
135 : : #include "config.h"
136 : : #include "system.h"
137 : : #include "coretypes.h"
138 : : #include "backend.h"
139 : : #include "target.h"
140 : : #include "rtl.h"
141 : : #include "tree.h"
142 : : #include "predict.h"
143 : : #include "df.h"
144 : : #include "memmodel.h"
145 : : #include "tm_p.h"
146 : : #include "insn-config.h"
147 : : #include "print-rtl.h"
148 : : #include "regs.h"
149 : : #include "ira.h"
150 : : #include "recog.h"
151 : : #include "diagnostic-core.h"
152 : : #include "cfgrtl.h"
153 : : #include "cfganal.h"
154 : : #include "lcm.h"
155 : : #include "cfgcleanup.h"
156 : : #include "expr.h"
157 : : #include "intl.h"
158 : : #include "tree-pass.h"
159 : : #include "dbgcnt.h"
160 : : #include "gcse.h"
161 : : #include "gcse-common.h"
162 : : #include "function-abi.h"
163 : :
164 : : /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
165 : : are a superset of those done by classic GCSE.
166 : :
167 : : Two passes of copy/constant propagation are done around PRE or hoisting
168 : : because the first one enables more GCSE and the second one helps to clean
169 : : up the copies that PRE and HOIST create. This is needed more for PRE than
170 : : for HOIST because code hoisting will try to use an existing register
171 : : containing the common subexpression rather than create a new one. This is
172 : : harder to do for PRE because of the code motion (which HOIST doesn't do).
173 : :
174 : : Expressions we are interested in GCSE-ing are of the form
175 : : (set (pseudo-reg) (expression)).
176 : : Function want_to_gcse_p says what these are.
177 : :
178 : : In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
179 : : This allows PRE to hoist expressions that are expressed in multiple insns,
180 : : such as complex address calculations (e.g. for PIC code, or loads with a
181 : : high part and a low part).
182 : :
183 : : PRE handles moving invariant expressions out of loops (by treating them as
184 : : partially redundant).
185 : :
186 : : **********************
187 : :
188 : : We used to support multiple passes but there are diminishing returns in
189 : : doing so. The first pass usually makes 90% of the changes that are doable.
190 : : A second pass can make a few more changes made possible by the first pass.
191 : : Experiments show any further passes don't make enough changes to justify
192 : : the expense.
193 : :
194 : : A study of spec92 using an unlimited number of passes:
195 : : [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
196 : : [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
197 : : [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
198 : :
199 : : It was found doing copy propagation between each pass enables further
200 : : substitutions.
201 : :
202 : : This study was done before expressions in REG_EQUAL notes were added as
203 : : candidate expressions for optimization, and before the GIMPLE optimizers
204 : : were added. Probably, multiple passes is even less efficient now than
205 : : at the time when the study was conducted.
206 : :
207 : : PRE is quite expensive in complicated functions because the DFA can take
208 : : a while to converge. Hence we only perform one pass.
209 : :
210 : : **********************
211 : :
212 : : The steps for PRE are:
213 : :
214 : : 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
215 : :
216 : : 2) Perform the data flow analysis for PRE.
217 : :
218 : : 3) Delete the redundant instructions
219 : :
220 : : 4) Insert the required copies [if any] that make the partially
221 : : redundant instructions fully redundant.
222 : :
223 : : 5) For other reaching expressions, insert an instruction to copy the value
224 : : to a newly created pseudo that will reach the redundant instruction.
225 : :
226 : : The deletion is done first so that when we do insertions we
227 : : know which pseudo reg to use.
228 : :
229 : : Various papers have argued that PRE DFA is expensive (O(n^2)) and others
230 : : argue it is not. The number of iterations for the algorithm to converge
231 : : is typically 2-4 so I don't view it as that expensive (relatively speaking).
232 : :
233 : : PRE GCSE depends heavily on the second CPROP pass to clean up the copies
234 : : we create. To make an expression reach the place where it's redundant,
235 : : the result of the expression is copied to a new register, and the redundant
236 : : expression is deleted by replacing it with this new register. Classic GCSE
237 : : doesn't have this problem as much as it computes the reaching defs of
238 : : each register in each block and thus can try to use an existing
239 : : register. */
240 : :
241 : : /* GCSE global vars. */
242 : :
243 : : struct target_gcse default_target_gcse;
244 : : #if SWITCHABLE_TARGET
245 : : struct target_gcse *this_target_gcse = &default_target_gcse;
246 : : #endif
247 : :
248 : : /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
249 : : int flag_rerun_cse_after_global_opts;
250 : :
251 : : /* An obstack for our working variables. */
252 : : static struct obstack gcse_obstack;
253 : :
254 : : /* Hash table of expressions. */
255 : :
/* A single expression entry in the expression hash table.  */
struct gcse_expr
{
  /* The expression.  */
  rtx expr;
  /* Index in the available expression bitmaps (also used as the bit
     index into the ANTLOC/COMP/TRANSP rows built by
     compute_local_properties).  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct gcse_expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct gcse_occr *antic_occr;
  /* List of available occurrence in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct gcse_occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.
     Cleared at the start of each pass by compute_local_properties.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  HOST_WIDE_INT max_distance;
};
286 : :
/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct gcse_occr
{
  /* Next occurrence of this expression.  */
  struct gcse_occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.
     Cleared by compute_local_properties while scanning the hash table.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  Cleared by compute_local_properties while scanning
     the hash table.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
305 : :
306 : : typedef struct gcse_occr *occr_t;
307 : :
308 : : /* Expression hash tables.
309 : : Each hash table is an array of buckets.
310 : : ??? It is known that if it were an array of entries, structure elements
311 : : `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
312 : : not clear whether in the final analysis a sufficient amount of memory would
313 : : be saved as the size of the available expression bitmaps would be larger
314 : : [one could build a mapping table without holes afterwards though].
315 : : Someday I'll perform the computation and figure it out. */
316 : :
struct gcse_hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  Each element
     is the head of a chain of gcse_expr entries linked through their
     `next_same_hash' fields.  */
  struct gcse_expr **table;

  /* Size of the hash table, in elements.  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;
};
329 : :
330 : : /* Expression hash table. */
331 : : static struct gcse_hash_table_d expr_hash_table;
332 : :
333 : : /* This is a list of expressions which are MEMs and will be used by load
334 : : or store motion.
335 : : Load motion tracks MEMs which aren't killed by anything except itself,
336 : : i.e. loads and stores to a single location.
337 : : We can then allow movement of these MEM refs with a little special
338 : : allowance. (all stores copy the same value to the reaching reg used
339 : : for the loads). This means all values used to store into memory must have
340 : : no side effects so we can re-issue the setter value. */
341 : :
/* A MEM expression tracked for load/store motion (see the list-head
   comment above).  */
struct ls_expr
{
  struct gcse_expr * expr;	/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  vec<rtx_insn *> stores;	/* INSN list of stores seen.  */
  struct ls_expr * next;	/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};
354 : :
355 : : /* Head of the list of load/store memory refs. */
356 : : static struct ls_expr * pre_ldst_mems = NULL;
357 : :
/* Hash-table traits for ls_expr entries.  Hashing and equality are
   defined below in terms of the entry's memory PATTERN; entries are
   not owned (and hence not freed) by the table.  */
struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
{
  /* Lookups compare against a full ls_expr, not a separate key type.  */
  typedef value_type compare_type;
  static inline hashval_t hash (const ls_expr *);
  static inline bool equal (const ls_expr *, const ls_expr *);
};
364 : :
365 : : /* Hashtable helpers. */
366 : : inline hashval_t
367 : 97082075 : pre_ldst_expr_hasher::hash (const ls_expr *x)
368 : : {
369 : 97082075 : int do_not_record_p = 0;
370 : 97082075 : return
371 : 97082075 : hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
372 : : }
373 : :
374 : : static bool expr_equiv_p (const_rtx, const_rtx);
375 : :
/* Return true if the memory patterns of PTR1 and PTR2 are equivalent
   RTL expressions.  */
inline bool
pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
			     const ls_expr *ptr2)
{
  return expr_equiv_p (ptr1->pattern, ptr2->pattern);
}
382 : :
383 : : /* Hashtable for the load/store memory refs. */
384 : : static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;
385 : :
386 : : /* Bitmap containing one bit for each register in the program.
387 : : Used when performing GCSE to track which registers have been set since
388 : : the start of the basic block. */
389 : : static regset reg_set_bitmap;
390 : :
391 : : /* Array, indexed by basic block number for a list of insns which modify
392 : : memory within that block. */
393 : : static vec<rtx_insn *> *modify_mem_list;
394 : : static bitmap modify_mem_list_set;
395 : :
396 : : /* This array parallels modify_mem_list, except that it stores MEMs
397 : : being set and their canonicalized memory addresses. */
398 : : static vec<modify_pair> *canon_modify_mem_list;
399 : :
400 : : /* Bitmap indexed by block numbers to record which blocks contain
401 : : function calls. */
402 : : static bitmap blocks_with_calls;
403 : :
404 : : /* Various variables for statistics gathering. */
405 : :
406 : : /* Memory used in a pass.
407 : : This isn't intended to be absolutely precise. Its intent is only
408 : : to keep an eye on memory usage. */
409 : : static int bytes_used;
410 : :
411 : : /* GCSE substitutions made. */
412 : : static int gcse_subst_count;
413 : : /* Number of copy instructions created. */
414 : : static int gcse_create_count;
415 : :
416 : : /* Doing code hoisting. */
417 : : static bool doing_code_hoisting_p = false;
418 : :
419 : : /* Doing hardreg_pre. */
420 : : static bool doing_hardreg_pre_p = false;
421 : :
422 : : inline bool
423 : 21704822 : do_load_motion ()
424 : : {
425 : 21704780 : return flag_gcse_lm && !doing_hardreg_pre_p;
426 : : }
427 : :
428 : : static unsigned int current_hardreg_regno;
429 : :
430 : : /* For available exprs */
431 : : static sbitmap *ae_kill;
432 : :
/* Data stored for each basic block during code hoisting (attached to
   bb->aux via the BB_DATA macro below).  */
struct bb_data
{
  /* Maximal register pressure inside basic block for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Recorded register pressure of basic block before trying to hoist
     an expression.  Will be used to restore the register pressure
     if the expression should not be hoisted.  */
  int old_pressure;
  /* Recorded register live_in info of basic block during code hoisting
     process.  BACKUP is used to record live_in info before trying to
     hoist an expression, and will be used to restore LIVE_IN if the
     expression should not be hoisted.  */
  bitmap live_in, backup;
};
449 : :
450 : : #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
451 : :
452 : : static basic_block curr_bb;
453 : :
454 : : /* Current register pressure for each pressure class. */
455 : : static int curr_reg_pressure[N_REG_CLASSES];
456 : :
457 : :
458 : : static void compute_can_copy (void);
459 : : static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
460 : : static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
461 : : static void *gcse_alloc (unsigned long);
462 : : static void alloc_gcse_mem (void);
463 : : static void free_gcse_mem (void);
464 : : static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
465 : : static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
466 : : static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
467 : : static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
468 : : static bool oprs_unchanged_p (const_rtx, const rtx_insn *, bool);
469 : : static bool oprs_anticipatable_p (const_rtx, const rtx_insn *);
470 : : static bool oprs_available_p (const_rtx, const rtx_insn *);
471 : : static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, bool, bool,
472 : : HOST_WIDE_INT, struct gcse_hash_table_d *);
473 : : static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
474 : : static void record_last_reg_set_info (rtx_insn *, int);
475 : : static void record_last_mem_set_info (rtx_insn *);
476 : : static void record_last_set_info (rtx, const_rtx, void *);
477 : : static void compute_hash_table (struct gcse_hash_table_d *);
478 : : static void alloc_hash_table (struct gcse_hash_table_d *);
479 : : static void free_hash_table (struct gcse_hash_table_d *);
480 : : static void compute_hash_table_work (struct gcse_hash_table_d *);
481 : : static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
482 : : static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
483 : : struct gcse_hash_table_d *);
484 : : static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
485 : : static bool load_killed_in_block_p (const_basic_block, int, const_rtx, bool);
486 : : static void alloc_pre_mem (int, int);
487 : : static void free_pre_mem (void);
488 : : static struct edge_list *compute_pre_data (void);
489 : : static bool pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
490 : : basic_block);
491 : : static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
492 : : static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
493 : : static void pre_insert_copies (void);
494 : : static bool pre_delete (void);
495 : : static bool pre_gcse (struct edge_list *);
496 : : static bool one_pre_gcse_pass (void);
497 : : static void add_label_notes (rtx, rtx_insn *);
498 : : static void alloc_code_hoist_mem (int, int);
499 : : static void free_code_hoist_mem (void);
500 : : static void compute_code_hoist_vbeinout (void);
501 : : static void compute_code_hoist_data (void);
502 : : static bool should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
503 : : basic_block,
504 : : sbitmap, HOST_WIDE_INT, int *,
505 : : enum reg_class,
506 : : int *, bitmap, rtx_insn *);
507 : : static bool hoist_code (void);
508 : : static enum reg_class get_regno_pressure_class (int regno, int *nregs);
509 : : static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
510 : : static bool one_code_hoisting_pass (void);
511 : : static rtx_insn *process_insert_insn (struct gcse_expr *);
512 : : static bool pre_edge_insert (struct edge_list *, struct gcse_expr **);
513 : : static bool pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
514 : : basic_block, char *);
515 : : static struct ls_expr * ldst_entry (rtx);
516 : : static void free_ldst_entry (struct ls_expr *);
517 : : static void free_ld_motion_mems (void);
518 : : static void print_ldst_list (FILE *);
519 : : static struct ls_expr * find_rtx_in_ldst (rtx);
520 : : static bool simple_mem (const_rtx);
521 : : static void invalidate_any_buried_refs (rtx);
522 : : static void compute_ld_motion_mems (void);
523 : : static void trim_ld_motion_mems (void);
524 : : static void update_ld_motion_stores (struct gcse_expr *);
525 : : static void clear_modify_mem_tables (void);
526 : : static void free_modify_mem_tables (void);
527 : :
528 : : #define GNEW(T) ((T *) gmalloc (sizeof (T)))
529 : : #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
530 : :
531 : : #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
532 : : #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
533 : :
534 : : #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
535 : : #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
536 : :
537 : : #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
538 : : #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
539 : :
540 : : /* Misc. utilities. */
541 : :
542 : : #define can_copy \
543 : : (this_target_gcse->x_can_copy)
544 : : #define can_copy_init_p \
545 : : (this_target_gcse->x_can_copy_init_p)
546 : :
547 : : /* Compute which modes support reg/reg copy operations. */
548 : :
549 : : static void
550 : 83349 : compute_can_copy (void)
551 : : {
552 : 83349 : int i;
553 : : #ifndef AVOID_CCMODE_COPIES
554 : : rtx reg;
555 : : rtx_insn *insn;
556 : : #endif
557 : 83349 : memset (can_copy, 0, NUM_MACHINE_MODES);
558 : :
559 : 83349 : start_sequence ();
560 : 11002068 : for (i = 0; i < NUM_MACHINE_MODES; i++)
561 : 10835370 : if (GET_MODE_CLASS (i) == MODE_CC)
562 : : {
563 : : #ifdef AVOID_CCMODE_COPIES
564 : 1000188 : can_copy[i] = 0;
565 : : #else
566 : : reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
567 : : insn = emit_insn (gen_rtx_SET (reg, reg));
568 : : if (recog (PATTERN (insn), insn, NULL) >= 0)
569 : : can_copy[i] = 1;
570 : : #endif
571 : : }
572 : : else
573 : 9835182 : can_copy[i] = 1;
574 : :
575 : 83349 : end_sequence ();
576 : 83349 : }
577 : :
578 : : /* Returns whether the mode supports reg/reg copy operations. */
579 : :
580 : : bool
581 : 98606905 : can_copy_p (machine_mode mode)
582 : : {
583 : 98606905 : if (! can_copy_init_p)
584 : : {
585 : 83349 : compute_can_copy ();
586 : 83349 : can_copy_init_p = true;
587 : : }
588 : :
589 : 98606905 : return can_copy[mode] != 0;
590 : : }
591 : :
592 : : /* Cover function to xmalloc to record bytes allocated. */
593 : :
594 : : static void *
595 : 1000140 : gmalloc (size_t size)
596 : : {
597 : 1000140 : bytes_used += size;
598 : 0 : return xmalloc (size);
599 : : }
600 : :
601 : : /* Cover function to xcalloc to record bytes allocated. */
602 : :
603 : : static void *
604 : 1822324 : gcalloc (size_t nelem, size_t elsize)
605 : : {
606 : 1822324 : bytes_used += nelem * elsize;
607 : 1822324 : return xcalloc (nelem, elsize);
608 : : }
609 : :
610 : : /* Cover function to obstack_alloc. */
611 : :
612 : : static void *
613 : 25310890 : gcse_alloc (unsigned long size)
614 : : {
615 : 25310890 : bytes_used += size;
616 : 25310890 : return obstack_alloc (&gcse_obstack, size);
617 : : }
618 : :
619 : : /* Allocate memory for the reg/memory set tracking tables.
620 : : This is called at the start of each pass. */
621 : :
622 : : static void
623 : 500070 : alloc_gcse_mem (void)
624 : : {
625 : : /* Allocate vars to track sets of regs. */
626 : 500070 : reg_set_bitmap = ALLOC_REG_SET (NULL);
627 : :
628 : : /* Allocate array to keep a list of insns which modify memory in each
629 : : basic block. The two typedefs are needed to work around the
630 : : pre-processor limitation with template types in macro arguments. */
631 : 500070 : typedef vec<rtx_insn *> vec_rtx_heap;
632 : 500070 : typedef vec<modify_pair> vec_modify_pair_heap;
633 : 500070 : modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
634 : 500070 : canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
635 : : last_basic_block_for_fn (cfun));
636 : 500070 : modify_mem_list_set = BITMAP_ALLOC (NULL);
637 : 500070 : blocks_with_calls = BITMAP_ALLOC (NULL);
638 : 500070 : }
639 : :
640 : : /* Free memory allocated by alloc_gcse_mem. */
641 : :
642 : : static void
643 : 500070 : free_gcse_mem (void)
644 : : {
645 : 500070 : FREE_REG_SET (reg_set_bitmap);
646 : :
647 : 500070 : free_modify_mem_tables ();
648 : 500070 : BITMAP_FREE (modify_mem_list_set);
649 : 500070 : BITMAP_FREE (blocks_with_calls);
650 : 500070 : }
651 : :
652 : : /* Compute the local properties of each recorded expression.
653 : :
654 : : Local properties are those that are defined by the block, irrespective of
655 : : other blocks.
656 : :
657 : : An expression is transparent in a block if its operands are not modified
658 : : in the block.
659 : :
660 : : An expression is computed (locally available) in a block if it is computed
661 : : at least once and expression would contain the same value if the
662 : : computation was moved to the end of the block.
663 : :
664 : : An expression is locally anticipatable in a block if it is computed at
665 : : least once and expression would contain the same value if the computation
666 : : was moved to the beginning of the block.
667 : :
668 : : We call this routine for pre and code hoisting. They all compute
669 : : basically the same information and thus can easily share this code.
670 : :
671 : : TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
672 : : properties. If NULL, then it is not necessary to compute or record that
673 : : particular property.
674 : :
675 : : TABLE controls which hash table to look at. */
676 : :
677 : : static void
678 : 435844 : compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
679 : : struct gcse_hash_table_d *table)
680 : : {
681 : 435844 : unsigned int i;
682 : :
683 : : /* Initialize any bitmaps that were passed in. */
684 : 435844 : if (transp)
685 : : {
686 : 435844 : bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
687 : : }
688 : :
689 : 435844 : if (comp)
690 : 435844 : bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
691 : 435844 : if (antloc)
692 : 435844 : bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));
693 : :
694 : 20934622 : for (i = 0; i < table->size; i++)
695 : : {
696 : 20498778 : struct gcse_expr *expr;
697 : :
698 : 30150494 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
699 : : {
700 : 9651716 : int indx = expr->bitmap_index;
701 : 9651716 : struct gcse_occr *occr;
702 : :
703 : : /* In most cases, the expression is transparent in the block if it is
704 : : not killed. The exception to this is during hardreg PRE, in which
705 : : uses of the hardreg prevent transparency but do not kill the
706 : : expression.
707 : :
708 : : We start by assuming all expressions are transparent [none are
709 : : killed], and then reset the bits for those that are. */
710 : 9651716 : if (transp)
711 : : {
712 : 9651716 : compute_transp (expr->expr, indx, transp,
713 : : blocks_with_calls,
714 : : modify_mem_list_set,
715 : : canon_modify_mem_list);
716 : :
717 : 9651716 : if (doing_hardreg_pre_p)
718 : : {
719 : : /* We also need to check whether the destination hardreg is
720 : : set or call-clobbered in each BB. We'll check for hardreg
721 : : uses later. */
722 : 0 : df_ref def;
723 : 0 : for (def = DF_REG_DEF_CHAIN (current_hardreg_regno);
724 : 0 : def;
725 : 0 : def = DF_REF_NEXT_REG (def))
726 : 0 : bitmap_clear_bit (transp[DF_REF_BB (def)->index], indx);
727 : : }
728 : : }
729 : :
730 : : /* The occurrences recorded in antic_occr are exactly those that
731 : : we want to set to nonzero in ANTLOC. */
732 : 9651716 : if (antloc)
733 : 16555460 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
734 : : {
735 : 6903744 : bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
736 : :
737 : : /* While we're scanning the table, this is a good place to
738 : : initialize this. */
739 : 6903744 : occr->deleted_p = 0;
740 : : }
741 : :
742 : : /* The occurrences recorded in avail_occr are exactly those that
743 : : we want to set to nonzero in COMP. */
744 : 9651716 : if (comp)
745 : 18407146 : for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
746 : : {
747 : 8755430 : bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
748 : :
749 : : /* While we're scanning the table, this is a good place to
750 : : initialize this. */
751 : 8755430 : occr->copied_p = 0;
752 : : }
753 : :
754 : : /* While we're scanning the table, this is a good place to
755 : : initialize this. */
756 : 9651716 : expr->reaching_reg = 0;
757 : : }
758 : : }
759 : 435844 : }
760 : :
761 : : /* A hardreg set is not transparent in a block if there are any uses of that
762 : : hardreg. This filters the results of compute_local_properties, after the
763 : : result of that function has been used to define the kills bitmap.
764 : :
765 : : TRANSP is the destination sbitmap to be updated.
766 : :
767 : : TABLE controls which hash table to look at. */
768 : :
769 : : static void
770 : 0 : prune_hardreg_uses (sbitmap *transp, struct gcse_hash_table_d *table)
771 : : {
772 : 0 : unsigned int i;
773 : 0 : gcc_assert (doing_hardreg_pre_p);
774 : :
775 : 0 : for (i = 0; i < table->size; i++)
776 : : {
777 : 0 : struct gcse_expr *expr;
778 : :
779 : 0 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
780 : : {
781 : 0 : int indx = expr->bitmap_index;
782 : 0 : df_ref def;
783 : :
784 : 0 : for (def = DF_REG_USE_CHAIN (current_hardreg_regno);
785 : 0 : def;
786 : 0 : def = DF_REF_NEXT_REG (def))
787 : 0 : bitmap_clear_bit (transp[DF_REF_BB (def)->index], indx);
788 : : }
789 : : }
790 : 0 : }
791 : :
792 : : /* Hash table support. */
793 : :
struct reg_avail_info
{
  /* Block for which FIRST_SET and LAST_SET are valid; used as a quick
     invalidation test when scanning moves to a new block.  */
  basic_block last_bb;
  /* LUID of the first set of the register in LAST_BB (for computing
     anticipatability).  */
  int first_set;
  /* LUID of the last set of the register in LAST_BB (for computing
     availability).  */
  int last_set;
};

/* Per-register first/last-set records, indexed by register number;
   allocated and freed by compute_hash_table_work.  */
static struct reg_avail_info *reg_avail_info;
/* The basic block currently being scanned.  */
static basic_block current_bb;
803 : :
804 : : /* See whether X, the source of a set, is something we want to consider for
805 : : GCSE. */
806 : :
static bool
want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use set_src_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constant leads to increased register pressure.

     RA can deal with this by rematerialing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.

     For hardreg PRE, register pressure is not a concern, and we also want to
     apply GCSE to simple moves.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      /* Plain register copies are only interesting when doing hardreg
	 PRE; otherwise they are not candidates.  */
      return doing_hardreg_pre_p;

    case CALL:
      return false;

    CASE_CONST_ANY:
      if (doing_hardreg_pre_p)
	return true;
      else if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return false;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* PRE doesn't implement max_distance restriction.  */
	{
	  int cost;
	  HOST_WIDE_INT max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = set_src_cost (x, mode, 0);

	  if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
	    {
	      /* Cheap expressions may only travel a distance
		 proportional to their cost; a zero budget means the
		 expression is not worth hoisting at all.  */
	      max_distance
		= ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10;
	      if (max_distance == 0)
		return false;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    /* Zero means no restriction for expensive expressions.  */
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      /* Finally, the expression must be assignable to a pseudo without
	 clobbering hard registers as a side effect.  */
      return can_assign_to_reg_without_clobbers_p (x, mode);
    }
}
887 : :
888 : : /* Used internally by can_assign_to_reg_without_clobbers_p. */
889 : :
890 : : static GTY(()) rtx_insn *test_insn;
891 : :
892 : : /* Return true if we can assign X to a pseudo register of mode MODE
893 : : such that the resulting insn does not result in clobbering a hard
894 : : register as a side-effect.
895 : :
896 : : Additionally, if the target requires it, check that the resulting insn
897 : : can be copied. If it cannot, this means that X is special and probably
898 : : has hidden side-effects we don't want to mess with.
899 : :
900 : : This function is typically used by code motion passes, to verify
901 : : that it is safe to insert an insn without worrying about clobbering
902 : : maybe live hard regs. */
903 : :
904 : : bool
905 : 19839204 : can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
906 : : {
907 : 19839204 : int num_clobbers = 0;
908 : 19839204 : int icode;
909 : 19839204 : bool can_assign = false;
910 : :
911 : : /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
912 : 19839204 : if (general_operand (x, mode))
913 : : return true;
914 : 9524301 : else if (GET_MODE (x) == VOIDmode)
915 : : return false;
916 : :
917 : : /* Otherwise, check if we can make a valid insn from it. First initialize
918 : : our test insn if we haven't already. */
919 : 9524301 : if (test_insn == 0)
920 : : {
921 : 66947 : test_insn
922 : 66947 : = make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
923 : : FIRST_PSEUDO_REGISTER * 2),
924 : : const0_rtx));
925 : 66947 : SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
926 : 66947 : INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
927 : : }
928 : :
929 : : /* Now make an insn like the one we would make when GCSE'ing and see if
930 : : valid. */
931 : 9524301 : PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
932 : 9524301 : SET_SRC (PATTERN (test_insn)) = x;
933 : :
934 : 9524301 : icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
935 : :
936 : : /* If the test insn is valid and doesn't need clobbers, and the target also
937 : : has no objections, we're good. */
938 : 9524301 : if (icode >= 0
939 : 8623596 : && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
940 : 16179614 : && ! (targetm.cannot_copy_insn_p
941 : 0 : && targetm.cannot_copy_insn_p (test_insn)))
942 : : can_assign = true;
943 : :
944 : : /* Make sure test_insn doesn't have any pointers into GC space. */
945 : 9524301 : SET_SRC (PATTERN (test_insn)) = NULL_RTX;
946 : :
947 : 9524301 : return can_assign;
948 : : }
949 : :
950 : : /* Return true if the operands of expression X are unchanged from the
951 : : start of INSN's basic block up to but not including INSN
952 : : (if AVAIL_P == false), or from INSN to the end of INSN's basic block
953 : : (if AVAIL_P == true). */
954 : :
955 : : static bool
956 : 70938788 : oprs_unchanged_p (const_rtx x, const rtx_insn *insn, bool avail_p)
957 : : {
958 : 70938788 : int i, j;
959 : 70938788 : enum rtx_code code;
960 : 70938788 : const char *fmt;
961 : :
962 : 70938788 : if (x == 0)
963 : : return true;
964 : :
965 : 70938788 : code = GET_CODE (x);
966 : 70938788 : switch (code)
967 : : {
968 : 21945405 : case REG:
969 : 21945405 : {
970 : 21945405 : struct reg_avail_info *info = ®_avail_info[REGNO (x)];
971 : :
972 : 21945405 : if (info->last_bb != current_bb)
973 : : return true;
974 : 10659180 : if (avail_p)
975 : 5673122 : return info->last_set < DF_INSN_LUID (insn);
976 : : else
977 : 4986058 : return info->first_set >= DF_INSN_LUID (insn);
978 : : }
979 : :
980 : 11537213 : case MEM:
981 : 11537213 : if (! do_load_motion ()
982 : 11537191 : || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
983 : : x, avail_p))
984 : 3556550 : return false;
985 : : else
986 : 7980663 : return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
987 : :
988 : : case PRE_DEC:
989 : : case PRE_INC:
990 : : case POST_DEC:
991 : : case POST_INC:
992 : : case PRE_MODIFY:
993 : : case POST_MODIFY:
994 : : return false;
995 : :
996 : : case PC:
997 : : case CONST:
998 : : CASE_CONST_ANY:
999 : : case SYMBOL_REF:
1000 : : case LABEL_REF:
1001 : : case ADDR_VEC:
1002 : : case ADDR_DIFF_VEC:
1003 : : return true;
1004 : :
1005 : 20685257 : default:
1006 : 20685257 : break;
1007 : : }
1008 : :
1009 : 37902108 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1010 : : {
1011 : 37472315 : if (fmt[i] == 'e')
1012 : : {
1013 : : /* If we are about to do the last recursive call needed at this
1014 : : level, change it into iteration. This function is called enough
1015 : : to be worth it. */
1016 : 36336396 : if (i == 0)
1017 : 18756878 : return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1018 : :
1019 : 17579518 : else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1020 : : return false;
1021 : : }
1022 : 1135919 : else if (fmt[i] == 'E')
1023 : 1953484 : for (j = 0; j < XVECLEN (x, i); j++)
1024 : 1523748 : if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1025 : : return false;
1026 : : }
1027 : :
1028 : : return true;
1029 : : }
1030 : :
1031 : : /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
1032 : :
/* Communication record between load_killed_in_block_p (which fills in MEM
   and reads CONFLICT) and the note_stores callback mems_conflict_for_gcse_p
   (which sets CONFLICT).  */
struct mem_conflict_info
{
  /* A memory reference for a load instruction; mems_conflict_for_gcse_p
     will see if a memory store conflicts with this memory load.  */
  const_rtx mem;

  /* True if mems_conflict_for_gcse_p finds a conflict between two memory
     references.  */
  bool conflict;
};
1043 : :
1044 : : /* DEST is the output of an instruction. If it is a memory reference and
1045 : : possibly conflicts with the load found in DATA, then communicate this
1046 : : information back through DATA. */
1047 : :
1048 : : static void
1049 : 13055090 : mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
1050 : : void *data)
1051 : : {
1052 : 13055090 : struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
1053 : :
1054 : 13055090 : while (GET_CODE (dest) == SUBREG
1055 : 13055090 : || GET_CODE (dest) == ZERO_EXTRACT
1056 : 26110180 : || GET_CODE (dest) == STRICT_LOW_PART)
1057 : 0 : dest = XEXP (dest, 0);
1058 : :
1059 : : /* If DEST is not a MEM, then it will not conflict with the load. Note
1060 : : that function calls are assumed to clobber memory, but are handled
1061 : : elsewhere. */
1062 : 13055090 : if (! MEM_P (dest))
1063 : : return;
1064 : :
1065 : : /* If we are setting a MEM in our list of specially recognized MEMs,
1066 : : don't mark as killed this time. */
1067 : 25427891 : if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
1068 : : {
1069 : 103885 : if (!find_rtx_in_ldst (dest))
1070 : 39876 : mci->conflict = true;
1071 : 103885 : return;
1072 : : }
1073 : :
1074 : 12808168 : if (true_dependence (dest, GET_MODE (dest), mci->mem))
1075 : 1433657 : mci->conflict = true;
1076 : : }
1077 : :
1078 : : /* Return true if the expression in X (a memory reference) is killed
1079 : : in block BB before or after the insn with the LUID in UID_LIMIT.
1080 : : AVAIL_P is true for kills after UID_LIMIT, and zero for kills
1081 : : before UID_LIMIT.
1082 : :
1083 : : To check the entire block, set UID_LIMIT to max_uid + 1 and
1084 : : AVAIL_P to false. */
1085 : :
1086 : : static bool
1087 : 11537191 : load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
1088 : : bool avail_p)
1089 : : {
1090 : 11537191 : vec<rtx_insn *> list = modify_mem_list[bb->index];
1091 : 11537191 : rtx_insn *setter;
1092 : 11537191 : unsigned ix;
1093 : :
1094 : : /* If this is a readonly then we aren't going to be changing it. */
1095 : 11537191 : if (MEM_READONLY_P (x))
1096 : : return false;
1097 : :
1098 : 51066892 : FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
1099 : : {
1100 : 37675271 : struct mem_conflict_info mci;
1101 : :
1102 : : /* Ignore entries in the list that do not apply. */
1103 : 60355515 : if ((avail_p
1104 : 15560930 : && DF_INSN_LUID (setter) < uid_limit)
1105 : 37675271 : || (! avail_p
1106 : 22114341 : && DF_INSN_LUID (setter) > uid_limit))
1107 : 22680244 : continue;
1108 : :
1109 : : /* If SETTER is a call everything is clobbered. Note that calls
1110 : : to pure functions are never put on the list, so we need not
1111 : : worry about them. */
1112 : 14995027 : if (CALL_P (setter))
1113 : 3556528 : return true;
1114 : :
1115 : : /* SETTER must be an INSN of some kind that sets memory. Call
1116 : : note_stores to examine each hunk of memory that is modified. */
1117 : 12912030 : mci.mem = x;
1118 : 12912030 : mci.conflict = false;
1119 : 12912030 : note_stores (setter, mems_conflict_for_gcse_p, &mci);
1120 : 12912030 : if (mci.conflict)
1121 : : return true;
1122 : : }
1123 : : return false;
1124 : : }
1125 : :
1126 : : /* Return true if the operands of expression X are unchanged from
1127 : : the start of INSN's basic block up to but not including INSN. */
1128 : :
1129 : : static bool
1130 : 12548936 : oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
1131 : : {
1132 : 0 : return oprs_unchanged_p (x, insn, false);
1133 : : }
1134 : :
1135 : : /* Return true if the operands of expression X are unchanged from
1136 : : INSN to the end of INSN's basic block. */
1137 : :
1138 : : static bool
1139 : 12549045 : oprs_available_p (const_rtx x, const rtx_insn *insn)
1140 : : {
1141 : 0 : return oprs_unchanged_p (x, insn, true);
1142 : : }
1143 : :
1144 : : /* Hash expression X.
1145 : :
1146 : : MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1147 : : indicating if a volatile operand is found or if the expression contains
1148 : : something we don't want to insert in the table. HASH_TABLE_SIZE is
1149 : : the current size of the hash table to be probed. */
1150 : :
1151 : : static unsigned int
1152 : 12549045 : hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p,
1153 : : int hash_table_size)
1154 : : {
1155 : 12549045 : unsigned int hash;
1156 : :
1157 : 12549045 : *do_not_record_p = 0;
1158 : :
1159 : 12549045 : hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1160 : 12549045 : return hash % hash_table_size;
1161 : : }
1162 : :
1163 : : /* Return true if exp1 is equivalent to exp2. */
1164 : :
1165 : : static bool
1166 : 141376720 : expr_equiv_p (const_rtx x, const_rtx y)
1167 : : {
1168 : 128214518 : return exp_equiv_p (x, y, 0, true);
1169 : : }
1170 : :
1171 : : /* Insert expression X in INSN in the hash TABLE.
1172 : : If it is already present, record it as the last occurrence in INSN's
1173 : : basic block.
1174 : :
1175 : : MODE is the mode of the value X is being stored into.
1176 : : It is only used if X is a CONST_INT.
1177 : :
1178 : : ANTIC_P is true if X is an anticipatable expression.
1179 : : AVAIL_P is true if X is an available expression.
1180 : :
1181 : : MAX_DISTANCE is the maximum distance in instructions this expression can
1182 : : be moved. */
1183 : :
static void
insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
		      bool antic_p, bool avail_p, HOST_WIDE_INT max_distance,
		      struct gcse_hash_table_d *table)
{
  bool found;
  int do_not_record_p;
  unsigned int hash;
  struct gcse_expr *cur_expr, *last_expr = NULL;
  struct gcse_occr *antic_occr, *avail_occr;

  hash = hash_expr (x, mode, &do_not_record_p, table->size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = table->table[hash];
  found = false;

  /* Walk the hash chain; FOUND is set as a side effect of the loop
     condition when an equivalent expression is already present.  */
  while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0)
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      /* New expression: allocate an entry and append it to the chain
	 (or start the chain if the bucket was empty).  */
      cur_expr = GOBNEW (struct gcse_expr);
      bytes_used += sizeof (struct gcse_expr);
      if (table->table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	table->table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      gcc_assert (max_distance >= 0);
      cur_expr->max_distance = max_distance;
    }
  else
    /* All occurrences of an expression must agree on the distance
       budget computed by want_to_gcse_p.  */
    gcc_assert (cur_expr->max_distance == max_distance);

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* The head of the list only matters if it is in INSN's block.  */
      if (antic_occr
	  && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
	antic_occr = NULL;

      if (antic_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer the currently recorded one.  We want the first one in the
	   block and the block is scanned from start to end.  */
	; /* nothing to do */
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  antic_occr->insn = insn;
	  antic_occr->next = cur_expr->antic_occr;
	  antic_occr->deleted_p = 0;
	  cur_expr->antic_occr = antic_occr;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      if (avail_occr
	  && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer this occurrence to the currently recorded one.  We want
	     the last one in the block and the block is scanned from start
	     to end.  */
	  avail_occr->insn = insn;
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  avail_occr->insn = insn;
	  avail_occr->next = cur_expr->avail_occr;
	  avail_occr->deleted_p = 0;
	  cur_expr->avail_occr = avail_occr;
	}
    }
}
1288 : :
1289 : : /* Scan SET present in INSN and add an entry to the hash TABLE. */
1290 : :
static void
hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
{
  rtx src = SET_SRC (set);
  rtx dest = SET_DEST (set);
  rtx note;

  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn, table);

  else if (REG_P (dest))
    {
      unsigned int regno = REGNO (dest);
      HOST_WIDE_INT max_distance = 0;

      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.

	 This allows us to do a single GCSE pass and still eliminate
	 redundant constants, addresses or other expressions that are
	 constructed with multiple instructions.

	 However, keep the original SRC if INSN is a simple reg-reg move.
	 In this case, there will almost always be a REG_EQUAL note on the
	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
	 for INSN, we miss copy propagation opportunities and we perform the
	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
	 do more than one PRE GCSE pass.

	 Note that this does not impede profitable constant propagations.  We
	 "look through" reg-reg sets in lookup_avail_set.  */
      note = find_reg_equal_equiv_note (insn);
      if (note != 0
	  && REG_NOTE_KIND (note) == REG_EQUAL
	  && !REG_P (src)
	  && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
	src = XEXP (note, 0), set = gen_rtx_SET (dest, src);

      /* Only record sets of pseudo-regs in the hash table, unless we're
	 currently doing hardreg switching.  */
      if ((doing_hardreg_pre_p ? regno == current_hardreg_regno
	   : regno >= FIRST_PSEUDO_REGISTER)
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (dest))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  /* ??? We can now easily create new EH landing pads at the
	     gimple level, for splitting edges; there's no reason we
	     can't do the same thing at the rtl level.  */
	  && !can_throw_internal (insn)
	  /* Is SET_SRC something we want to gcse?  */
	  && want_to_gcse_p (src, GET_MODE (dest), &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
	{
	  /* An expression is not anticipatable if its operands are
	     modified before this insn or if this is not the only SET in
	     this insn.  The latter condition does not have to mean that
	     SRC itself is not anticipatable, but we just will not be
	     able to handle code motion of insns with multiple sets.  */
	  bool antic_p = (oprs_anticipatable_p (src, insn)
			  && !multiple_sets (insn));
	  if (doing_hardreg_pre_p)
	    {
	      /* A hardreg assignment is anticipatable only if the hardreg is
		 neither set nor used prior to this assignment.  */
	      auto info = reg_avail_info[current_hardreg_regno];
	      if ((info.last_bb == current_bb
		   && info.first_set < DF_INSN_LUID (insn))
		  || bitmap_bit_p (DF_LR_IN (current_bb),
				   current_hardreg_regno))
		antic_p = false;
	    }

	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  bool avail_p = (oprs_available_p (src, insn)
			  && ! JUMP_P (insn));
	  if (doing_hardreg_pre_p)
	    {
	      /* A hardreg assignment is only available if the hardreg is
		 not set later in the BB.  Uses of the hardreg are allowed.  */
	      auto info = reg_avail_info[current_hardreg_regno];
	      if (info.last_bb == current_bb
		  && info.last_set > DF_INSN_LUID (insn))
		avail_p = false;
	    }

	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
				max_distance, table);
	}
    }
  /* In case of store we want to consider the memory value as available in
     the REG stored in that memory.  This makes it possible to remove
     redundant loads due to stores to the same location.  */
  else if (flag_gcse_las
	   && !doing_hardreg_pre_p
	   && REG_P (src)
	   && MEM_P (dest))
    {
      unsigned int regno = REGNO (src);
      HOST_WIDE_INT max_distance = 0;

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (src))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  && !can_throw_internal (insn)
	  /* Is SET_DEST something we want to gcse?  */
	  && want_to_gcse_p (dest, GET_MODE (dest), &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
	      || ! MEM_P (XEXP (note, 0))))
	{
	  /* Stores are never anticipatable.  */
	  bool antic_p = 0;
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  bool avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn);

	  /* Record the memory expression (DEST) in the hash table.  */
	  insert_expr_in_table (dest, GET_MODE (dest), insn,
				antic_p, avail_p, max_distance, table);
	}
    }
}
1434 : :
/* Process a CLOBBER pattern X found in INSN while building TABLE.
   Clobbers contribute nothing to the expression hash table.  */

static void
hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
		   struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}
1441 : :
/* Process a CALL pattern X found in INSN while building TABLE.
   Calls are not entered in the expression hash table.  */

static void
hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
		struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}
1448 : :
1449 : : /* Process INSN and add hash table entries as appropriate. */
1450 : :
1451 : : static void
1452 : 48555842 : hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
1453 : : {
1454 : 48555842 : rtx pat = PATTERN (insn);
1455 : 48555842 : int i;
1456 : :
1457 : : /* Pick out the sets of INSN and for other forms of instructions record
1458 : : what's been modified. */
1459 : :
1460 : 48555842 : if (GET_CODE (pat) == SET)
1461 : 38129480 : hash_scan_set (pat, insn, table);
1462 : :
1463 : 10426362 : else if (GET_CODE (pat) == CLOBBER)
1464 : : hash_scan_clobber (pat, insn, table);
1465 : :
1466 : 10409577 : else if (GET_CODE (pat) == CALL)
1467 : : hash_scan_call (pat, insn, table);
1468 : :
1469 : 8309336 : else if (GET_CODE (pat) == PARALLEL)
1470 : 24029861 : for (i = 0; i < XVECLEN (pat, 0); i++)
1471 : : {
1472 : 16133180 : rtx x = XVECEXP (pat, 0, i);
1473 : :
1474 : 16133180 : if (GET_CODE (x) == SET)
1475 : 8138462 : hash_scan_set (x, insn, table);
1476 : : else if (GET_CODE (x) == CLOBBER)
1477 : : hash_scan_clobber (x, insn, table);
1478 : : else if (GET_CODE (x) == CALL)
1479 : : hash_scan_call (x, insn, table);
1480 : : }
1481 : 48555842 : }
1482 : :
1483 : : /* Dump the hash table TABLE to file FILE under the name NAME. */
1484 : :
1485 : : static void
1486 : 24 : dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
1487 : : {
1488 : 24 : int i;
1489 : : /* Flattened out table, so it's printed in proper order. */
1490 : 24 : struct gcse_expr **flat_table;
1491 : 24 : unsigned int *hash_val;
1492 : 24 : struct gcse_expr *expr;
1493 : :
1494 : 24 : flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
1495 : 24 : hash_val = XNEWVEC (unsigned int, table->n_elems);
1496 : :
1497 : 424 : for (i = 0; i < (int) table->size; i++)
1498 : 640 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1499 : : {
1500 : 264 : flat_table[expr->bitmap_index] = expr;
1501 : 264 : hash_val[expr->bitmap_index] = i;
1502 : : }
1503 : :
1504 : 24 : fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1505 : : name, table->size, table->n_elems);
1506 : :
1507 : 312 : for (i = 0; i < (int) table->n_elems; i++)
1508 : 264 : if (flat_table[i] != 0)
1509 : : {
1510 : 264 : expr = flat_table[i];
1511 : 264 : fprintf (file, "Index %d (hash value %d; max distance "
1512 : : HOST_WIDE_INT_PRINT_DEC ")\n ",
1513 : 264 : expr->bitmap_index, hash_val[i], expr->max_distance);
1514 : 264 : print_rtl (file, expr->expr);
1515 : 264 : fprintf (file, "\n");
1516 : : }
1517 : :
1518 : 24 : fprintf (file, "\n");
1519 : :
1520 : 24 : free (flat_table);
1521 : 24 : free (hash_val);
1522 : 24 : }
1523 : :
1524 : : /* Record register first/last/block set information for REGNO in INSN.
1525 : :
1526 : : first_set records the first place in the block where the register
1527 : : is set and is used to compute "anticipatability".
1528 : :
1529 : : last_set records the last place in the block where the register
1530 : : is set and is used to compute "availability".
1531 : :
1532 : : last_bb records the block for which first_set and last_set are
1533 : : valid, as a quick test to invalidate them. */
1534 : :
1535 : : static void
1536 : 353119297 : record_last_reg_set_info (rtx_insn *insn, int regno)
1537 : : {
1538 : 353119297 : struct reg_avail_info *info = ®_avail_info[regno];
1539 : 353119297 : int luid = DF_INSN_LUID (insn);
1540 : :
1541 : 353119297 : info->last_set = luid;
1542 : 353119297 : if (info->last_bb != current_bb)
1543 : : {
1544 : 265083792 : info->last_bb = current_bb;
1545 : 265083792 : info->first_set = luid;
1546 : : }
1547 : 353119297 : }
1548 : :
1549 : : /* Record memory modification information for INSN. We do not actually care
1550 : : about the memory location(s) that are set, or even how they are set (consider
1551 : : a CALL_INSN). We merely need to record which insns modify memory. */
1552 : :
1553 : : static void
1554 : 8751954 : record_last_mem_set_info (rtx_insn *insn)
1555 : : {
1556 : 8751954 : if (! do_load_motion ())
1557 : : return;
1558 : :
1559 : 8751940 : record_last_mem_set_info_common (insn, modify_mem_list,
1560 : : canon_modify_mem_list,
1561 : : modify_mem_list_set,
1562 : : blocks_with_calls);
1563 : : }
1564 : :
1565 : : /* Called from compute_hash_table via note_stores to handle one
1566 : : SET or CLOBBER in an insn. DATA is really the instruction in which
1567 : : the SET is taking place. */
1568 : :
1569 : : static void
1570 : 54126581 : record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1571 : : {
1572 : 54126581 : rtx_insn *last_set_insn = (rtx_insn *) data;
1573 : :
1574 : 54126581 : if (GET_CODE (dest) == SUBREG)
1575 : 0 : dest = SUBREG_REG (dest);
1576 : :
1577 : 54126581 : if (REG_P (dest))
1578 : 42965527 : record_last_reg_set_info (last_set_insn, REGNO (dest));
1579 : 11161054 : else if (MEM_P (dest)
1580 : : /* Ignore pushes, they clobber nothing. */
1581 : 11161054 : && ! push_operand (dest, GET_MODE (dest)))
1582 : 5310418 : record_last_mem_set_info (last_set_insn);
1583 : 54126581 : }
1584 : :
1585 : : /* Top level function to create an expression hash table.
1586 : :
1587 : : Expression entries are placed in the hash table if
1588 : : - they are of the form (set (pseudo-reg) src),
1589 : : - src is something we want to perform GCSE on,
1590 : : - none of the operands are subsequently modified in the block
1591 : :
1592 : : Currently src must be a pseudo-reg or a const_int.
1593 : :
1594 : : TABLE is the table computed. */
1595 : :
1596 : : static void
1597 : 500070 : compute_hash_table_work (struct gcse_hash_table_d *table)
1598 : : {
1599 : 500070 : int i;
1600 : :
1601 : : /* re-Cache any INSN_LIST nodes we have allocated. */
1602 : 500070 : clear_modify_mem_tables ();
1603 : : /* Some working arrays used to track first and last set in each block. */
1604 : 500070 : reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1605 : :
1606 : 80006393 : for (i = 0; i < max_reg_num (); ++i)
1607 : 79506323 : reg_avail_info[i].last_bb = NULL;
1608 : :
1609 : 10010368 : FOR_EACH_BB_FN (current_bb, cfun)
1610 : : {
1611 : 9510298 : rtx_insn *insn;
1612 : 9510298 : unsigned int regno;
1613 : :
1614 : : /* First pass over the instructions records information used to
1615 : : determine when registers and memory are first and last set. */
1616 : 116374825 : FOR_BB_INSNS (current_bb, insn)
1617 : : {
1618 : 106864527 : if (!NONDEBUG_INSN_P (insn))
1619 : 58308685 : continue;
1620 : :
1621 : 48555842 : if (CALL_P (insn))
1622 : : {
1623 : 3809536 : hard_reg_set_iterator hrsi;
1624 : :
1625 : : /* We don't track modes of hard registers, so we need
1626 : : to be conservative and assume that partial kills
1627 : : are full kills. */
1628 : 3809536 : HARD_REG_SET callee_clobbers
1629 : 3809536 : = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
1630 : 313963306 : EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
1631 : 310153770 : record_last_reg_set_info (insn, regno);
1632 : :
1633 : 7480684 : if (! RTL_CONST_OR_PURE_CALL_P (insn)
1634 : 392998 : || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1635 : 4179074 : || can_throw_external (insn))
1636 : 3441536 : record_last_mem_set_info (insn);
1637 : : }
1638 : :
1639 : 48555842 : note_stores (insn, record_last_set_info, insn);
1640 : : }
1641 : :
1642 : : /* The next pass builds the hash table. */
1643 : 116374825 : FOR_BB_INSNS (current_bb, insn)
1644 : 106864527 : if (NONDEBUG_INSN_P (insn))
1645 : 48555842 : hash_scan_insn (insn, table);
1646 : : }
1647 : :
1648 : 500070 : free (reg_avail_info);
1649 : 500070 : reg_avail_info = NULL;
1650 : 500070 : }
1651 : :
1652 : : /* Allocate space for the set/expr hash TABLE.
1653 : : It is used to determine the number of buckets to use. */
1654 : :
1655 : : static void
1656 : 500070 : alloc_hash_table (struct gcse_hash_table_d *table)
1657 : : {
1658 : 500070 : int n;
1659 : :
1660 : 500070 : n = get_max_insn_count ();
1661 : :
1662 : 500070 : table->size = n / 4;
1663 : 500070 : if (table->size < 11)
1664 : 190071 : table->size = 11;
1665 : :
1666 : : /* Attempt to maintain efficient use of hash table.
1667 : : Making it an odd number is simplest for now.
1668 : : ??? Later take some measurements. */
1669 : 500070 : table->size |= 1;
1670 : 500070 : n = table->size * sizeof (struct gcse_expr *);
1671 : 500070 : table->table = GNEWVAR (struct gcse_expr *, n);
1672 : 500070 : }
1673 : :
1674 : : /* Free things allocated by alloc_hash_table. */
1675 : :
1676 : : static void
1677 : 500070 : free_hash_table (struct gcse_hash_table_d *table)
1678 : : {
1679 : 500070 : free (table->table);
1680 : 0 : }
1681 : :
1682 : : /* Compute the expression hash table TABLE. */
1683 : :
1684 : : static void
1685 : 500070 : compute_hash_table (struct gcse_hash_table_d *table)
1686 : : {
1687 : : /* Initialize count of number of entries in hash table. */
1688 : 500070 : table->n_elems = 0;
1689 : 500070 : memset (table->table, 0, table->size * sizeof (struct gcse_expr *));
1690 : :
1691 : 500070 : compute_hash_table_work (table);
1692 : 500070 : }
1693 : :
1694 : : /* Expression tracking support. */
1695 : :
1696 : : /* Clear canon_modify_mem_list and modify_mem_list tables. */
1697 : : static void
1698 : 1000140 : clear_modify_mem_tables (void)
1699 : : {
1700 : 1000140 : unsigned i;
1701 : 1000140 : bitmap_iterator bi;
1702 : :
1703 : 4874672 : EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1704 : : {
1705 : 3874532 : modify_mem_list[i].release ();
1706 : 3874532 : canon_modify_mem_list[i].release ();
1707 : : }
1708 : 1000140 : bitmap_clear (modify_mem_list_set);
1709 : 1000140 : bitmap_clear (blocks_with_calls);
1710 : 1000140 : }
1711 : :
1712 : : /* Release memory used by modify_mem_list_set. */
1713 : :
1714 : : static void
1715 : 500070 : free_modify_mem_tables (void)
1716 : : {
1717 : 500070 : clear_modify_mem_tables ();
1718 : 500070 : free (modify_mem_list);
1719 : 500070 : free (canon_modify_mem_list);
1720 : 500070 : modify_mem_list = 0;
1721 : 500070 : canon_modify_mem_list = 0;
1722 : 500070 : }
1723 : :
1724 : : /* Compute PRE+LCM working variables. */
1725 : :
1726 : : /* Local properties of expressions. */
1727 : :
1728 : : /* Nonzero for expressions that are transparent in the block. */
1729 : : static sbitmap *transp;
1730 : :
1731 : : /* Nonzero for expressions that are computed (available) in the block. */
1732 : : static sbitmap *comp;
1733 : :
1734 : : /* Nonzero for expressions that are locally anticipatable in the block. */
1735 : : static sbitmap *antloc;
1736 : :
1737 : : /* Nonzero for expressions where this block is an optimal computation
1738 : : point. */
1739 : : static sbitmap *pre_optimal;
1740 : :
1741 : : /* Nonzero for expressions which are redundant in a particular block. */
1742 : : static sbitmap *pre_redundant;
1743 : :
1744 : : /* Nonzero for expressions which should be inserted on a specific edge. */
1745 : : static sbitmap *pre_insert_map;
1746 : :
1747 : : /* Nonzero for expressions which should be deleted in a specific block. */
1748 : : static sbitmap *pre_delete_map;
1749 : :
1750 : : /* Allocate vars used for PRE analysis. */
1751 : :
1752 : : static void
1753 : 411092 : alloc_pre_mem (int n_blocks, int n_exprs)
1754 : : {
1755 : 411092 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1756 : 411092 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1757 : 411092 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1758 : :
1759 : 411092 : pre_optimal = NULL;
1760 : 411092 : pre_redundant = NULL;
1761 : 411092 : pre_insert_map = NULL;
1762 : 411092 : pre_delete_map = NULL;
1763 : 411092 : ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 : :
1765 : : /* pre_insert and pre_delete are allocated later. */
1766 : 411092 : }
1767 : :
1768 : : /* Free vars used for PRE analysis. */
1769 : :
1770 : : static void
1771 : 411092 : free_pre_mem (void)
1772 : : {
1773 : 411092 : sbitmap_vector_free (transp);
1774 : 411092 : sbitmap_vector_free (comp);
1775 : :
1776 : : /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1777 : :
1778 : 411092 : if (pre_optimal)
1779 : 0 : sbitmap_vector_free (pre_optimal);
1780 : 411092 : if (pre_redundant)
1781 : 0 : sbitmap_vector_free (pre_redundant);
1782 : 411092 : if (pre_insert_map)
1783 : 411092 : sbitmap_vector_free (pre_insert_map);
1784 : 411092 : if (pre_delete_map)
1785 : 411092 : sbitmap_vector_free (pre_delete_map);
1786 : :
1787 : 411092 : transp = comp = NULL;
1788 : 411092 : pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1789 : 411092 : }
1790 : :
1791 : : /* Remove certain expressions from anticipatable and transparent
1792 : : sets of basic blocks that have incoming abnormal edge.
1793 : : For PRE remove potentially trapping expressions to avoid placing
1794 : : them on abnormal edges. For hoisting remove memory references that
1795 : : can be clobbered by calls. */
1796 : :
1797 : : static void
1798 : 435844 : prune_expressions (bool pre_p)
1799 : : {
1800 : 435844 : struct gcse_expr *expr;
1801 : 435844 : unsigned int ui;
1802 : 435844 : basic_block bb;
1803 : :
1804 : 435844 : auto_sbitmap prune_exprs (expr_hash_table.n_elems);
1805 : 435844 : bitmap_clear (prune_exprs);
1806 : 20934622 : for (ui = 0; ui < expr_hash_table.size; ui++)
1807 : : {
1808 : 30150494 : for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1809 : : {
1810 : : /* Note potentially trapping expressions. */
1811 : 9651716 : if (may_trap_p (expr->expr))
1812 : : {
1813 : 2642352 : bitmap_set_bit (prune_exprs, expr->bitmap_index);
1814 : 2642352 : continue;
1815 : : }
1816 : :
1817 : 7009364 : if (!pre_p && contains_mem_rtx_p (expr->expr))
1818 : : /* Note memory references that can be clobbered by a call.
1819 : : We do not split abnormal edges in hoisting, so would
1820 : : a memory reference get hoisted along an abnormal edge,
1821 : : it would be placed /before/ the call. Therefore, only
1822 : : constant memory references can be hoisted along abnormal
1823 : : edges. */
1824 : : {
1825 : 36026 : rtx x = expr->expr;
1826 : :
1827 : : /* Common cases where we might find the MEM which may allow us
1828 : : to avoid pruning the expression. */
1829 : 36324 : while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
1830 : 298 : x = XEXP (x, 0);
1831 : :
1832 : : /* If we found the MEM, go ahead and look at it to see if it has
1833 : : properties that allow us to avoid pruning its expression out
1834 : : of the tables. */
1835 : 36026 : if (MEM_P (x))
1836 : : {
1837 : 36699 : if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1838 : 35966 : && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
1839 : 733 : continue;
1840 : :
1841 : 36796 : if (MEM_READONLY_P (x)
1842 : 1563 : && !MEM_VOLATILE_P (x)
1843 : 36796 : && MEM_NOTRAP_P (x))
1844 : : /* Constant memory reference, e.g., a PIC address. */
1845 : 1563 : continue;
1846 : : }
1847 : :
1848 : : /* ??? Optimally, we would use interprocedural alias
1849 : : analysis to determine if this mem is actually killed
1850 : : by this call. */
1851 : :
1852 : 33730 : bitmap_set_bit (prune_exprs, expr->bitmap_index);
1853 : : }
1854 : : }
1855 : : }
1856 : :
1857 : 9628147 : FOR_EACH_BB_FN (bb, cfun)
1858 : : {
1859 : 9192303 : edge e;
1860 : 9192303 : edge_iterator ei;
1861 : :
1862 : : /* If the current block is the destination of an abnormal edge, we
1863 : : kill all trapping (for PRE) and memory (for hoist) expressions
1864 : : because we won't be able to properly place the instruction on
1865 : : the edge. So make them neither anticipatable nor transparent.
1866 : : This is fairly conservative.
1867 : :
1868 : : ??? For hoisting it may be necessary to check for set-and-jump
1869 : : instructions here, not just for abnormal edges. The general problem
1870 : : is that when an expression cannot not be placed right at the end of
1871 : : a basic block we should account for any side-effects of a subsequent
1872 : : jump instructions that could clobber the expression. It would
1873 : : be best to implement this check along the lines of
1874 : : should_hoist_expr_to_dom where the target block is already known
1875 : : and, hence, there's no need to conservatively prune expressions on
1876 : : "intermediate" set-and-jump instructions. */
1877 : 22151712 : FOR_EACH_EDGE (e, ei, bb->preds)
1878 : 13152806 : if ((e->flags & EDGE_ABNORMAL)
1879 : 193485 : && (pre_p || CALL_P (BB_END (e->src))))
1880 : : {
1881 : 193397 : bitmap_and_compl (antloc[bb->index],
1882 : 193397 : antloc[bb->index], prune_exprs);
1883 : 193397 : bitmap_and_compl (transp[bb->index],
1884 : 193397 : transp[bb->index], prune_exprs);
1885 : 193397 : break;
1886 : : }
1887 : : }
1888 : 435844 : }
1889 : :
1890 : : /* It may be necessary to insert a large number of insns on edges to
1891 : : make the existing occurrences of expressions fully redundant. This
1892 : : routine examines the set of insertions and deletions and if the ratio
1893 : : of insertions to deletions is too high for a particular expression, then
1894 : : the expression is removed from the insertion/deletion sets.
1895 : :
1896 : : N_ELEMS is the number of elements in the hash table. */
1897 : :
1898 : : static void
1899 : 411092 : prune_insertions_deletions (int n_elems)
1900 : : {
1901 : 411092 : sbitmap_iterator sbi;
1902 : :
1903 : : /* We always use I to iterate over blocks/edges and J to iterate over
1904 : : expressions. */
1905 : 411092 : unsigned int i, j;
1906 : :
1907 : : /* Counts for the number of times an expression needs to be inserted and
1908 : : number of times an expression can be removed as a result. */
1909 : 411092 : int *insertions = GCNEWVEC (int, n_elems);
1910 : 411092 : int *deletions = GCNEWVEC (int, n_elems);
1911 : :
1912 : : /* Set of expressions which require too many insertions relative to
1913 : : the number of deletions achieved. We will prune these out of the
1914 : : insertion/deletion sets. */
1915 : 411092 : auto_sbitmap prune_exprs (n_elems);
1916 : 411092 : bitmap_clear (prune_exprs);
1917 : :
1918 : : /* Iterate over the edges counting the number of times each expression
1919 : : needs to be inserted. */
1920 : 14859211 : for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1921 : : {
1922 : 28412498 : EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
1923 : 338444 : insertions[j]++;
1924 : : }
1925 : :
1926 : : /* Similarly for deletions, but those occur in blocks rather than on
1927 : : edges. */
1928 : 10910820 : for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1929 : : {
1930 : 21973064 : EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
1931 : 973608 : deletions[j]++;
1932 : : }
1933 : :
1934 : : /* Now that we have accurate counts, iterate over the elements in the
1935 : : hash table and see if any need too many insertions relative to the
1936 : : number of evaluations that can be removed. If so, mark them in
1937 : : PRUNE_EXPRS. */
1938 : 9782314 : for (j = 0; j < (unsigned) n_elems; j++)
1939 : 9371222 : if (deletions[j]
1940 : 367549 : && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio)
1941 : 559 : bitmap_set_bit (prune_exprs, j);
1942 : :
1943 : : /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1944 : 822743 : EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
1945 : : {
1946 : 308729 : for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1947 : 308170 : bitmap_clear_bit (pre_insert_map[i], j);
1948 : :
1949 : 207079 : for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1950 : 206520 : bitmap_clear_bit (pre_delete_map[i], j);
1951 : : }
1952 : :
1953 : 411092 : free (insertions);
1954 : 411092 : free (deletions);
1955 : 411092 : }
1956 : :
1957 : : /* Top level routine to do the dataflow analysis needed by PRE. */
1958 : :
1959 : : static struct edge_list *
1960 : 411092 : compute_pre_data (void)
1961 : : {
1962 : 411092 : struct edge_list *edge_list;
1963 : 411092 : basic_block bb;
1964 : :
1965 : 411092 : compute_local_properties (transp, comp, antloc, &expr_hash_table);
1966 : 411092 : prune_expressions (true);
1967 : 411092 : bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));
1968 : :
1969 : : /* Compute ae_kill for each basic block using:
1970 : :
1971 : : ~(TRANSP | COMP)
1972 : : */
1973 : :
1974 : 9297522 : FOR_EACH_BB_FN (bb, cfun)
1975 : : {
1976 : 8886430 : bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1977 : 8886430 : bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1978 : : }
1979 : :
1980 : 411092 : if (doing_hardreg_pre_p)
1981 : 0 : prune_hardreg_uses (transp, &expr_hash_table);
1982 : :
1983 : 411092 : edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1984 : : ae_kill, &pre_insert_map, &pre_delete_map);
1985 : 411092 : sbitmap_vector_free (antloc);
1986 : 411092 : antloc = NULL;
1987 : 411092 : sbitmap_vector_free (ae_kill);
1988 : 411092 : ae_kill = NULL;
1989 : :
1990 : 411092 : prune_insertions_deletions (expr_hash_table.n_elems);
1991 : :
1992 : 411092 : return edge_list;
1993 : : }
1994 : :
1995 : : /* PRE utilities */
1996 : :
1997 : : /* Return true if an occurrence of expression EXPR in OCCR_BB would reach
1998 : : block BB.
1999 : :
2000 : : VISITED is a pointer to a working buffer for tracking which BB's have
2001 : : been visited. It is NULL for the top-level call.
2002 : :
2003 : : We treat reaching expressions that go through blocks containing the same
2004 : : reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2005 : : 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2006 : : 2 as not reaching. The intent is to improve the probability of finding
2007 : : only one reaching expression and to reduce register lifetimes by picking
2008 : : the closest such expression. */
2009 : :
2010 : : static bool
2011 : 18308544 : pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
2012 : : basic_block bb, char *visited)
2013 : : {
2014 : 18308544 : edge pred;
2015 : 18308544 : edge_iterator ei;
2016 : :
2017 : 38993148 : FOR_EACH_EDGE (pred, ei, bb->preds)
2018 : : {
2019 : 23217688 : basic_block pred_bb = pred->src;
2020 : :
2021 : 23217688 : if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
2022 : : /* Has predecessor has already been visited? */
2023 : 22938155 : || visited[pred_bb->index])
2024 : : ;/* Nothing to do. */
2025 : :
2026 : : /* Does this predecessor generate this expression? */
2027 : 19196514 : else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
2028 : : {
2029 : : /* Is this the occurrence we're looking for?
2030 : : Note that there's only one generating occurrence per block
2031 : : so we just need to check the block number. */
2032 : 2224113 : if (occr_bb == pred_bb)
2033 : : return true;
2034 : :
2035 : 1911763 : visited[pred_bb->index] = 1;
2036 : : }
2037 : : /* Ignore this predecessor if it kills the expression.
2038 : :
2039 : : If this were used for hardreg pre, then it would need to use the kills
2040 : : bitmap. */
2041 : 16972401 : else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
2042 : 171571 : visited[pred_bb->index] = 1;
2043 : :
2044 : : /* Neither gen nor kill. */
2045 : : else
2046 : : {
2047 : 16800830 : visited[pred_bb->index] = 1;
2048 : 16800830 : if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2049 : : return true;
2050 : : }
2051 : : }
2052 : :
2053 : : /* All paths have been checked. */
2054 : : return false;
2055 : : }
2056 : :
2057 : : /* The wrapper for pre_expr_reaches_here_work that ensures that any
2058 : : memory allocated for that function is returned. */
2059 : :
2060 : : static bool
2061 : 1507714 : pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr,
2062 : : basic_block bb)
2063 : : {
2064 : 1507714 : bool rval;
2065 : 1507714 : char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
2066 : :
2067 : 1507714 : rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2068 : :
2069 : 1507714 : free (visited);
2070 : 1507714 : return rval;
2071 : : }
2072 : :
2073 : : /* Generate RTL to copy an EXP to REG and return it. */
2074 : :
2075 : : rtx_insn *
2076 : 316170 : prepare_copy_insn (rtx reg, rtx exp)
2077 : : {
2078 : 316170 : rtx_insn *pat;
2079 : :
2080 : 316170 : start_sequence ();
2081 : :
2082 : : /* If the expression is something that's an operand, like a constant,
2083 : : just copy it to a register. */
2084 : 316170 : if (general_operand (exp, GET_MODE (reg)))
2085 : 97878 : emit_move_insn (reg, exp);
2086 : :
2087 : : /* Otherwise, make a new insn to compute this expression and make sure the
2088 : : insn will be recognized (this also adds any needed CLOBBERs). */
2089 : : else
2090 : : {
2091 : 218292 : rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));
2092 : :
2093 : 218292 : if (insn_invalid_p (insn, false))
2094 : 0 : gcc_unreachable ();
2095 : : }
2096 : :
2097 : 316170 : pat = get_insns ();
2098 : 316170 : end_sequence ();
2099 : :
2100 : 316170 : return pat;
2101 : : }
2102 : :
2103 : : /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2104 : :
2105 : : static rtx_insn *
2106 : 316149 : process_insert_insn (struct gcse_expr *expr)
2107 : : {
2108 : 316149 : rtx reg = expr->reaching_reg;
2109 : : /* Copy the expression to make sure we don't have any sharing issues. */
2110 : 316149 : rtx exp = copy_rtx (expr->expr);
2111 : :
2112 : 316149 : return prepare_copy_insn (reg, exp);
2113 : : }
2114 : :
2115 : : /* Return the INSN which is added at the end of the block BB with
2116 : : same instruction pattern with PAT. */
2117 : :
2118 : : rtx_insn *
2119 : 65724 : insert_insn_end_basic_block (rtx_insn *pat, basic_block bb)
2120 : : {
2121 : 65724 : rtx_insn *insn = BB_END (bb);
2122 : 65724 : rtx_insn *new_insn;
2123 : 65724 : rtx_insn *pat_end;
2124 : :
2125 : 65724 : gcc_assert (pat && INSN_P (pat));
2126 : :
2127 : : pat_end = pat;
2128 : 66263 : while (NEXT_INSN (pat_end) != NULL_RTX)
2129 : : pat_end = NEXT_INSN (pat_end);
2130 : :
2131 : : /* If the last insn is a jump, insert EXPR in front. Similarly we need to
2132 : : take care of trapping instructions in presence of non-call exceptions. */
2133 : :
2134 : 65724 : if (JUMP_P (insn)
2135 : 65724 : || (NONJUMP_INSN_P (insn)
2136 : 17890 : && (!single_succ_p (bb)
2137 : 3113 : || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2138 : : {
2139 : : /* FIXME: What if something in jump uses value set in new insn? */
2140 : 23685 : new_insn = emit_insn_before_noloc (pat, insn, bb);
2141 : : }
2142 : :
2143 : : /* Likewise if the last insn is a call, as will happen in the presence
2144 : : of exception handling. */
2145 : 42039 : else if (CALL_P (insn)
2146 : 80961 : && (!single_succ_p (bb)
2147 : 4917 : || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2148 : : {
2149 : : /* Keeping in mind targets with small register classes and parameters
2150 : : in registers, we search backward and place the instructions before
2151 : : the first parameter is loaded. Do this for everyone for consistency
2152 : : and a presumption that we'll get better code elsewhere as well. */
2153 : :
2154 : : /* Since different machines initialize their parameter registers
2155 : : in different orders, assume nothing. Collect the set of all
2156 : : parameter registers. */
2157 : 38847 : insn = find_first_parameter_load (insn, BB_HEAD (bb));
2158 : :
2159 : : /* If we found all the parameter loads, then we want to insert
2160 : : before the first parameter load.
2161 : :
2162 : : If we did not find all the parameter loads, then we might have
2163 : : stopped on the head of the block, which could be a CODE_LABEL.
2164 : : If we inserted before the CODE_LABEL, then we would be putting
2165 : : the insn in the wrong basic block. In that case, put the insn
2166 : : after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2167 : 38847 : while (LABEL_P (insn)
2168 : 38847 : || NOTE_INSN_BASIC_BLOCK_P (insn))
2169 : 0 : insn = NEXT_INSN (insn);
2170 : :
2171 : 38847 : new_insn = emit_insn_before_noloc (pat, insn, bb);
2172 : : }
2173 : : else
2174 : 3192 : new_insn = emit_insn_after_noloc (pat, insn, bb);
2175 : :
2176 : 539 : while (1)
2177 : : {
2178 : 66263 : if (INSN_P (pat))
2179 : 66263 : add_label_notes (PATTERN (pat), new_insn);
2180 : 66263 : if (pat == pat_end)
2181 : : break;
2182 : 539 : pat = NEXT_INSN (pat);
2183 : : }
2184 : 65724 : return new_insn;
2185 : : }
2186 : :
2187 : : /* Add EXPR to the end of basic block BB.
2188 : :
2189 : : This is used by both the PRE and code hoisting. */
2190 : :
2191 : : static void
2192 : 65724 : insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
2193 : : {
2194 : 65724 : rtx reg = expr->reaching_reg;
2195 : 65724 : int regno = REGNO (reg);
2196 : :
2197 : 65724 : rtx_insn *insn = process_insert_insn (expr);
2198 : 65724 : rtx_insn *new_insn = insert_insn_end_basic_block (insn, bb);
2199 : :
2200 : 65724 : gcse_create_count++;
2201 : :
2202 : 65724 : if (dump_file)
2203 : : {
2204 : 3 : fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2205 : 3 : bb->index, INSN_UID (new_insn));
2206 : 3 : fprintf (dump_file, "copying expression %d to reg %d\n",
2207 : : expr->bitmap_index, regno);
2208 : : }
2209 : 65724 : }
2210 : :
2211 : : /* Return the INSN which is added at the start of the block BB with
2212 : : same instruction pattern with PAT. */
2213 : :
2214 : : rtx_insn *
2215 : 0 : insert_insn_start_basic_block (rtx_insn *pat, basic_block bb)
2216 : : {
2217 : 0 : rtx_insn *insn = BB_HEAD (bb);
2218 : 0 : rtx_insn *next_insn;
2219 : :
2220 : 0 : gcc_assert (pat && INSN_P (pat));
2221 : :
2222 : : /* Insert after the last initial CODE_LABEL or NOTE_INSN_BASIC_BLOCK, before
2223 : : any other instructions. */
2224 : 0 : while ((next_insn = NEXT_INSN (insn))
2225 : 0 : && (LABEL_P (next_insn) || NOTE_INSN_BASIC_BLOCK_P (insn)))
2226 : : insn = next_insn;
2227 : :
2228 : 0 : rtx_insn *new_insn = emit_insn_after_noloc (pat, insn, bb);
2229 : :
2230 : 0 : while (pat != NULL_RTX)
2231 : : {
2232 : 0 : if (INSN_P (pat))
2233 : 0 : add_label_notes (PATTERN (pat), new_insn);
2234 : 0 : pat = NEXT_INSN (pat);
2235 : : }
2236 : :
2237 : 0 : return new_insn;
2238 : : }
2239 : :
2240 : : /* Add EXPR to the start of basic block BB.
2241 : :
2242 : : This is used by hardreg PRE. */
2243 : :
2244 : : static void
2245 : 0 : insert_insn_start_basic_block (struct gcse_expr *expr, basic_block bb)
2246 : : {
2247 : 0 : rtx reg = expr->reaching_reg;
2248 : 0 : int regno = REGNO (reg);
2249 : :
2250 : 0 : rtx_insn *insn = process_insert_insn (expr);
2251 : 0 : rtx_insn *new_insn = insert_insn_start_basic_block (insn, bb);
2252 : :
2253 : 0 : gcse_create_count++;
2254 : :
2255 : 0 : if (dump_file)
2256 : : {
2257 : 0 : fprintf (dump_file, "hardreg PRE: start of bb %d, insn %d, ",
2258 : 0 : bb->index, INSN_UID (new_insn));
2259 : 0 : fprintf (dump_file, "copying expression %d to reg %d\n",
2260 : : expr->bitmap_index, regno);
2261 : : }
2262 : 0 : }
2263 : :
2264 : : /* Insert partially redundant expressions on edges in the CFG to make
2265 : : the expressions fully redundant. */
2266 : :
2267 : : static bool
2268 : 411092 : pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
2269 : : {
2270 : 411092 : int e, i, j, num_edges, set_size;
2271 : 411092 : bool did_insert = false;
2272 : 411092 : sbitmap *inserted;
2273 : :
2274 : : /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2275 : : if it reaches any of the deleted expressions. */
2276 : :
2277 : 411092 : set_size = pre_insert_map[0]->size;
2278 : 411092 : num_edges = NUM_EDGES (edge_list);
2279 : 411092 : inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2280 : 411092 : bitmap_vector_clear (inserted, num_edges);
2281 : :
2282 : 14448119 : for (e = 0; e < num_edges; e++)
2283 : : {
2284 : 14037027 : int indx;
2285 : 14037027 : basic_block pred_bb = INDEX_EDGE_PRED_BB (edge_list, e);
2286 : 14037027 : basic_block succ_bb = INDEX_EDGE_SUCC_BB (edge_list, e);
2287 : :
2288 : 50360904 : for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2289 : : {
2290 : 36323877 : SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2291 : :
2292 : 36323877 : for (j = indx;
2293 : 41124793 : insert && j < (int) expr_hash_table.n_elems;
2294 : 4800916 : j++, insert >>= 1)
2295 : 4800916 : if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2296 : : {
2297 : 303728 : struct gcse_expr *expr = index_map[j];
2298 : 303728 : struct gcse_occr *occr;
2299 : :
2300 : : /* Now look at each deleted occurrence of this expression. */
2301 : 1910571 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2302 : : {
2303 : 1606843 : if (! occr->deleted_p)
2304 : 615733 : continue;
2305 : :
2306 : : /* Insert this expression on this edge if it would
2307 : : reach the deleted occurrence in BB. */
2308 : 991110 : if (!bitmap_bit_p (inserted[e], j))
2309 : : {
2310 : 303728 : rtx_insn *insn;
2311 : 303728 : edge eg = INDEX_EDGE (edge_list, e);
2312 : :
2313 : : /* We can't insert anything on an abnormal and
2314 : : critical edge, so we insert the insn at the end of
2315 : : the previous block. There are several alternatives
2316 : : detailed in Morgans book P277 (sec 10.5) for
2317 : : handling this situation. This one is easiest for
2318 : : now.
2319 : :
2320 : : For hardreg PRE this would add an unwanted clobber
2321 : : of the hardreg, so we instead insert in the
2322 : : successor block. This may be partially redundant,
2323 : : but it is at least correct. */
2324 : 303728 : if (eg->flags & EDGE_ABNORMAL)
2325 : : {
2326 : 53303 : if (doing_hardreg_pre_p)
2327 : 0 : insert_insn_start_basic_block (index_map[j],
2328 : : succ_bb);
2329 : : else
2330 : 53303 : insert_insn_end_basic_block (index_map[j],
2331 : : pred_bb);
2332 : : }
2333 : : else
2334 : : {
2335 : 250425 : insn = process_insert_insn (index_map[j]);
2336 : 250425 : insert_insn_on_edge (insn, eg);
2337 : : }
2338 : :
2339 : 303728 : if (dump_file)
2340 : : {
2341 : 8 : fprintf (dump_file, "PRE: edge (%d,%d), ",
2342 : : pred_bb->index,
2343 : : succ_bb->index);
2344 : 8 : fprintf (dump_file, "copy expression %d\n",
2345 : : expr->bitmap_index);
2346 : : }
2347 : :
2348 : 303728 : update_ld_motion_stores (expr);
2349 : 303728 : bitmap_set_bit (inserted[e], j);
2350 : 303728 : did_insert = true;
2351 : 303728 : gcse_create_count++;
2352 : : }
2353 : : }
2354 : : }
2355 : : }
2356 : : }
2357 : :
2358 : 411092 : sbitmap_vector_free (inserted);
2359 : 411092 : return did_insert;
2360 : : }
2361 : :
2362 : : /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2363 : : Given "old_reg <- expr" (INSN), instead of adding after it
2364 : : reaching_reg <- old_reg
2365 : : it's better to do the following:
2366 : : reaching_reg <- expr
2367 : : old_reg <- reaching_reg
2368 : : because this way copy propagation can discover additional PRE
2369 : : opportunities. But if this fails, we try the old way.
2370 : : When "expr" is a store, i.e.
2371 : : given "MEM <- old_reg", instead of adding after it
2372 : : reaching_reg <- old_reg
2373 : : it's better to add it before as follows:
2374 : : reaching_reg <- old_reg
2375 : : MEM <- reaching_reg. */
2376 : :
2377 : : static void
2378 : 312350 : pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
2379 : : {
2380 : 312350 : rtx reg = expr->reaching_reg;
2381 : 312350 : int regno = REGNO (reg);
2382 : 312350 : int indx = expr->bitmap_index;
2383 : 312350 : rtx pat = PATTERN (insn);
2384 : 312350 : rtx set, first_set;
2385 : 312350 : rtx_insn *new_insn;
2386 : 312350 : rtx old_reg;
2387 : 312350 : int i;
2388 : :
2389 : : /* This block matches the logic in hash_scan_insn. */
2390 : 312350 : switch (GET_CODE (pat))
2391 : : {
2392 : : case SET:
2393 : : set = pat;
2394 : : break;
2395 : :
2396 : : case PARALLEL:
2397 : : /* Search through the parallel looking for the set whose
2398 : : source was the expression that we're interested in. */
2399 : : first_set = NULL_RTX;
2400 : 216751 : set = NULL_RTX;
2401 : 216751 : for (i = 0; i < XVECLEN (pat, 0); i++)
2402 : : {
2403 : 216669 : rtx x = XVECEXP (pat, 0, i);
2404 : 216669 : if (GET_CODE (x) == SET)
2405 : : {
2406 : : /* If the source was a REG_EQUAL or REG_EQUIV note, we
2407 : : may not find an equivalent expression, but in this
2408 : : case the PARALLEL will have a single set. */
2409 : 216587 : if (first_set == NULL_RTX)
2410 : 216435 : first_set = x;
2411 : 216587 : if (expr_equiv_p (SET_SRC (x), expr->expr))
2412 : : {
2413 : : set = x;
2414 : : break;
2415 : : }
2416 : : }
2417 : : }
2418 : :
2419 : 216435 : gcc_assert (first_set);
2420 : 216435 : if (set == NULL_RTX)
2421 : 82 : set = first_set;
2422 : : break;
2423 : :
2424 : 0 : default:
2425 : 0 : gcc_unreachable ();
2426 : : }
2427 : :
2428 : 312350 : if (REG_P (SET_DEST (set)))
2429 : : {
2430 : 312297 : old_reg = SET_DEST (set);
2431 : : /* Check if we can modify the set destination in the original insn. */
2432 : 312297 : if (validate_change (insn, &SET_DEST (set), reg, 0))
2433 : : {
2434 : 312297 : new_insn = gen_move_insn (old_reg, reg);
2435 : 312297 : new_insn = emit_insn_after (new_insn, insn);
2436 : : }
2437 : : else
2438 : : {
2439 : 0 : new_insn = gen_move_insn (reg, old_reg);
2440 : 0 : new_insn = emit_insn_after (new_insn, insn);
2441 : : }
2442 : : }
2443 : : else /* This is possible only in case of a store to memory. */
2444 : : {
2445 : 53 : old_reg = SET_SRC (set);
2446 : 53 : new_insn = gen_move_insn (reg, old_reg);
2447 : :
2448 : : /* Check if we can modify the set source in the original insn. */
2449 : 53 : if (validate_change (insn, &SET_SRC (set), reg, 0))
2450 : 53 : new_insn = emit_insn_before (new_insn, insn);
2451 : : else
2452 : 0 : new_insn = emit_insn_after (new_insn, insn);
2453 : : }
2454 : :
2455 : 312350 : gcse_create_count++;
2456 : :
2457 : 312350 : if (dump_file)
2458 : 24 : fprintf (dump_file,
2459 : : "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2460 : 8 : BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2461 : 8 : INSN_UID (insn), regno);
2462 : 312350 : }
2463 : :
2464 : : /* Copy available expressions that reach the redundant expression
2465 : : to `reaching_reg'. */
2466 : :
2467 : : static void
2468 : 411092 : pre_insert_copies (void)
2469 : : {
2470 : 411092 : unsigned int i;
2471 : 411092 : bool added_copy;
2472 : 411092 : struct gcse_expr *expr;
2473 : 411092 : struct gcse_occr *occr;
2474 : 411092 : struct gcse_occr *avail;
2475 : :
2476 : : /* For each available expression in the table, copy the result to
2477 : : `reaching_reg' if the expression reaches a deleted one.
2478 : :
2479 : : ??? The current algorithm is rather brute force.
2480 : : Need to do some profiling. */
2481 : :
2482 : 20135924 : for (i = 0; i < expr_hash_table.size; i++)
2483 : 29096054 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2484 : : {
2485 : : /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2486 : : we don't want to insert a copy here because the expression may not
2487 : : really be redundant. So only insert an insn if the expression was
2488 : : deleted. This test also avoids further processing if the
2489 : : expression wasn't deleted anywhere. */
2490 : 9371222 : if (expr->reaching_reg == NULL)
2491 : 9004233 : continue;
2492 : :
2493 : : /* Set when we add a copy for that expression. */
2494 : 366989 : added_copy = false;
2495 : :
2496 : 1715655 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2497 : : {
2498 : 1348666 : if (! occr->deleted_p)
2499 : 376121 : continue;
2500 : :
2501 : 15865166 : for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2502 : : {
2503 : 14892621 : rtx_insn *insn = avail->insn;
2504 : :
2505 : : /* No need to handle this one if handled already. */
2506 : 14892621 : if (avail->copied_p)
2507 : 423695 : continue;
2508 : :
2509 : : /* Don't handle this one if it's a redundant one. */
2510 : 14468926 : if (insn->deleted ())
2511 : 12961212 : continue;
2512 : :
2513 : : /* Or if the expression doesn't reach the deleted one. */
2514 : 1507714 : if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2515 : : expr,
2516 : 1507714 : BLOCK_FOR_INSN (occr->insn)))
2517 : 1195364 : continue;
2518 : :
2519 : 312350 : added_copy = true;
2520 : :
2521 : : /* Copy the result of avail to reaching_reg. */
2522 : 312350 : pre_insert_copy_insn (expr, insn);
2523 : 312350 : avail->copied_p = 1;
2524 : : }
2525 : : }
2526 : :
2527 : 366989 : if (added_copy)
2528 : 253579 : update_ld_motion_stores (expr);
2529 : : }
2530 : 411092 : }
2531 : :
2532 : : struct set_data
2533 : : {
2534 : : rtx_insn *insn;
2535 : : const_rtx set;
2536 : : int nsets;
2537 : : };
2538 : :
2539 : : /* Increment number of sets and record set in DATA. */
2540 : :
2541 : : static void
2542 : 1777618 : record_set_data (rtx dest, const_rtx set, void *data)
2543 : : {
2544 : 1777618 : struct set_data *s = (struct set_data *)data;
2545 : :
2546 : 1777618 : if (GET_CODE (set) == SET)
2547 : : {
2548 : : /* We allow insns having multiple sets, where all but one are
2549 : : dead as single set insns. In the common case only a single
2550 : : set is present, so we want to avoid checking for REG_UNUSED
2551 : : notes unless necessary. */
2552 : 888809 : if (s->nsets == 1
2553 : 0 : && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
2554 : 888809 : && !side_effects_p (s->set))
2555 : 0 : s->nsets = 0;
2556 : :
2557 : 888809 : if (!s->nsets)
2558 : : {
2559 : : /* Record this set. */
2560 : 888809 : s->nsets += 1;
2561 : 888809 : s->set = set;
2562 : : }
2563 : 0 : else if (!find_reg_note (s->insn, REG_UNUSED, dest)
2564 : 0 : || side_effects_p (set))
2565 : 0 : s->nsets += 1;
2566 : : }
2567 : 1777618 : }
2568 : :
2569 : : static const_rtx
2570 : 1709960 : single_set_gcse (rtx_insn *insn)
2571 : : {
2572 : 1709960 : struct set_data s;
2573 : 1709960 : rtx pattern;
2574 : :
2575 : 1709960 : gcc_assert (INSN_P (insn));
2576 : :
2577 : : /* Optimize common case. */
2578 : 1709960 : pattern = PATTERN (insn);
2579 : 1709960 : if (GET_CODE (pattern) == SET)
2580 : : return pattern;
2581 : :
2582 : 888809 : s.insn = insn;
2583 : 888809 : s.nsets = 0;
2584 : 888809 : note_pattern_stores (pattern, record_set_data, &s);
2585 : :
2586 : : /* Considered invariant insns have exactly one set. */
2587 : 888809 : gcc_assert (s.nsets == 1);
2588 : 888809 : return s.set;
2589 : : }
2590 : :
2591 : : /* Emit move from SRC to DEST noting the equivalence with expression computed
2592 : : in INSN. */
2593 : :
2594 : : static rtx_insn *
2595 : 1004559 : gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
2596 : : {
2597 : 1004559 : rtx_insn *new_rtx;
2598 : 1004559 : const_rtx set = single_set_gcse (insn);
2599 : 1004559 : rtx set2;
2600 : 1004559 : rtx note;
2601 : 1004559 : rtx eqv = NULL_RTX;
2602 : :
2603 : : /* This should never fail since we're creating a reg->reg copy
2604 : : we've verified to be valid. */
2605 : :
2606 : 1004559 : new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2607 : :
2608 : : /* Note the equivalence for local CSE pass. Take the note from the old
2609 : : set if there was one. Otherwise record the SET_SRC from the old set
2610 : : unless DEST is also an operand of the SET_SRC. */
2611 : 1004559 : set2 = single_set (new_rtx);
2612 : 1004559 : if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2613 : 0 : return new_rtx;
2614 : 1004559 : if ((note = find_reg_equal_equiv_note (insn)))
2615 : 168363 : eqv = XEXP (note, 0);
2616 : 836196 : else if (! REG_P (dest)
2617 : 836196 : || ! reg_mentioned_p (dest, SET_SRC (set)))
2618 : 834000 : eqv = SET_SRC (set);
2619 : :
2620 : 1002363 : if (eqv != NULL_RTX)
2621 : 1002363 : set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2622 : :
2623 : : return new_rtx;
2624 : : }
2625 : :
2626 : : /* Delete redundant computations.
2627 : : Deletion is done by changing the insn to copy the `reaching_reg' of
2628 : : the expression into the result of the SET. It is left to later passes
2629 : : to propagate the copy or eliminate it.
2630 : :
2631 : : Return true if a change is made. */
2632 : :
2633 : : static bool
2634 : 411092 : pre_delete (void)
2635 : : {
2636 : 411092 : unsigned int i;
2637 : 411092 : bool changed = false;
2638 : 411092 : struct gcse_expr *expr;
2639 : 411092 : struct gcse_occr *occr;
2640 : :
2641 : 20135924 : for (i = 0; i < expr_hash_table.size; i++)
2642 : 29096054 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2643 : : {
2644 : 9371222 : int indx = expr->bitmap_index;
2645 : :
2646 : : /* We only need to search antic_occr since we require ANTLOC != 0. */
2647 : 16066448 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2648 : : {
2649 : 6695226 : rtx_insn *insn = occr->insn;
2650 : 6695226 : rtx set;
2651 : 6695226 : basic_block bb = BLOCK_FOR_INSN (insn);
2652 : :
2653 : : /* We only delete insns that have a single_set. */
2654 : 6695226 : if (bitmap_bit_p (pre_delete_map[bb->index], indx)
2655 : 972546 : && (set = single_set (insn)) != 0
2656 : 7667771 : && dbg_cnt (pre_insn))
2657 : : {
2658 : 972545 : rtx dest = SET_DEST (set);
2659 : 972545 : if (expr->reaching_reg == NULL)
2660 : : {
2661 : 366989 : if (doing_hardreg_pre_p)
2662 : : /* Use the hardreg as the reaching register. The
2663 : : deleted sets will be replaced with noop moves.
2664 : :
2665 : : This may change the value of the hardreg in some debug
2666 : : instructions, so we will need to reset any debug uses
2667 : : of the hardreg. */
2668 : 0 : expr->reaching_reg = dest;
2669 : : else
2670 : : /* Create a pseudo-reg to store the result of reaching
2671 : : expressions into. Get the mode for the new pseudo from
2672 : : the mode of the original destination pseudo. */
2673 : 366989 : expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2674 : : }
2675 : :
2676 : 972545 : gcse_emit_move_after (dest, expr->reaching_reg, insn);
2677 : 972545 : delete_insn (insn);
2678 : 972545 : occr->deleted_p = 1;
2679 : 972545 : changed = true;
2680 : 972545 : gcse_subst_count++;
2681 : :
2682 : 972545 : if (dump_file)
2683 : : {
2684 : 8 : fprintf (dump_file,
2685 : : "PRE: redundant insn %d (expression %d) in ",
2686 : 8 : INSN_UID (insn), indx);
2687 : 8 : fprintf (dump_file, "bb %d, reaching reg is %d\n",
2688 : 8 : bb->index, REGNO (expr->reaching_reg));
2689 : : }
2690 : : }
2691 : : }
2692 : : }
2693 : :
2694 : 411092 : return changed;
2695 : : }
2696 : :
2697 : : /* Since hardreg PRE reuses the hardreg as the reaching register, we need to
2698 : : eliminate any existing uses in debug insns. This is overly conservative,
2699 : : but there's currently no benefit to preserving the debug insns, so there's
2700 : : no point doing the work to retain them. */
2701 : :
2702 : : static void
2703 : 0 : reset_hardreg_debug_uses ()
2704 : : {
2705 : 0 : df_ref def;
2706 : 0 : for (def = DF_REG_USE_CHAIN (current_hardreg_regno);
2707 : 0 : def;
2708 : 0 : def = DF_REF_NEXT_REG (def))
2709 : : {
2710 : 0 : rtx_insn *insn = DF_REF_INSN (def);
2711 : 0 : if (DEBUG_INSN_P (insn))
2712 : 0 : delete_insn (insn);
2713 : : }
2714 : 0 : }
2715 : :
2716 : : /* Perform GCSE optimizations using PRE.
2717 : : This is called by one_pre_gcse_pass after all the dataflow analysis
2718 : : has been done.
2719 : :
2720 : : This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2721 : : lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2722 : : Compiler Design and Implementation.
2723 : :
2724 : : ??? A new pseudo reg is created to hold the reaching expression. The nice
2725 : : thing about the classical approach is that it would try to use an existing
2726 : : reg. If the register can't be adequately optimized [i.e. we introduce
2727 : : reload problems], one could add a pass here to propagate the new register
2728 : : through the block.
2729 : :
2730 : : ??? We don't handle single sets in PARALLELs because we're [currently] not
2731 : : able to copy the rest of the parallel when we insert copies to create full
2732 : : redundancies from partial redundancies. However, there's no reason why we
2733 : : can't handle PARALLELs in the cases where there are no partial
2734 : : redundancies. */
2735 : :
2736 : : static bool
2737 : 411092 : pre_gcse (struct edge_list *edge_list)
2738 : : {
2739 : 411092 : unsigned int i;
2740 : 411092 : bool did_insert, changed;
2741 : 411092 : struct gcse_expr **index_map;
2742 : 411092 : struct gcse_expr *expr;
2743 : :
2744 : : /* Compute a mapping from expression number (`bitmap_index') to
2745 : : hash table entry. */
2746 : :
2747 : 411092 : index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
2748 : 20547016 : for (i = 0; i < expr_hash_table.size; i++)
2749 : 29096054 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2750 : 9371222 : index_map[expr->bitmap_index] = expr;
2751 : :
2752 : : /* Delete the redundant insns first so that
2753 : : - we know what register to use for the new insns and for the other
2754 : : ones with reaching expressions
2755 : : - we know which insns are redundant when we go to create copies */
2756 : :
2757 : 411092 : changed = pre_delete ();
2758 : 411092 : did_insert = pre_edge_insert (edge_list, index_map);
2759 : : /* In other places with reaching expressions, copy the expression to the
2760 : : specially allocated pseudo-reg that reaches the redundant expr. This
2761 : : isn't needed for hardreg PRE. */
2762 : 411092 : if (!doing_hardreg_pre_p)
2763 : 411092 : pre_insert_copies ();
2764 : :
2765 : 411092 : if (did_insert)
2766 : : {
2767 : 76802 : if (doing_hardreg_pre_p)
2768 : 0 : reset_hardreg_debug_uses ();
2769 : 76802 : commit_edge_insertions ();
2770 : 76802 : changed = true;
2771 : : }
2772 : :
2773 : 411092 : free (index_map);
2774 : 411092 : return changed;
2775 : : }
2776 : :
2777 : : /* Top level routine to perform one PRE GCSE pass.
2778 : :
2779 : : Return true if a change was made. */
2780 : :
2781 : : static bool
2782 : 879894 : one_pre_gcse_pass (void)
2783 : : {
2784 : 879894 : bool changed = false;
2785 : :
2786 : 879894 : gcse_subst_count = 0;
2787 : 879894 : gcse_create_count = 0;
2788 : :
2789 : : /* Return if there's nothing to do, or it is too expensive. */
2790 : 879894 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
2791 : 879894 : || gcse_or_cprop_is_too_expensive (_("PRE disabled")))
2792 : 408009 : return false;
2793 : :
2794 : : /* We need alias. */
2795 : 471885 : init_alias_analysis ();
2796 : :
2797 : 471885 : bytes_used = 0;
2798 : 471885 : gcc_obstack_init (&gcse_obstack);
2799 : 471885 : alloc_gcse_mem ();
2800 : :
2801 : 471885 : alloc_hash_table (&expr_hash_table);
2802 : 471885 : add_noreturn_fake_exit_edges ();
2803 : 471885 : if (do_load_motion ())
2804 : 471883 : compute_ld_motion_mems ();
2805 : :
2806 : 471885 : compute_hash_table (&expr_hash_table);
2807 : 471885 : if (do_load_motion ())
2808 : 471883 : trim_ld_motion_mems ();
2809 : 471885 : if (dump_file)
2810 : 17 : dump_hash_table (dump_file, "Expression", &expr_hash_table);
2811 : :
2812 : 471885 : if (expr_hash_table.n_elems > 0)
2813 : : {
2814 : 411092 : struct edge_list *edge_list;
2815 : 411092 : alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
2816 : 411092 : edge_list = compute_pre_data ();
2817 : 411092 : if (pre_gcse (edge_list))
2818 : : changed = true;
2819 : 411092 : free_edge_list (edge_list);
2820 : 411092 : free_pre_mem ();
2821 : : }
2822 : :
2823 : 471885 : if (do_load_motion ())
2824 : 471883 : free_ld_motion_mems ();
2825 : 471885 : remove_fake_exit_edges ();
2826 : 471885 : free_hash_table (&expr_hash_table);
2827 : :
2828 : 471885 : free_gcse_mem ();
2829 : 471885 : obstack_free (&gcse_obstack, NULL);
2830 : :
2831 : : /* We are finished with alias. */
2832 : 471885 : end_alias_analysis ();
2833 : :
2834 : 471885 : if (dump_file)
2835 : : {
2836 : 17 : fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2837 : 17 : current_function_name (), n_basic_blocks_for_fn (cfun),
2838 : : bytes_used);
2839 : 17 : fprintf (dump_file, "%d substs, %d insns created\n",
2840 : : gcse_subst_count, gcse_create_count);
2841 : : }
2842 : :
2843 : : return changed;
2844 : : }
2845 : :
2846 : : /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2847 : : to INSN. If such notes are added to an insn which references a
2848 : : CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2849 : : that note, because the following loop optimization pass requires
2850 : : them. */
2851 : :
2852 : : /* ??? If there was a jump optimization pass after gcse and before loop,
2853 : : then we would not need to do this here, because jump would add the
2854 : : necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2855 : :
2856 : : static void
2857 : 326343 : add_label_notes (rtx x, rtx_insn *insn)
2858 : : {
2859 : 326343 : enum rtx_code code = GET_CODE (x);
2860 : 326343 : int i, j;
2861 : 326343 : const char *fmt;
2862 : :
2863 : 326343 : if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2864 : : {
2865 : : /* This code used to ignore labels that referred to dispatch tables to
2866 : : avoid flow generating (slightly) worse code.
2867 : :
2868 : : We no longer ignore such label references (see LABEL_REF handling in
2869 : : mark_jump_label for additional information). */
2870 : :
2871 : : /* There's no reason for current users to emit jump-insns with
2872 : : such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2873 : : notes. */
2874 : 0 : gcc_assert (!JUMP_P (insn));
2875 : 0 : add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
2876 : :
2877 : 0 : if (LABEL_P (label_ref_label (x)))
2878 : 0 : LABEL_NUSES (label_ref_label (x))++;
2879 : :
2880 : 0 : return;
2881 : : }
2882 : :
2883 : 790131 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2884 : : {
2885 : 463788 : if (fmt[i] == 'e')
2886 : 260026 : add_label_notes (XEXP (x, i), insn);
2887 : 203762 : else if (fmt[i] == 'E')
2888 : 81 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2889 : 54 : add_label_notes (XVECEXP (x, i, j), insn);
2890 : : }
2891 : : }
2892 : :
2893 : : /* Code Hoisting variables and subroutines. */
2894 : :
2895 : : /* Very busy expressions. */
2896 : : static sbitmap *hoist_vbein;
2897 : : static sbitmap *hoist_vbeout;
2898 : :
2899 : : /* ??? We could compute post dominators and run this algorithm in
2900 : : reverse to perform tail merging, doing so would probably be
2901 : : more effective than the tail merging code in jump.cc.
2902 : :
2903 : : It's unclear if tail merging could be run in parallel with
2904 : : code hoisting. It would be nice. */
2905 : :
2906 : : /* Allocate vars used for code hoisting analysis. */
2907 : :
2908 : : static void
2909 : 24752 : alloc_code_hoist_mem (int n_blocks, int n_exprs)
2910 : : {
2911 : 24752 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2912 : 24752 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2913 : 24752 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2914 : :
2915 : 24752 : hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2916 : 24752 : hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2917 : 24752 : }
2918 : :
2919 : : /* Free vars used for code hoisting analysis. */
2920 : :
2921 : : static void
2922 : 24752 : free_code_hoist_mem (void)
2923 : : {
2924 : 24752 : sbitmap_vector_free (antloc);
2925 : 24752 : sbitmap_vector_free (transp);
2926 : 24752 : sbitmap_vector_free (comp);
2927 : :
2928 : 24752 : sbitmap_vector_free (hoist_vbein);
2929 : 24752 : sbitmap_vector_free (hoist_vbeout);
2930 : :
2931 : 24752 : free_dominance_info (CDI_DOMINATORS);
2932 : 24752 : }
2933 : :
2934 : : /* Compute the very busy expressions at entry/exit from each block.
2935 : :
2936 : : An expression is very busy if all paths from a given point
2937 : : compute the expression. */
2938 : :
2939 : : static void
2940 : 24752 : compute_code_hoist_vbeinout (void)
2941 : : {
2942 : 24752 : int changed, passes;
2943 : 24752 : basic_block bb;
2944 : :
2945 : 24752 : bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
2946 : 24752 : bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));
2947 : :
2948 : 24752 : passes = 0;
2949 : 24752 : changed = 1;
2950 : :
2951 : 101453 : while (changed)
2952 : : {
2953 : 51949 : changed = 0;
2954 : :
2955 : : /* We scan the blocks in the reverse order to speed up
2956 : : the convergence. */
2957 : 852850 : FOR_EACH_BB_REVERSE_FN (bb, cfun)
2958 : : {
2959 : 800901 : if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2960 : : {
2961 : 748952 : bitmap_intersection_of_succs (hoist_vbeout[bb->index],
2962 : : hoist_vbein, bb);
2963 : :
2964 : : /* Include expressions in VBEout that are calculated
2965 : : in BB and available at its end. */
2966 : 748952 : bitmap_ior (hoist_vbeout[bb->index],
2967 : 748952 : hoist_vbeout[bb->index], comp[bb->index]);
2968 : : }
2969 : :
2970 : 800901 : changed |= bitmap_or_and (hoist_vbein[bb->index],
2971 : 800901 : antloc[bb->index],
2972 : 800901 : hoist_vbeout[bb->index],
2973 : 800901 : transp[bb->index]);
2974 : : }
2975 : :
2976 : 51949 : passes++;
2977 : : }
2978 : :
2979 : 24752 : if (dump_file)
2980 : : {
2981 : 7 : fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2982 : :
2983 : 35 : FOR_EACH_BB_FN (bb, cfun)
2984 : : {
2985 : 28 : fprintf (dump_file, "vbein (%d): ", bb->index);
2986 : 28 : dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
2987 : 28 : fprintf (dump_file, "vbeout(%d): ", bb->index);
2988 : 28 : dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
2989 : : }
2990 : : }
2991 : 24752 : }
2992 : :
2993 : : /* Top level routine to do the dataflow analysis needed by code hoisting. */
2994 : :
2995 : : static void
2996 : 24752 : compute_code_hoist_data (void)
2997 : : {
2998 : 24752 : compute_local_properties (transp, comp, antloc, &expr_hash_table);
2999 : 24752 : prune_expressions (false);
3000 : 24752 : compute_code_hoist_vbeinout ();
3001 : 24752 : calculate_dominance_info (CDI_DOMINATORS);
3002 : 24752 : if (dump_file)
3003 : 7 : fprintf (dump_file, "\n");
3004 : 24752 : }
3005 : :
3006 : : /* Update register pressure for BB when hoisting an expression from
3007 : : instruction FROM, if live ranges of inputs are shrunk. Also
3008 : : maintain live_in information if live range of register referred
3009 : : in FROM is shrunk.
3010 : :
3011 : : Return 0 if register pressure doesn't change, otherwise return
3012 : : the number by which register pressure is decreased.
3013 : :
3014 : : NOTE: Register pressure won't be increased in this function. */
3015 : :
3016 : : static int
3017 : 12170570 : update_bb_reg_pressure (basic_block bb, rtx_insn *from)
3018 : : {
3019 : 12170570 : rtx dreg;
3020 : 12170570 : rtx_insn *insn;
3021 : 12170570 : basic_block succ_bb;
3022 : 12170570 : df_ref use, op_ref;
3023 : 12170570 : edge succ;
3024 : 12170570 : edge_iterator ei;
3025 : 12170570 : int decreased_pressure = 0;
3026 : 12170570 : int nregs;
3027 : 12170570 : enum reg_class pressure_class;
3028 : :
3029 : 14728354 : FOR_EACH_INSN_USE (use, from)
3030 : : {
3031 : 2557784 : dreg = DF_REF_REAL_REG (use);
3032 : : /* The live range of register is shrunk only if it isn't:
3033 : : 1. referred on any path from the end of this block to EXIT, or
3034 : : 2. referred by insns other than FROM in this block. */
3035 : 3614960 : FOR_EACH_EDGE (succ, ei, bb->succs)
3036 : : {
3037 : 3058059 : succ_bb = succ->dest;
3038 : 3058059 : if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3039 : 3744 : continue;
3040 : :
3041 : 3054315 : if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
3042 : : break;
3043 : : }
3044 : 2557784 : if (succ != NULL)
3045 : 2000883 : continue;
3046 : :
3047 : 556901 : op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
3048 : 1776126 : for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
3049 : : {
3050 : 1232132 : if (!DF_REF_INSN_INFO (op_ref))
3051 : 177809 : continue;
3052 : :
3053 : 1054323 : insn = DF_REF_INSN (op_ref);
3054 : 1054323 : if (BLOCK_FOR_INSN (insn) == bb
3055 : 1054323 : && NONDEBUG_INSN_P (insn) && insn != from)
3056 : : break;
3057 : : }
3058 : :
3059 : 556901 : pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
3060 : : /* Decrease register pressure and update live_in information for
3061 : : this block. */
3062 : 556901 : if (!op_ref && pressure_class != NO_REGS)
3063 : : {
3064 : 543320 : decreased_pressure += nregs;
3065 : 543320 : BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
3066 : 543320 : bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
3067 : : }
3068 : : }
3069 : 12170570 : return decreased_pressure;
3070 : : }
3071 : :
3072 : : /* Determine if the expression EXPR should be hoisted to EXPR_BB up in
3073 : : flow graph, if it can reach BB unimpared. Stop the search if the
3074 : : expression would need to be moved more than DISTANCE instructions.
3075 : :
3076 : : DISTANCE is the number of instructions through which EXPR can be
3077 : : hoisted up in flow graph.
3078 : :
3079 : : BB_SIZE points to an array which contains the number of instructions
3080 : : for each basic block.
3081 : :
3082 : : PRESSURE_CLASS and NREGS are register class and number of hard registers
3083 : : for storing EXPR.
3084 : :
3085 : : HOISTED_BBS points to a bitmap indicating basic blocks through which
3086 : : EXPR is hoisted.
3087 : :
3088 : : FROM is the instruction from which EXPR is hoisted.
3089 : :
3090 : : It's unclear exactly what Muchnick meant by "unimpared". It seems
3091 : : to me that the expression must either be computed or transparent in
3092 : : *every* block in the path(s) from EXPR_BB to BB. Any other definition
3093 : : would allow the expression to be hoisted out of loops, even if
3094 : : the expression wasn't a loop invariant.
3095 : :
3096 : : Contrast this to reachability for PRE where an expression is
3097 : : considered reachable if *any* path reaches instead of *all*
3098 : : paths. */
3099 : :
3100 : : static bool
3101 : 12170570 : should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
3102 : : basic_block bb, sbitmap visited,
3103 : : HOST_WIDE_INT distance,
3104 : : int *bb_size, enum reg_class pressure_class,
3105 : : int *nregs, bitmap hoisted_bbs, rtx_insn *from)
3106 : : {
3107 : 12170570 : unsigned int i;
3108 : 12170570 : edge pred;
3109 : 12170570 : edge_iterator ei;
3110 : 12170570 : sbitmap_iterator sbi;
3111 : 12170570 : bool visited_allocated_locally = false;
3112 : 12170570 : int decreased_pressure = 0;
3113 : :
3114 : 12170570 : if (flag_ira_hoist_pressure)
3115 : : {
3116 : : /* Record old information of basic block BB when it is visited
3117 : : at the first time. */
3118 : 12170570 : if (!bitmap_bit_p (hoisted_bbs, bb->index))
3119 : : {
3120 : 6733071 : struct bb_data *data = BB_DATA (bb);
3121 : 6733071 : bitmap_copy (data->backup, data->live_in);
3122 : 6733071 : data->old_pressure = data->max_reg_pressure[pressure_class];
3123 : : }
3124 : 12170570 : decreased_pressure = update_bb_reg_pressure (bb, from);
3125 : : }
3126 : : /* Terminate the search if distance, for which EXPR is allowed to move,
3127 : : is exhausted. */
3128 : 12170570 : if (distance > 0)
3129 : : {
3130 : 12168162 : if (flag_ira_hoist_pressure)
3131 : : {
3132 : : /* Prefer to hoist EXPR if register pressure is decreased. */
3133 : 12168162 : if (decreased_pressure > *nregs)
3134 : 2006 : distance += bb_size[bb->index];
3135 : : /* Let EXPR be hoisted through basic block at no cost if one
3136 : : of following conditions is satisfied:
3137 : :
3138 : : 1. The basic block has low register pressure.
3139 : : 2. Register pressure won't be increases after hoisting EXPR.
3140 : :
3141 : : Constant expressions is handled conservatively, because
3142 : : hoisting constant expression aggressively results in worse
3143 : : code. This decision is made by the observation of CSiBE
3144 : : on ARM target, while it has no obvious effect on other
3145 : : targets like x86, x86_64, mips and powerpc. */
3146 : 12166156 : else if (CONST_INT_P (expr->expr)
3147 : 11996672 : || (BB_DATA (bb)->max_reg_pressure[pressure_class]
3148 : 11996672 : >= ira_class_hard_regs_num[pressure_class]
3149 : 146272 : && decreased_pressure < *nregs))
3150 : 313468 : distance -= bb_size[bb->index];
3151 : : }
3152 : : else
3153 : 0 : distance -= bb_size[bb->index];
3154 : :
3155 : 12168162 : if (distance <= 0)
3156 : : return 0;
3157 : : }
3158 : : else
3159 : 2408 : gcc_assert (distance == 0);
3160 : :
3161 : 11984152 : if (visited == NULL)
3162 : : {
3163 : 594252 : visited_allocated_locally = true;
3164 : 594252 : visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
3165 : 594252 : bitmap_clear (visited);
3166 : : }
3167 : :
3168 : 26949233 : FOR_EACH_EDGE (pred, ei, bb->preds)
3169 : : {
3170 : 15923212 : basic_block pred_bb = pred->src;
3171 : :
3172 : 15923212 : if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3173 : : break;
3174 : 15923212 : else if (pred_bb == expr_bb)
3175 : 604077 : continue;
3176 : 15319135 : else if (bitmap_bit_p (visited, pred_bb->index))
3177 : 3785350 : continue;
3178 : 11533785 : else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
3179 : : break;
3180 : : /* Not killed. */
3181 : : else
3182 : : {
3183 : 11497183 : bitmap_set_bit (visited, pred_bb->index);
3184 : 11497183 : if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
3185 : : visited, distance, bb_size,
3186 : : pressure_class, nregs,
3187 : : hoisted_bbs, from))
3188 : : break;
3189 : : }
3190 : : }
3191 : 11984152 : if (visited_allocated_locally)
3192 : : {
3193 : : /* If EXPR can be hoisted to expr_bb, record basic blocks through
3194 : : which EXPR is hoisted in hoisted_bbs. */
3195 : 594252 : if (flag_ira_hoist_pressure && !pred)
3196 : : {
3197 : : /* Record the basic block from which EXPR is hoisted. */
3198 : 450367 : bitmap_set_bit (visited, bb->index);
3199 : 11868747 : EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
3200 : 10968013 : bitmap_set_bit (hoisted_bbs, i);
3201 : : }
3202 : 594252 : sbitmap_free (visited);
3203 : : }
3204 : :
3205 : 11984152 : return (pred == NULL);
3206 : : }
3207 : :
3208 : : /* Find occurrence in BB. */
3209 : :
3210 : : static struct gcse_occr *
3211 : 1188995 : find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
3212 : : {
3213 : : /* Find the right occurrence of this expression. */
3214 : 15597665 : while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3215 : 14408670 : occr = occr->next;
3216 : :
3217 : 1188995 : return occr;
3218 : : }
3219 : :
3220 : : /* Actually perform code hoisting.
3221 : :
3222 : : The code hoisting pass can hoist multiple computations of the same
3223 : : expression along dominated path to a dominating basic block, like
3224 : : from b2/b3 to b1 as depicted below:
3225 : :
3226 : : b1 ------
3227 : : /\ |
3228 : : / \ |
3229 : : bx by distance
3230 : : / \ |
3231 : : / \ |
3232 : : b2 b3 ------
3233 : :
3234 : : Unfortunately code hoisting generally extends the live range of an
3235 : : output pseudo register, which increases register pressure and hurts
3236 : : register allocation. To address this issue, an attribute MAX_DISTANCE
3237 : : is computed and attached to each expression. The attribute is computed
3238 : : from rtx cost of the corresponding expression and it's used to control
3239 : : how long the expression can be hoisted up in flow graph. As the
3240 : : expression is hoisted up in flow graph, GCC decreases its DISTANCE
3241 : : and stops the hoist if DISTANCE reaches 0. Code hoisting can decrease
3242 : : register pressure if live ranges of inputs are shrunk.
3243 : :
3244 : : Option "-fira-hoist-pressure" implements register pressure directed
3245 : : hoist based on upper method. The rationale is:
3246 : : 1. Calculate register pressure for each basic block by reusing IRA
3247 : : facility.
3248 : : 2. When expression is hoisted through one basic block, GCC checks
3249 : : the change of live ranges for inputs/output. The basic block's
3250 : : register pressure will be increased because of extended live
3251 : : range of output. However, register pressure will be decreased
3252 : : if the live ranges of inputs are shrunk.
3253 : : 3. After knowing how hoisting affects register pressure, GCC prefers
3254 : : to hoist the expression if it can decrease register pressure, by
3255 : : increasing DISTANCE of the corresponding expression.
3256 : : 4. If hoisting the expression increases register pressure, GCC checks
3257 : : register pressure of the basic block and decrease DISTANCE only if
3258 : : the register pressure is high. In other words, expression will be
3259 : : hoisted through at no cost if the basic block has low register
3260 : : pressure.
3261 : : 5. Update register pressure information for basic blocks through
3262 : : which expression is hoisted. */
3263 : :
3264 : : static bool
3265 : 24752 : hoist_code (void)
3266 : : {
3267 : 24752 : basic_block bb, dominated;
3268 : 24752 : unsigned int dom_tree_walk_index;
3269 : 24752 : unsigned int i, j, k;
3270 : 24752 : struct gcse_expr **index_map;
3271 : 24752 : struct gcse_expr *expr;
3272 : 24752 : int *to_bb_head;
3273 : 24752 : int *bb_size;
3274 : 24752 : bool changed = false;
3275 : 24752 : struct bb_data *data;
3276 : : /* Basic blocks that have occurrences reachable from BB. */
3277 : 24752 : bitmap from_bbs;
3278 : : /* Basic blocks through which expr is hoisted. */
3279 : 24752 : bitmap hoisted_bbs = NULL;
3280 : 24752 : bitmap_iterator bi;
3281 : :
3282 : : /* Compute a mapping from expression number (`bitmap_index') to
3283 : : hash table entry. */
3284 : :
3285 : 24752 : index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
3286 : 823450 : for (i = 0; i < expr_hash_table.size; i++)
3287 : 1054440 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3288 : 280494 : index_map[expr->bitmap_index] = expr;
3289 : :
3290 : : /* Calculate sizes of basic blocks and note how far
3291 : : each instruction is from the start of its block. We then use this
3292 : : data to restrict distance an expression can travel. */
3293 : :
3294 : 24752 : to_bb_head = XCNEWVEC (int, get_max_uid ());
3295 : 24752 : bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));
3296 : :
3297 : 330625 : FOR_EACH_BB_FN (bb, cfun)
3298 : : {
3299 : 305873 : rtx_insn *insn;
3300 : 305873 : int to_head;
3301 : :
3302 : 305873 : to_head = 0;
3303 : 2603478 : FOR_BB_INSNS (bb, insn)
3304 : : {
3305 : : /* Don't count debug instructions to avoid them affecting
3306 : : decision choices. */
3307 : 2297605 : if (NONDEBUG_INSN_P (insn))
3308 : 1740576 : to_bb_head[INSN_UID (insn)] = to_head++;
3309 : : }
3310 : :
3311 : 305873 : bb_size[bb->index] = to_head;
3312 : : }
3313 : :
3314 : 24752 : gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
3315 : : && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
3316 : : == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));
3317 : :
3318 : 24752 : from_bbs = BITMAP_ALLOC (NULL);
3319 : 24752 : if (flag_ira_hoist_pressure)
3320 : 24752 : hoisted_bbs = BITMAP_ALLOC (NULL);
3321 : :
3322 : 24752 : auto_vec<basic_block> dom_tree_walk
3323 : : = get_all_dominated_blocks (CDI_DOMINATORS,
3324 : 24752 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
3325 : :
3326 : : /* Walk over each basic block looking for potentially hoistable
3327 : : expressions, nothing gets hoisted from the entry block. */
3328 : 330625 : FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3329 : : {
3330 : 305873 : auto_vec<basic_block> domby
3331 : 305873 : = get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth);
3332 : :
3333 : 305873 : if (domby.length () == 0)
3334 : 0 : continue;
3335 : :
3336 : : /* Examine each expression that is very busy at the exit of this
3337 : : block. These are the potentially hoistable expressions. */
3338 : 44722461 : for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3339 : : {
3340 : 44416588 : if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3341 : : {
3342 : 3565930 : int nregs = 0;
3343 : 3565930 : enum reg_class pressure_class = NO_REGS;
3344 : : /* Current expression. */
3345 : 3565930 : struct gcse_expr *expr = index_map[i];
3346 : : /* Number of occurrences of EXPR that can be hoisted to BB. */
3347 : 3565930 : int hoistable = 0;
3348 : : /* Occurrences reachable from BB. */
3349 : 3565930 : vec<occr_t> occrs_to_hoist = vNULL;
3350 : : /* We want to insert the expression into BB only once, so
3351 : : note when we've inserted it. */
3352 : 3565930 : int insn_inserted_p;
3353 : 3565930 : occr_t occr;
3354 : :
3355 : : /* If an expression is computed in BB and is available at end of
3356 : : BB, hoist all occurrences dominated by BB to BB. */
3357 : 3565930 : if (bitmap_bit_p (comp[bb->index], i))
3358 : : {
3359 : 250056 : occr = find_occr_in_bb (expr->antic_occr, bb);
3360 : :
3361 : 250056 : if (occr)
3362 : : {
3363 : : /* An occurrence might've been already deleted
3364 : : while processing a dominator of BB. */
3365 : 147311 : if (!occr->deleted_p)
3366 : : {
3367 : 116117 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3368 : : hoistable++;
3369 : : }
3370 : : }
3371 : : else
3372 : : hoistable++;
3373 : : }
3374 : :
3375 : : /* We've found a potentially hoistable expression, now
3376 : : we look at every block BB dominates to see if it
3377 : : computes the expression. */
3378 : 46090981 : FOR_EACH_VEC_ELT (domby, j, dominated)
3379 : : {
3380 : 42525051 : HOST_WIDE_INT max_distance;
3381 : :
3382 : : /* Ignore self dominance. */
3383 : 42525051 : if (bb == dominated)
3384 : 3565930 : continue;
3385 : : /* We've found a dominated block, now see if it computes
3386 : : the busy expression and whether or not moving that
3387 : : expression to the "beginning" of that block is safe. */
3388 : 38959121 : if (!bitmap_bit_p (antloc[dominated->index], i))
3389 : 38020182 : continue;
3390 : :
3391 : 938939 : occr = find_occr_in_bb (expr->antic_occr, dominated);
3392 : 938939 : gcc_assert (occr);
3393 : :
3394 : : /* An occurrence might've been already deleted
3395 : : while processing a dominator of BB. */
3396 : 938939 : if (occr->deleted_p)
3397 : 265552 : continue;
3398 : 673387 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3399 : :
3400 : 673387 : max_distance = expr->max_distance;
3401 : 673387 : if (max_distance > 0)
3402 : : /* Adjust MAX_DISTANCE to account for the fact that
3403 : : OCCR won't have to travel all of DOMINATED, but
3404 : : only part of it. */
3405 : 1344466 : max_distance += (bb_size[dominated->index]
3406 : 672233 : - to_bb_head[INSN_UID (occr->insn)]);
3407 : :
3408 : 673387 : pressure_class = get_pressure_class_and_nregs (occr->insn,
3409 : : &nregs);
3410 : :
3411 : : /* Note if the expression should be hoisted from the dominated
3412 : : block to BB if it can reach DOMINATED unimpared.
3413 : :
3414 : : Keep track of how many times this expression is hoistable
3415 : : from a dominated block into BB. */
3416 : 673387 : if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3417 : : max_distance, bb_size,
3418 : : pressure_class, &nregs,
3419 : : hoisted_bbs, occr->insn))
3420 : : {
3421 : 450367 : hoistable++;
3422 : 450367 : occrs_to_hoist.safe_push (occr);
3423 : 450367 : bitmap_set_bit (from_bbs, dominated->index);
3424 : : }
3425 : : }
3426 : :
3427 : : /* If we found more than one hoistable occurrence of this
3428 : : expression, then note it in the vector of expressions to
3429 : : hoist. It makes no sense to hoist things which are computed
3430 : : in only one BB, and doing so tends to pessimize register
3431 : : allocation. One could increase this value to try harder
3432 : : to avoid any possible code expansion due to register
3433 : : allocation issues; however experiments have shown that
3434 : : the vast majority of hoistable expressions are only movable
3435 : : from two successors, so raising this threshold is likely
3436 : : to nullify any benefit we get from code hoisting. */
3437 : 3565930 : if (hoistable > 1 && dbg_cnt (hoist_insn))
3438 : : {
3439 : : /* If (hoistable != vec::length), then there is
3440 : : an occurrence of EXPR in BB itself. Don't waste
3441 : : time looking for LCA in this case. */
3442 : 187362 : if ((unsigned) hoistable == occrs_to_hoist.length ())
3443 : : {
3444 : 82277 : basic_block lca;
3445 : :
3446 : 82277 : lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3447 : : from_bbs);
3448 : 82277 : if (lca != bb)
3449 : : /* Punt, it's better to hoist these occurrences to
3450 : : LCA. */
3451 : 81260 : occrs_to_hoist.release ();
3452 : : }
3453 : : }
3454 : : else
3455 : : /* Punt, no point hoisting a single occurrence. */
3456 : 3472249 : occrs_to_hoist.release ();
3457 : :
3458 : 3565930 : if (flag_ira_hoist_pressure
3459 : 3565930 : && !occrs_to_hoist.is_empty ())
3460 : : {
3461 : : /* Increase register pressure of basic blocks to which
3462 : : expr is hoisted because of extended live range of
3463 : : output. */
3464 : 12421 : data = BB_DATA (bb);
3465 : 12421 : data->max_reg_pressure[pressure_class] += nregs;
3466 : 276313 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3467 : : {
3468 : 263892 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3469 : 263892 : data->max_reg_pressure[pressure_class] += nregs;
3470 : : }
3471 : : }
3472 : 3553509 : else if (flag_ira_hoist_pressure)
3473 : : {
3474 : : /* Restore register pressure and live_in info for basic
3475 : : blocks recorded in hoisted_bbs when expr will not be
3476 : : hoisted. */
3477 : 8831762 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3478 : : {
3479 : 5278253 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3480 : 5278253 : bitmap_copy (data->live_in, data->backup);
3481 : 5278253 : data->max_reg_pressure[pressure_class]
3482 : 5278253 : = data->old_pressure;
3483 : : }
3484 : : }
3485 : :
3486 : 3565930 : if (flag_ira_hoist_pressure)
3487 : 3565930 : bitmap_clear (hoisted_bbs);
3488 : :
3489 : : insn_inserted_p = 0;
3490 : :
3491 : : /* Walk through occurrences of I'th expressions we want
3492 : : to hoist to BB and make the transformations. */
3493 : 3597944 : FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3494 : : {
3495 : 32014 : rtx_insn *insn;
3496 : 32014 : const_rtx set;
3497 : :
3498 : 32014 : gcc_assert (!occr->deleted_p);
3499 : :
3500 : 32014 : insn = occr->insn;
3501 : 32014 : set = single_set_gcse (insn);
3502 : :
3503 : : /* Create a pseudo-reg to store the result of reaching
3504 : : expressions into. Get the mode for the new pseudo
3505 : : from the mode of the original destination pseudo.
3506 : :
3507 : : It is important to use new pseudos whenever we
3508 : : emit a set. This will allow reload to use
3509 : : rematerialization for such registers. */
3510 : 32014 : if (!insn_inserted_p)
3511 : 12421 : expr->reaching_reg
3512 : 12421 : = gen_reg_rtx_and_attrs (SET_DEST (set));
3513 : :
3514 : 32014 : gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3515 : : insn);
3516 : 32014 : delete_insn (insn);
3517 : 32014 : occr->deleted_p = 1;
3518 : 32014 : changed = true;
3519 : 32014 : gcse_subst_count++;
3520 : :
3521 : 32014 : if (!insn_inserted_p)
3522 : : {
3523 : 12421 : insert_insn_end_basic_block (expr, bb);
3524 : 12421 : insn_inserted_p = 1;
3525 : : }
3526 : : }
3527 : :
3528 : 3565930 : occrs_to_hoist.release ();
3529 : 3565930 : bitmap_clear (from_bbs);
3530 : : }
3531 : : }
3532 : 305873 : }
3533 : :
3534 : 24752 : BITMAP_FREE (from_bbs);
3535 : 24752 : if (flag_ira_hoist_pressure)
3536 : 24752 : BITMAP_FREE (hoisted_bbs);
3537 : :
3538 : 24752 : free (bb_size);
3539 : 24752 : free (to_bb_head);
3540 : 24752 : free (index_map);
3541 : :
3542 : 24752 : return changed;
3543 : 24752 : }
3544 : :
3545 : : /* Return pressure class and number of needed hard registers (through
3546 : : *NREGS) of register REGNO. */
3547 : : static enum reg_class
3548 : 5584683 : get_regno_pressure_class (int regno, int *nregs)
3549 : : {
3550 : 5584683 : if (regno >= FIRST_PSEUDO_REGISTER)
3551 : : {
3552 : 3112685 : enum reg_class pressure_class;
3553 : :
3554 : 3112685 : pressure_class = reg_allocno_class (regno);
3555 : 3112685 : pressure_class = ira_pressure_class_translate[pressure_class];
3556 : 3112685 : *nregs
3557 : 3112685 : = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3558 : 3112685 : return pressure_class;
3559 : : }
3560 : 2471998 : else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3561 : 2471998 : && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3562 : : {
3563 : 754463 : *nregs = 1;
3564 : 754463 : return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3565 : : }
3566 : : else
3567 : : {
3568 : 1717535 : *nregs = 0;
3569 : 1717535 : return NO_REGS;
3570 : : }
3571 : : }
3572 : :
3573 : : /* Return pressure class and number of hard registers (through *NREGS)
3574 : : for destination of INSN. */
3575 : : static enum reg_class
3576 : 673387 : get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
3577 : : {
3578 : 673387 : rtx reg;
3579 : 673387 : enum reg_class pressure_class;
3580 : 673387 : const_rtx set = single_set_gcse (insn);
3581 : :
3582 : 673387 : reg = SET_DEST (set);
3583 : 673387 : if (GET_CODE (reg) == SUBREG)
3584 : 0 : reg = SUBREG_REG (reg);
3585 : 673387 : if (MEM_P (reg))
3586 : : {
3587 : 0 : *nregs = 0;
3588 : 0 : pressure_class = NO_REGS;
3589 : : }
3590 : : else
3591 : : {
3592 : 673387 : gcc_assert (REG_P (reg));
3593 : 673387 : pressure_class = reg_allocno_class (REGNO (reg));
3594 : 673387 : pressure_class = ira_pressure_class_translate[pressure_class];
3595 : 673387 : *nregs
3596 : 673387 : = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3597 : : }
3598 : 673387 : return pressure_class;
3599 : : }
3600 : :
3601 : : /* Increase (if INCR_P) or decrease current register pressure for
3602 : : register REGNO. */
3603 : : static void
3604 : 5027782 : change_pressure (int regno, bool incr_p)
3605 : : {
3606 : 5027782 : int nregs;
3607 : 5027782 : enum reg_class pressure_class;
3608 : :
3609 : 5027782 : pressure_class = get_regno_pressure_class (regno, &nregs);
3610 : 5027782 : if (! incr_p)
3611 : 1219425 : curr_reg_pressure[pressure_class] -= nregs;
3612 : : else
3613 : : {
3614 : 3808357 : curr_reg_pressure[pressure_class] += nregs;
3615 : 3808357 : if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3616 : : < curr_reg_pressure[pressure_class])
3617 : 1738742 : BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3618 : 1738742 : = curr_reg_pressure[pressure_class];
3619 : : }
3620 : 5027782 : }
3621 : :
3622 : : /* Calculate register pressure for each basic block by walking insns
3623 : : from last to first. */
3624 : : static void
3625 : 28185 : calculate_bb_reg_pressure (void)
3626 : : {
3627 : 28185 : int i;
3628 : 28185 : unsigned int j;
3629 : 28185 : rtx_insn *insn;
3630 : 28185 : basic_block bb;
3631 : 28185 : bitmap curr_regs_live;
3632 : 28185 : bitmap_iterator bi;
3633 : :
3634 : :
3635 : 28185 : ira_setup_eliminable_regset ();
3636 : 28185 : curr_regs_live = BITMAP_ALLOC (®_obstack);
3637 : 350248 : FOR_EACH_BB_FN (bb, cfun)
3638 : : {
3639 : 322063 : curr_bb = bb;
3640 : 322063 : BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3641 : 322063 : BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3642 : 322063 : bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3643 : 322063 : bitmap_copy (curr_regs_live, df_get_live_out (bb));
3644 : 1934511 : for (i = 0; i < ira_pressure_classes_num; i++)
3645 : 1290385 : curr_reg_pressure[ira_pressure_classes[i]] = 0;
3646 : 2953018 : EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3647 : 2630955 : change_pressure (j, true);
3648 : :
3649 : 2704443 : FOR_BB_INSNS_REVERSE (bb, insn)
3650 : : {
3651 : 2382380 : rtx dreg;
3652 : 2382380 : int regno;
3653 : 2382380 : df_ref def, use;
3654 : :
3655 : 2382380 : if (! NONDEBUG_INSN_P (insn))
3656 : 587672 : continue;
3657 : :
3658 : 15636391 : FOR_EACH_INSN_DEF (def, insn)
3659 : : {
3660 : 13841683 : dreg = DF_REF_REAL_REG (def);
3661 : 13841683 : gcc_assert (REG_P (dreg));
3662 : 13841683 : regno = REGNO (dreg);
3663 : 13841683 : if (!(DF_REF_FLAGS (def)
3664 : : & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3665 : : {
3666 : 13839677 : if (bitmap_clear_bit (curr_regs_live, regno))
3667 : 1219425 : change_pressure (regno, false);
3668 : : }
3669 : : }
3670 : :
3671 : 3876028 : FOR_EACH_INSN_USE (use, insn)
3672 : : {
3673 : 2081320 : dreg = DF_REF_REAL_REG (use);
3674 : 2081320 : gcc_assert (REG_P (dreg));
3675 : 2081320 : regno = REGNO (dreg);
3676 : 2081320 : if (bitmap_set_bit (curr_regs_live, regno))
3677 : 1177402 : change_pressure (regno, true);
3678 : : }
3679 : : }
3680 : : }
3681 : 28185 : BITMAP_FREE (curr_regs_live);
3682 : :
3683 : 28185 : if (dump_file == NULL)
3684 : 28178 : return;
3685 : :
3686 : 7 : fprintf (dump_file, "\nRegister Pressure: \n");
3687 : 35 : FOR_EACH_BB_FN (bb, cfun)
3688 : : {
3689 : 28 : fprintf (dump_file, " Basic block %d: \n", bb->index);
3690 : 140 : for (i = 0; (int) i < ira_pressure_classes_num; i++)
3691 : : {
3692 : 112 : enum reg_class pressure_class;
3693 : :
3694 : 112 : pressure_class = ira_pressure_classes[i];
3695 : 112 : if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3696 : 84 : continue;
3697 : :
3698 : 28 : fprintf (dump_file, " %s=%d\n", reg_class_names[pressure_class],
3699 : : BB_DATA (bb)->max_reg_pressure[pressure_class]);
3700 : : }
3701 : : }
3702 : 7 : fprintf (dump_file, "\n");
3703 : : }
3704 : :
3705 : : /* Top level routine to perform one code hoisting (aka unification) pass
3706 : :
3707 : : Return true if a change was made. */
3708 : :
3709 : : static bool
3710 : 61465 : one_code_hoisting_pass (void)
3711 : : {
3712 : 61465 : bool changed = false;
3713 : :
3714 : 61465 : gcse_subst_count = 0;
3715 : 61465 : gcse_create_count = 0;
3716 : :
3717 : : /* Return if there's nothing to do, or it is too expensive. */
3718 : 61465 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
3719 : 61465 : || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
3720 : 33280 : return false;
3721 : :
3722 : 28185 : doing_code_hoisting_p = true;
3723 : :
3724 : : /* Calculate register pressure for each basic block. */
3725 : 28185 : if (flag_ira_hoist_pressure)
3726 : : {
3727 : 28185 : regstat_init_n_sets_and_refs ();
3728 : 28185 : ira_set_pseudo_classes (false, dump_file);
3729 : 28185 : alloc_aux_for_blocks (sizeof (struct bb_data));
3730 : 28185 : calculate_bb_reg_pressure ();
3731 : 28185 : regstat_free_n_sets_and_refs ();
3732 : : }
3733 : :
3734 : : /* We need alias. */
3735 : 28185 : init_alias_analysis ();
3736 : :
3737 : 28185 : bytes_used = 0;
3738 : 28185 : gcc_obstack_init (&gcse_obstack);
3739 : 28185 : alloc_gcse_mem ();
3740 : :
3741 : 28185 : alloc_hash_table (&expr_hash_table);
3742 : 28185 : compute_hash_table (&expr_hash_table);
3743 : 28185 : if (dump_file)
3744 : 7 : dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3745 : :
3746 : 28185 : if (expr_hash_table.n_elems > 0)
3747 : : {
3748 : 24752 : alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
3749 : : expr_hash_table.n_elems);
3750 : 24752 : compute_code_hoist_data ();
3751 : 24752 : changed = hoist_code ();
3752 : 24752 : free_code_hoist_mem ();
3753 : : }
3754 : :
3755 : 28185 : if (flag_ira_hoist_pressure)
3756 : : {
3757 : 28185 : free_aux_for_blocks ();
3758 : 28185 : free_reg_info ();
3759 : : }
3760 : 28185 : free_hash_table (&expr_hash_table);
3761 : 28185 : free_gcse_mem ();
3762 : 28185 : obstack_free (&gcse_obstack, NULL);
3763 : :
3764 : : /* We are finished with alias. */
3765 : 28185 : end_alias_analysis ();
3766 : :
3767 : 28185 : if (dump_file)
3768 : : {
3769 : 7 : fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3770 : 7 : current_function_name (), n_basic_blocks_for_fn (cfun),
3771 : : bytes_used);
3772 : 7 : fprintf (dump_file, "%d substs, %d insns created\n",
3773 : : gcse_subst_count, gcse_create_count);
3774 : : }
3775 : :
3776 : 28185 : doing_code_hoisting_p = false;
3777 : :
3778 : 28185 : return changed;
3779 : : }
3780 : :
3781 : : /* Here we provide the things required to do store motion towards the exit.
3782 : : In order for this to be effective, gcse also needed to be taught how to
3783 : : move a load when it is killed only by a store to itself.
3784 : :
3785 : : int i;
3786 : : float a[10];
3787 : :
3788 : : void foo(float scale)
3789 : : {
3790 : : for (i=0; i<10; i++)
3791 : : a[i] *= scale;
3792 : : }
3793 : :
3794 : : 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3795 : : the load out since its live around the loop, and stored at the bottom
3796 : : of the loop.
3797 : :
3798 : : The 'Load Motion' referred to and implemented in this file is
3799 : : an enhancement to gcse which when using edge based LCM, recognizes
3800 : : this situation and allows gcse to move the load out of the loop.
3801 : :
3802 : : Once gcse has hoisted the load, store motion can then push this
3803 : : load towards the exit, and we end up with no loads or stores of 'i'
3804 : : in the loop. */
3805 : :
3806 : : /* This will search the ldst list for a matching expression. If it
3807 : : doesn't find one, we create one and initialize it. */
3808 : :
3809 : : static struct ls_expr *
3810 : 13865481 : ldst_entry (rtx x)
3811 : : {
3812 : 13865481 : int do_not_record_p = 0;
3813 : 13865481 : struct ls_expr * ptr;
3814 : 13865481 : unsigned int hash;
3815 : 13865481 : ls_expr **slot;
3816 : 13865481 : struct ls_expr e;
3817 : :
3818 : 13865481 : hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3819 : : NULL, /*have_reg_qty=*/false);
3820 : :
3821 : 13865481 : e.pattern = x;
3822 : 13865481 : slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
3823 : 13865481 : if (*slot)
3824 : : return *slot;
3825 : :
3826 : 9177428 : ptr = XNEW (struct ls_expr);
3827 : :
3828 : 9177428 : ptr->next = pre_ldst_mems;
3829 : 9177428 : ptr->expr = NULL;
3830 : 9177428 : ptr->pattern = x;
3831 : 9177428 : ptr->pattern_regs = NULL_RTX;
3832 : 9177428 : ptr->stores.create (0);
3833 : 9177428 : ptr->reaching_reg = NULL_RTX;
3834 : 9177428 : ptr->invalid = 0;
3835 : 9177428 : ptr->index = 0;
3836 : 9177428 : ptr->hash_index = hash;
3837 : 9177428 : pre_ldst_mems = ptr;
3838 : 9177428 : *slot = ptr;
3839 : :
3840 : 9177428 : return ptr;
3841 : : }
3842 : :
3843 : : /* Free up an individual ldst entry. */
3844 : :
3845 : : static void
3846 : 9177428 : free_ldst_entry (struct ls_expr * ptr)
3847 : : {
3848 : 0 : ptr->stores.release ();
3849 : :
3850 : 9177428 : free (ptr);
3851 : 3093228 : }
3852 : :
3853 : : /* Free up all memory associated with the ldst list. */
3854 : :
3855 : : static void
3856 : 471883 : free_ld_motion_mems (void)
3857 : : {
3858 : 471883 : delete pre_ldst_table;
3859 : 471883 : pre_ldst_table = NULL;
3860 : :
3861 : 3565111 : while (pre_ldst_mems)
3862 : : {
3863 : 3093228 : struct ls_expr * tmp = pre_ldst_mems;
3864 : :
3865 : 3093228 : pre_ldst_mems = pre_ldst_mems->next;
3866 : :
3867 : 3093228 : free_ldst_entry (tmp);
3868 : : }
3869 : :
3870 : 471883 : pre_ldst_mems = NULL;
3871 : 471883 : }
3872 : :
3873 : : /* Dump debugging info about the ldst list. */
3874 : :
3875 : : static void
3876 : 16 : print_ldst_list (FILE * file)
3877 : : {
3878 : 16 : struct ls_expr * ptr;
3879 : :
3880 : 16 : fprintf (file, "LDST list: \n");
3881 : :
3882 : 56 : for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3883 : : {
3884 : 40 : fprintf (file, " Pattern (%3d): ", ptr->index);
3885 : :
3886 : 40 : print_rtl (file, ptr->pattern);
3887 : :
3888 : 40 : fprintf (file, "\n Stores : ");
3889 : 40 : print_rtx_insn_vec (file, ptr->stores);
3890 : :
3891 : 40 : fprintf (file, "\n\n");
3892 : : }
3893 : :
3894 : 16 : fprintf (file, "\n");
3895 : 16 : }
3896 : :
3897 : : /* Returns 1 if X is in the list of ldst only expressions. */
3898 : :
3899 : : static struct ls_expr *
3900 : 661192 : find_rtx_in_ldst (rtx x)
3901 : : {
3902 : 661192 : struct ls_expr e;
3903 : 661192 : ls_expr **slot;
3904 : 661192 : if (!pre_ldst_table)
3905 : : return NULL;
3906 : 661192 : e.pattern = x;
3907 : 661192 : slot = pre_ldst_table->find_slot (&e, NO_INSERT);
3908 : 661192 : if (!slot || (*slot)->invalid)
3909 : : return NULL;
3910 : : return *slot;
3911 : : }
3912 : :
3913 : : /* Load Motion for loads which only kill themselves. */
3914 : :
3915 : : /* Return true if x, a MEM, is a simple access with no side effects.
3916 : : These are the types of loads we consider for the ld_motion list,
3917 : : otherwise we let the usual aliasing take care of it. */
3918 : :
3919 : : static bool
3920 : 18428940 : simple_mem (const_rtx x)
3921 : : {
3922 : 18428940 : if (MEM_VOLATILE_P (x))
3923 : : return false;
3924 : :
3925 : 17571464 : if (GET_MODE (x) == BLKmode)
3926 : : return false;
3927 : :
3928 : : /* If we are handling exceptions, we must be careful with memory references
3929 : : that may trap. If we are not, the behavior is undefined, so we may just
3930 : : continue. */
3931 : 17512480 : if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3932 : : return false;
3933 : :
3934 : 15524937 : if (side_effects_p (x))
3935 : : return false;
3936 : :
3937 : : /* Do not consider function arguments passed on stack. */
3938 : 14103648 : if (reg_mentioned_p (stack_pointer_rtx, x))
3939 : : return false;
3940 : :
3941 : 13867443 : if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3942 : : return false;
3943 : :
3944 : : return true;
3945 : : }
3946 : :
3947 : : /* Make sure there isn't a buried reference in this pattern anywhere.
3948 : : If there is, invalidate the entry for it since we're not capable
3949 : : of fixing it up just yet.. We have to be sure we know about ALL
3950 : : loads since the aliasing code will allow all entries in the
3951 : : ld_motion list to not-alias itself. If we miss a load, we will get
3952 : : the wrong value since gcse might common it and we won't know to
3953 : : fix it up. */
3954 : :
3955 : : static void
3956 : 160454463 : invalidate_any_buried_refs (rtx x)
3957 : : {
3958 : 160454463 : const char * fmt;
3959 : 160454463 : int i, j;
3960 : 160454463 : struct ls_expr * ptr;
3961 : :
3962 : : /* Invalidate it in the list. */
3963 : 160454463 : if (MEM_P (x) && simple_mem (x))
3964 : : {
3965 : 4907369 : ptr = ldst_entry (x);
3966 : 4907369 : ptr->invalid = 1;
3967 : : }
3968 : :
3969 : : /* Recursively process the insn. */
3970 : 160454463 : fmt = GET_RTX_FORMAT (GET_CODE (x));
3971 : :
3972 : 375743732 : for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3973 : : {
3974 : 215289269 : if (fmt[i] == 'e')
3975 : 96564438 : invalidate_any_buried_refs (XEXP (x, i));
3976 : 118724831 : else if (fmt[i] == 'E')
3977 : 28370513 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3978 : 19263048 : invalidate_any_buried_refs (XVECEXP (x, i, j));
3979 : : }
3980 : 160454463 : }
3981 : :
3982 : : /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3983 : : being defined as MEM loads and stores to symbols, with no side effects
3984 : : and no registers in the expression. For a MEM destination, we also
3985 : : check that the insn is still valid if we replace the destination with a
3986 : : REG, as is done in update_ld_motion_stores. If there are any uses/defs
3987 : : which don't match this criteria, they are invalidated and trimmed out
3988 : : later. */
3989 : :
3990 : : static void
3991 : 471883 : compute_ld_motion_mems (void)
3992 : : {
3993 : 471883 : struct ls_expr * ptr;
3994 : 471883 : basic_block bb;
3995 : 471883 : rtx_insn *insn;
3996 : :
3997 : 471883 : pre_ldst_mems = NULL;
3998 : 471883 : pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);
3999 : :
4000 : 9660090 : FOR_EACH_BB_FN (bb, cfun)
4001 : : {
4002 : 113670192 : FOR_BB_INSNS (bb, insn)
4003 : : {
4004 : 104481985 : if (NONDEBUG_INSN_P (insn))
4005 : : {
4006 : 46761038 : if (GET_CODE (PATTERN (insn)) == SET)
4007 : : {
4008 : 36699160 : rtx src = SET_SRC (PATTERN (insn));
4009 : 36699160 : rtx dest = SET_DEST (PATTERN (insn));
4010 : :
4011 : : /* Check for a simple load. */
4012 : 36699160 : if (MEM_P (src) && simple_mem (src))
4013 : : {
4014 : 4695905 : ptr = ldst_entry (src);
4015 : 4695905 : if (!REG_P (dest))
4016 : 34646 : ptr->invalid = 1;
4017 : : }
4018 : : else
4019 : : {
4020 : : /* Make sure there isn't a buried load somewhere. */
4021 : 32003255 : invalidate_any_buried_refs (src);
4022 : : }
4023 : :
4024 : : /* Check for a simple load through a REG_EQUAL note. */
4025 : 36699160 : rtx note = find_reg_equal_equiv_note (insn), src_eq;
4026 : 36699160 : if (note
4027 : 2207128 : && REG_NOTE_KIND (note) == REG_EQUAL
4028 : 2068928 : && (src_eq = XEXP (note, 0))
4029 : 38768088 : && !(MEM_P (src_eq) && simple_mem (src_eq)))
4030 : 2068927 : invalidate_any_buried_refs (src_eq);
4031 : :
4032 : : /* Check for stores. Don't worry about aliased ones, they
4033 : : will block any movement we might do later. We only care
4034 : : about this exact pattern since those are the only
4035 : : circumstance that we will ignore the aliasing info. */
4036 : 36699160 : if (MEM_P (dest) && simple_mem (dest))
4037 : : {
4038 : 4262207 : ptr = ldst_entry (dest);
4039 : 4262207 : machine_mode src_mode = GET_MODE (src);
4040 : 4262207 : if (! MEM_P (src)
4041 : 4262207 : && GET_CODE (src) != ASM_OPERANDS
4042 : : /* Check for REG manually since want_to_gcse_p
4043 : : returns 0 for all REGs. */
4044 : 8524414 : && can_assign_to_reg_without_clobbers_p (src,
4045 : : src_mode))
4046 : 4253921 : ptr->stores.safe_push (insn);
4047 : : else
4048 : 8286 : ptr->invalid = 1;
4049 : : }
4050 : : }
4051 : : else
4052 : : {
4053 : : /* Invalidate all MEMs in the pattern and... */
4054 : 10061878 : invalidate_any_buried_refs (PATTERN (insn));
4055 : :
4056 : : /* ...in REG_EQUAL notes for PARALLELs with single SET. */
4057 : 10061878 : rtx note = find_reg_equal_equiv_note (insn), src_eq;
4058 : 10061878 : if (note
4059 : 492917 : && REG_NOTE_KIND (note) == REG_EQUAL
4060 : 10554795 : && (src_eq = XEXP (note, 0)))
4061 : 492917 : invalidate_any_buried_refs (src_eq);
4062 : : }
4063 : : }
4064 : : }
4065 : : }
4066 : 471883 : }
4067 : :
4068 : : /* Remove any references that have been either invalidated or are not in the
4069 : : expression list for pre gcse. */
4070 : :
4071 : : static void
4072 : 471883 : trim_ld_motion_mems (void)
4073 : : {
4074 : 471883 : struct ls_expr * * last = & pre_ldst_mems;
4075 : 471883 : struct ls_expr * ptr = pre_ldst_mems;
4076 : :
4077 : 9649311 : while (ptr != NULL)
4078 : : {
4079 : 9177428 : struct gcse_expr * expr;
4080 : :
4081 : : /* Delete if entry has been made invalid. */
4082 : 9177428 : if (! ptr->invalid)
4083 : : {
4084 : : /* Delete if we cannot find this mem in the expression list. */
4085 : 6499738 : unsigned int hash = ptr->hash_index % expr_hash_table.size;
4086 : :
4087 : 6499738 : for (expr = expr_hash_table.table[hash];
4088 : 9883909 : expr != NULL;
4089 : 3384171 : expr = expr->next_same_hash)
4090 : 6477399 : if (expr_equiv_p (expr->expr, ptr->pattern))
4091 : : break;
4092 : : }
4093 : : else
4094 : : expr = (struct gcse_expr *) 0;
4095 : :
4096 : 6499738 : if (expr)
4097 : : {
4098 : : /* Set the expression field if we are keeping it. */
4099 : 3093228 : ptr->expr = expr;
4100 : 3093228 : last = & ptr->next;
4101 : 3093228 : ptr = ptr->next;
4102 : : }
4103 : : else
4104 : : {
4105 : 6084200 : *last = ptr->next;
4106 : 6084200 : pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
4107 : 6084200 : free_ldst_entry (ptr);
4108 : 6084200 : ptr = * last;
4109 : : }
4110 : : }
4111 : :
4112 : : /* Show the world what we've found. */
4113 : 471883 : if (dump_file && pre_ldst_mems != NULL)
4114 : 16 : print_ldst_list (dump_file);
4115 : 471883 : }
4116 : :
/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.

   EXPR is the PRE expression whose occurrences are being replaced
   by EXPR->reaching_reg.  */

static void
update_ld_motion_stores (struct gcse_expr * expr)
{
  struct ls_expr * mem_ptr;

  /* Only expressions recorded in the load-motion list have stores
     that need fixing up.  */
  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We can try to find just the REACHED stores, but it shouldn't
	 matter to set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */

      /* We replace (set mem expr) with (set reg expr) (set mem reg)
	 where reg is the reaching reg used in the load.  We checked in
	 compute_ld_motion_mems that we can replace (set mem expr) with
	 (set reg expr) in that insn.  */
      rtx_insn *insn;
      unsigned int i;
      FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
	{
	  rtx pat = PATTERN (insn);
	  rtx src = SET_SRC (pat);
	  rtx reg = expr->reaching_reg;

	  /* If we've already copied it, continue.  */
	  if (expr->reaching_reg == src)
	    continue;

	  if (dump_file)
	    {
	      fprintf (dump_file, "PRE: store updated with reaching reg ");
	      print_rtl (dump_file, reg);
	      fprintf (dump_file, ":\n ");
	      print_inline_rtx (dump_file, insn, 8);
	      fprintf (dump_file, "\n");
	    }

	  /* Emit (set reg src) just before the store, then make the
	     store read from the reaching register instead.  */
	  rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
	  emit_insn_before (copy, insn);
	  SET_SRC (pat) = reg;
	  df_insn_rescan (insn);

	  /* un-recognize this pattern since it's probably different now.  */
	  INSN_CODE (insn) = -1;
	  gcse_create_count++;
	}
    }
}
4171 : :
4172 : : /* Return true if the graph is too expensive to optimize. PASS is the
4173 : : optimization about to be performed. */
4174 : :
4175 : : bool
4176 : 2076576 : gcse_or_cprop_is_too_expensive (const char *pass)
4177 : : {
4178 : 2076576 : unsigned HOST_WIDE_INT memory_request
4179 : 2076576 : = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun)
4180 : 2076576 : * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE));
4181 : :
4182 : : /* Trying to perform global optimizations on flow graphs which have
4183 : : a high connectivity will take a long time and is unlikely to be
4184 : : particularly useful.
4185 : :
4186 : : In normal circumstances a cfg should have about twice as many
4187 : : edges as blocks. But we do not want to punish small functions
4188 : : which have a couple switch statements. Rather than simply
4189 : : threshold the number of blocks, uses something with a more
4190 : : graceful degradation. */
4191 : 2076576 : if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
4192 : : {
4193 : 0 : warning (OPT_Wdisabled_optimization,
4194 : : "%s: %d basic blocks and %d edges/basic block",
4195 : : pass, n_basic_blocks_for_fn (cfun),
4196 : : n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
4197 : :
4198 : 0 : return true;
4199 : : }
4200 : :
4201 : : /* If allocating memory for the dataflow bitmaps would take up too much
4202 : : storage it's better just to disable the optimization. */
4203 : 2076576 : if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory)
4204 : : {
4205 : 0 : warning (OPT_Wdisabled_optimization,
4206 : : "%s: %d basic blocks and %d registers; "
4207 : : "increase %<--param max-gcse-memory%> above %wu",
4208 : 0 : pass, n_basic_blocks_for_fn (cfun), max_reg_num (),
4209 : : memory_request / 1024);
4210 : :
4211 : 0 : return true;
4212 : : }
4213 : :
4214 : : return false;
4215 : : }
4216 : :
4217 : : static unsigned int
4218 : 879894 : execute_rtl_pre (void)
4219 : : {
4220 : 879894 : int changed;
4221 : 879894 : delete_unreachable_blocks ();
4222 : 879894 : df_analyze ();
4223 : 879894 : changed = one_pre_gcse_pass ();
4224 : 879894 : flag_rerun_cse_after_global_opts |= changed;
4225 : 879894 : if (changed)
4226 : 117886 : cleanup_cfg (0);
4227 : 879894 : return 0;
4228 : : }
4229 : :
/* Driver for hardreg PRE: run partial redundancy elimination restricted
   to each hard register listed in the target's HARDREG_PRE_REGNOS
   (a zero-terminated initializer list), one register at a time.
   A no-op on targets that do not define HARDREG_PRE_REGNOS.
   Always returns 0.  */

static unsigned int
execute_hardreg_pre (void)
{
#ifdef HARDREG_PRE_REGNOS
  doing_hardreg_pre_p = true;
  unsigned int regnos[] = HARDREG_PRE_REGNOS;
  /* It's possible to avoid this loop, but it isn't worth doing so until
     hardreg PRE is used for multiple hardregs.  */
  for (int i = 0; regnos[i] != 0; i++)
    {
      int changed;
      current_hardreg_regno = regnos[i];
      if (dump_file)
	/* Space before the paren, per GNU style and the rest of this
	   file.  */
	fprintf (dump_file, "Entering hardreg PRE for regno %d\n",
		 current_hardreg_regno);
      delete_unreachable_blocks ();
      df_analyze ();
      changed = one_pre_gcse_pass ();
      if (changed)
	cleanup_cfg (0);
    }
  doing_hardreg_pre_p = false;
#endif
  return 0;
}
4255 : :
4256 : : static unsigned int
4257 : 61465 : execute_rtl_hoist (void)
4258 : : {
4259 : 61465 : int changed;
4260 : 61465 : delete_unreachable_blocks ();
4261 : 61465 : df_analyze ();
4262 : 61465 : changed = one_code_hoisting_pass ();
4263 : 61465 : flag_rerun_cse_after_global_opts |= changed;
4264 : 61465 : if (changed)
4265 : 2909 : cleanup_cfg (0);
4266 : 61465 : return 0;
4267 : : }
4268 : :
namespace {

/* Pass descriptor for the RTL partial redundancy elimination pass.  */

const pass_data pass_data_rtl_pre =
{
  RTL_PASS, /* type */
  "rtl pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_PRE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_pre : public rtl_opt_pass
{
public:
  pass_rtl_pre (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_pre, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
  {
    return execute_rtl_pre ();
  }

}; // class pass_rtl_pre

/* We do not construct an accurate cfg in functions which call
   setjmp, so none of these passes runs if the function calls
   setjmp.
   FIXME: Should just handle setjmp via REG_SETJMP notes.  */

bool
pass_rtl_pre::gate (function *fun)
{
  /* PRE runs only when optimizing for speed; the size-optimized
     counterpart is the code-hoisting pass.  dbg_cnt comes last so the
     counter only ticks when the pass would otherwise run.  */
  return optimize > 0 && flag_gcse
    && !fun->calls_setjmp
    && optimize_function_for_speed_p (fun)
    && dbg_cnt (pre);
}

} // anon namespace
4315 : :
/* Factory used by the pass manager to instantiate the RTL PRE pass.  */

rtl_opt_pass *
make_pass_rtl_pre (gcc::context *ctxt)
{
  return new pass_rtl_pre (ctxt);
}
4321 : :
namespace {

/* Pass descriptor for the hard-register PRE pass.  */

const pass_data pass_data_hardreg_pre =
{
  RTL_PASS, /* type */
  "hardreg_pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_PRE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_hardreg_pre : public rtl_opt_pass
{
public:
  pass_hardreg_pre (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_hardreg_pre, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
  {
    return execute_hardreg_pre ();
  }

}; // class pass_hardreg_pre

/* Gate: hardreg PRE is only meaningful on targets that define
   HARDREG_PRE_REGNOS; elsewhere the pass never runs.  */

bool
pass_hardreg_pre::gate (function * ARG_UNUSED (fun))
{
#ifdef HARDREG_PRE_REGNOS
  return optimize > 0
    && !fun->calls_setjmp;
#else
  return false;
#endif
}

} // anon namespace
4365 : :
/* Factory used by the pass manager to instantiate the hardreg PRE pass.  */

rtl_opt_pass *
make_pass_hardreg_pre (gcc::context *ctxt)
{
  return new pass_hardreg_pre (ctxt);
}
4371 : :
namespace {

/* Pass descriptor for the RTL code-hoisting pass.  */

const pass_data pass_data_rtl_hoist =
{
  RTL_PASS, /* type */
  "hoist", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_HOIST, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_hoist : public rtl_opt_pass
{
public:
  pass_rtl_hoist (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override;
  unsigned int execute (function *) final override
  {
    return execute_rtl_hoist ();
  }

}; // class pass_rtl_hoist

/* Gate: hoisting is the size-optimized counterpart of PRE, and like
   the other gcse passes is skipped in functions calling setjmp.  */

bool
pass_rtl_hoist::gate (function *)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    /* It does not make sense to run code hoisting unless we are optimizing
       for code size -- it rarely makes programs faster, and can make them
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
    && optimize_function_for_size_p (cfun)
    && dbg_cnt (hoist);
}

} // anon namespace
4416 : :
/* Factory used by the pass manager to instantiate the code-hoisting
   pass.  */

rtl_opt_pass *
make_pass_rtl_hoist (gcc::context *ctxt)
{
  return new pass_rtl_hoist (ctxt);
}
4422 : :
/* Reset all state within gcse.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
gcse_cc_finalize (void)
{
  /* Drop the cached scratch insn; presumably it is rebuilt lazily on
     the next compilation -- see test_insn's definition earlier in this
     file (not visible here) to confirm.  */
  test_insn = NULL;
}
4431 : :
4432 : : #include "gt-gcse.h"
|