Branch data Line data Source code
1 : : /* Partial redundancy elimination / Hoisting for RTL.
2 : : Copyright (C) 1997-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : /* TODO
21 : : - reordering of memory allocation and freeing to be more space efficient
22 : : - calc rough register pressure information and use the info to drive all
23 : : kinds of code motion (including code hoisting) in a unified way.
24 : : */
25 : :
26 : : /* References searched while implementing this.
27 : :
28 : : Compilers Principles, Techniques and Tools
29 : : Aho, Sethi, Ullman
30 : : Addison-Wesley, 1988
31 : :
32 : : Global Optimization by Suppression of Partial Redundancies
33 : : E. Morel, C. Renvoise
34 : : communications of the acm, Vol. 22, Num. 2, Feb. 1979
35 : :
36 : : A Portable Machine-Independent Global Optimizer - Design and Measurements
37 : : Frederick Chow
38 : : Stanford Ph.D. thesis, Dec. 1983
39 : :
40 : : A Fast Algorithm for Code Movement Optimization
41 : : D.M. Dhamdhere
42 : : SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
43 : :
44 : : A Solution to a Problem with Morel and Renvoise's
45 : : Global Optimization by Suppression of Partial Redundancies
46 : : K-H Drechsler, M.P. Stadel
47 : : ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
48 : :
49 : : Practical Adaptation of the Global Optimization
50 : : Algorithm of Morel and Renvoise
51 : : D.M. Dhamdhere
52 : : ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
53 : :
54 : : Efficiently Computing Static Single Assignment Form and the Control
55 : : Dependence Graph
56 : : R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
57 : : ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
58 : :
59 : : Lazy Code Motion
60 : : J. Knoop, O. Ruthing, B. Steffen
61 : : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
62 : :
63 : : What's In a Region? Or Computing Control Dependence Regions in Near-Linear
64 : : Time for Reducible Flow Control
65 : : Thomas Ball
66 : : ACM Letters on Programming Languages and Systems,
67 : : Vol. 2, Num. 1-4, Mar-Dec 1993
68 : :
69 : : An Efficient Representation for Sparse Sets
70 : : Preston Briggs, Linda Torczon
71 : : ACM Letters on Programming Languages and Systems,
72 : : Vol. 2, Num. 1-4, Mar-Dec 1993
73 : :
74 : : A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
75 : : K-H Drechsler, M.P. Stadel
76 : : ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
77 : :
78 : : Partial Dead Code Elimination
79 : : J. Knoop, O. Ruthing, B. Steffen
80 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
81 : :
82 : : Effective Partial Redundancy Elimination
83 : : P. Briggs, K.D. Cooper
84 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
85 : :
86 : : The Program Structure Tree: Computing Control Regions in Linear Time
87 : : R. Johnson, D. Pearson, K. Pingali
88 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
89 : :
90 : : Optimal Code Motion: Theory and Practice
91 : : J. Knoop, O. Ruthing, B. Steffen
92 : : ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
93 : :
94 : : The power of assignment motion
95 : : J. Knoop, O. Ruthing, B. Steffen
96 : : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
97 : :
98 : : Global code motion / global value numbering
99 : : C. Click
100 : : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
101 : :
102 : : Value Driven Redundancy Elimination
103 : : L.T. Simpson
104 : : Rice University Ph.D. thesis, Apr. 1996
105 : :
106 : : Value Numbering
107 : : L.T. Simpson
108 : : Massively Scalar Compiler Project, Rice University, Sep. 1996
109 : :
110 : : High Performance Compilers for Parallel Computing
111 : : Michael Wolfe
112 : : Addison-Wesley, 1996
113 : :
114 : : Advanced Compiler Design and Implementation
115 : : Steven Muchnick
116 : : Morgan Kaufmann, 1997
117 : :
118 : : Building an Optimizing Compiler
119 : : Robert Morgan
120 : : Digital Press, 1998
121 : :
122 : : People wishing to speed up the code here should read:
123 : : Elimination Algorithms for Data Flow Analysis
124 : : B.G. Ryder, M.C. Paull
125 : : ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
126 : :
127 : : How to Analyze Large Programs Efficiently and Informatively
128 : : D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
129 : : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
130 : :
131 : : People wishing to do something different can find various possibilities
132 : : in the above papers and elsewhere.
133 : : */
134 : :
135 : : #include "config.h"
136 : : #include "system.h"
137 : : #include "coretypes.h"
138 : : #include "backend.h"
139 : : #include "target.h"
140 : : #include "rtl.h"
141 : : #include "tree.h"
142 : : #include "predict.h"
143 : : #include "df.h"
144 : : #include "memmodel.h"
145 : : #include "tm_p.h"
146 : : #include "insn-config.h"
147 : : #include "print-rtl.h"
148 : : #include "regs.h"
149 : : #include "ira.h"
150 : : #include "recog.h"
151 : : #include "diagnostic-core.h"
152 : : #include "cfgrtl.h"
153 : : #include "cfganal.h"
154 : : #include "lcm.h"
155 : : #include "cfgcleanup.h"
156 : : #include "expr.h"
157 : : #include "intl.h"
158 : : #include "tree-pass.h"
159 : : #include "dbgcnt.h"
160 : : #include "gcse.h"
161 : : #include "gcse-common.h"
162 : : #include "function-abi.h"
163 : :
164 : : /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
165 : : are a superset of those done by classic GCSE.
166 : :
167 : : Two passes of copy/constant propagation are done around PRE or hoisting
168 : : because the first one enables more GCSE and the second one helps to clean
169 : : up the copies that PRE and HOIST create. This is needed more for PRE than
170 : : for HOIST because code hoisting will try to use an existing register
171 : : containing the common subexpression rather than create a new one. This is
172 : : harder to do for PRE because of the code motion (which HOIST doesn't do).
173 : :
174 : : Expressions we are interested in GCSE-ing are of the form
175 : : (set (pseudo-reg) (expression)).
176 : : Function want_to_gcse_p says what these are.
177 : :
178 : : In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
179 : : This allows PRE to hoist expressions that are expressed in multiple insns,
180 : : such as complex address calculations (e.g. for PIC code, or loads with a
181 : : high part and a low part).
182 : :
183 : : PRE handles moving invariant expressions out of loops (by treating them as
184 : : partially redundant).
185 : :
186 : : **********************
187 : :
188 : : We used to support multiple passes but there are diminishing returns in
189 : : doing so. The first pass usually makes 90% of the changes that are doable.
190 : : A second pass can make a few more changes made possible by the first pass.
191 : : Experiments show any further passes don't make enough changes to justify
192 : : the expense.
193 : :
194 : : A study of spec92 using an unlimited number of passes:
195 : : [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
196 : : [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
197 : : [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
198 : :
199 : : It was found doing copy propagation between each pass enables further
200 : : substitutions.
201 : :
202 : : This study was done before expressions in REG_EQUAL notes were added as
203 : : candidate expressions for optimization, and before the GIMPLE optimizers
204 : : were added. Probably, multiple passes is even less efficient now than
205 : : at the time when the study was conducted.
206 : :
207 : : PRE is quite expensive in complicated functions because the DFA can take
208 : : a while to converge. Hence we only perform one pass.
209 : :
210 : : **********************
211 : :
212 : : The steps for PRE are:
213 : :
214 : : 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
215 : :
216 : : 2) Perform the data flow analysis for PRE.
217 : :
218 : : 3) Delete the redundant instructions
219 : :
220 : : 4) Insert the required copies [if any] that make the partially
221 : : redundant instructions fully redundant.
222 : :
223 : : 5) For other reaching expressions, insert an instruction to copy the value
224 : : to a newly created pseudo that will reach the redundant instruction.
225 : :
226 : : The deletion is done first so that when we do insertions we
227 : : know which pseudo reg to use.
228 : :
229 : : Various papers have argued that PRE DFA is expensive (O(n^2)) and others
230 : : argue it is not. The number of iterations for the algorithm to converge
231 : : is typically 2-4 so I don't view it as that expensive (relatively speaking).
232 : :
233 : : PRE GCSE depends heavily on the second CPROP pass to clean up the copies
234 : : we create. To make an expression reach the place where it's redundant,
235 : : the result of the expression is copied to a new register, and the redundant
236 : : expression is deleted by replacing it with this new register. Classic GCSE
237 : : doesn't have this problem as much as it computes the reaching defs of
238 : : each register in each block and thus can try to use an existing
239 : : register. */
240 : :
241 : : /* GCSE global vars. */
242 : :
243 : : struct target_gcse default_target_gcse;
244 : : #if SWITCHABLE_TARGET
245 : : struct target_gcse *this_target_gcse = &default_target_gcse;
246 : : #endif
247 : :
248 : : /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
249 : : int flag_rerun_cse_after_global_opts;
250 : :
251 : : /* An obstack for our working variables. */
252 : : static struct obstack gcse_obstack;
253 : :
254 : : /* Hash table of expressions. */
255 : :
256 : : struct gcse_expr
257 : : {
258 : : /* The expression. */
259 : : rtx expr;
260 : : /* Index in the available expression bitmaps. */
261 : : int bitmap_index;
262 : : /* Next entry with the same hash. */
263 : : struct gcse_expr *next_same_hash;
264 : : /* List of anticipatable occurrences in basic blocks in the function.
265 : : An "anticipatable occurrence" is one that is the first occurrence in the
266 : : basic block, the operands are not modified in the basic block prior
267 : : to the occurrence and the output is not used between the start of
268 : : the block and the occurrence. */
269 : : struct gcse_occr *antic_occr;
270 : : /* List of available occurrence in basic blocks in the function.
271 : : An "available occurrence" is one that is the last occurrence in the
272 : : basic block and the operands are not modified by following statements in
273 : : the basic block [including this insn]. */
274 : : struct gcse_occr *avail_occr;
275 : : /* Non-null if the computation is PRE redundant.
276 : : The value is the newly created pseudo-reg to record a copy of the
277 : : expression in all the places that reach the redundant copy. */
278 : : rtx reaching_reg;
279 : : /* Maximum distance in instructions this expression can travel.
280 : : We avoid moving simple expressions for more than a few instructions
281 : : to keep register pressure under control.
282 : : A value of "0" removes restrictions on how far the expression can
283 : : travel. */
284 : : HOST_WIDE_INT max_distance;
285 : : };
286 : :
287 : : /* Occurrence of an expression.
288 : : There is one per basic block. If a pattern appears more than once the
289 : : last appearance is used [or first for anticipatable expressions]. */
290 : :
291 : : struct gcse_occr
292 : : {
293 : : /* Next occurrence of this expression. */
294 : : struct gcse_occr *next;
295 : : /* The insn that computes the expression. */
296 : : rtx_insn *insn;
297 : : /* Nonzero if this [anticipatable] occurrence has been deleted. */
298 : : char deleted_p;
299 : : /* Nonzero if this [available] occurrence has been copied to
300 : : reaching_reg. */
301 : : /* ??? This is mutually exclusive with deleted_p, so they could share
302 : : the same byte. */
303 : : char copied_p;
304 : : };
305 : :
306 : : typedef struct gcse_occr *occr_t;
307 : :
308 : : /* Expression hash tables.
309 : : Each hash table is an array of buckets.
310 : : ??? It is known that if it were an array of entries, structure elements
311 : : `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
312 : : not clear whether in the final analysis a sufficient amount of memory would
313 : : be saved as the size of the available expression bitmaps would be larger
314 : : [one could build a mapping table without holes afterwards though].
315 : : Someday I'll perform the computation and figure it out. */
316 : :
317 : : struct gcse_hash_table_d
318 : : {
319 : : /* The table itself.
320 : : This is an array of `expr_hash_table_size' elements. */
321 : : struct gcse_expr **table;
322 : :
323 : : /* Size of the hash table, in elements. */
324 : : unsigned int size;
325 : :
326 : : /* Number of hash table elements. */
327 : : unsigned int n_elems;
328 : : };
329 : :
330 : : /* Expression hash table. */
331 : : static struct gcse_hash_table_d expr_hash_table;
332 : :
333 : : /* This is a list of expressions which are MEMs and will be used by load
334 : : or store motion.
335 : : Load motion tracks MEMs which aren't killed by anything except itself,
336 : : i.e. loads and stores to a single location.
337 : : We can then allow movement of these MEM refs with a little special
338 : : allowance. (all stores copy the same value to the reaching reg used
339 : : for the loads). This means all values used to store into memory must have
340 : : no side effects so we can re-issue the setter value. */
341 : :
342 : : struct ls_expr
343 : : {
344 : : struct gcse_expr * expr; /* Gcse expression reference for LM. */
345 : : rtx pattern; /* Pattern of this mem. */
346 : : rtx pattern_regs; /* List of registers mentioned by the mem. */
347 : : vec<rtx_insn *> stores; /* INSN list of stores seen. */
348 : : struct ls_expr * next; /* Next in the list. */
349 : : int invalid; /* Invalid for some reason. */
350 : : int index; /* If it maps to a bitmap index. */
351 : : unsigned int hash_index; /* Index when in a hash table. */
352 : : rtx reaching_reg; /* Register to use when re-writing. */
353 : : };
354 : :
355 : : /* Head of the list of load/store memory refs. */
356 : : static struct ls_expr * pre_ldst_mems = NULL;
357 : :
358 : : struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
359 : : {
360 : : typedef value_type compare_type;
361 : : static inline hashval_t hash (const ls_expr *);
362 : : static inline bool equal (const ls_expr *, const ls_expr *);
363 : : };
364 : :
365 : : /* Hashtable helpers. */
366 : : inline hashval_t
367 : 100486946 : pre_ldst_expr_hasher::hash (const ls_expr *x)
368 : : {
369 : 100486946 : int do_not_record_p = 0;
370 : 100486946 : return
371 : 100486946 : hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
372 : : }
373 : :
374 : : static bool expr_equiv_p (const_rtx, const_rtx);
375 : :
376 : : inline bool
377 : 119872351 : pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
378 : : const ls_expr *ptr2)
379 : : {
380 : 119872351 : return expr_equiv_p (ptr1->pattern, ptr2->pattern);
381 : : }
382 : :
383 : : /* Hashtable for the load/store memory refs. */
384 : : static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;
385 : :
386 : : /* Bitmap containing one bit for each register in the program.
387 : : Used when performing GCSE to track which registers have been set since
388 : : the start of the basic block. */
389 : : static regset reg_set_bitmap;
390 : :
391 : : /* Array, indexed by basic block number for a list of insns which modify
392 : : memory within that block. */
393 : : static vec<rtx_insn *> *modify_mem_list;
394 : : static bitmap modify_mem_list_set;
395 : :
396 : : /* This array parallels modify_mem_list, except that it stores MEMs
397 : : being set and their canonicalized memory addresses. */
398 : : static vec<modify_pair> *canon_modify_mem_list;
399 : :
400 : : /* Bitmap indexed by block numbers to record which blocks contain
401 : : function calls. */
402 : : static bitmap blocks_with_calls;
403 : :
404 : : /* Various variables for statistics gathering. */
405 : :
406 : : /* Memory used in a pass.
407 : : This isn't intended to be absolutely precise. Its intent is only
408 : : to keep an eye on memory usage. */
409 : : static int bytes_used;
410 : :
411 : : /* GCSE substitutions made. */
412 : : static int gcse_subst_count;
413 : : /* Number of copy instructions created. */
414 : : static int gcse_create_count;
415 : :
416 : : /* Doing code hoisting. */
417 : : static bool doing_code_hoisting_p = false;
418 : :
419 : : /* Doing hardreg_pre. */
420 : : static bool doing_hardreg_pre_p = false;
421 : :
422 : : inline bool
423 : 22186667 : do_load_motion ()
424 : : {
425 : 22186625 : return flag_gcse_lm && !doing_hardreg_pre_p;
426 : : }
427 : :
428 : : static unsigned int current_hardreg_regno;
429 : :
430 : : /* For available exprs */
431 : : static sbitmap *ae_kill;
432 : :
433 : : /* Data stored for each basic block. */
434 : : struct bb_data
435 : : {
436 : : /* Maximal register pressure inside basic block for given register class
437 : : (defined only for the pressure classes). */
438 : : int max_reg_pressure[N_REG_CLASSES];
439 : : /* Recorded register pressure of basic block before trying to hoist
440 : : an expression. Will be used to restore the register pressure
441 : : if the expression should not be hoisted. */
442 : : int old_pressure;
443 : : /* Recorded register live_in info of basic block during code hoisting
444 : : process. BACKUP is used to record live_in info before trying to
445 : : hoist an expression, and will be used to restore LIVE_IN if the
446 : : expression should not be hoisted. */
447 : : bitmap live_in, backup;
448 : : };
449 : :
450 : : #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
451 : :
452 : : static basic_block curr_bb;
453 : :
454 : : /* Current register pressure for each pressure class. */
455 : : static int curr_reg_pressure[N_REG_CLASSES];
456 : :
457 : :
458 : : static void compute_can_copy (void);
459 : : static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
460 : : static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
461 : : static void *gcse_alloc (unsigned long);
462 : : static void alloc_gcse_mem (void);
463 : : static void free_gcse_mem (void);
464 : : static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
465 : : static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
466 : : static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
467 : : static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
468 : : static bool oprs_unchanged_p (const_rtx, const rtx_insn *, bool);
469 : : static bool oprs_anticipatable_p (const_rtx, const rtx_insn *);
470 : : static bool oprs_available_p (const_rtx, const rtx_insn *);
471 : : static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, bool, bool,
472 : : HOST_WIDE_INT, struct gcse_hash_table_d *);
473 : : static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
474 : : static void record_last_reg_set_info (rtx_insn *, int);
475 : : static void record_last_mem_set_info (rtx_insn *);
476 : : static void record_last_set_info (rtx, const_rtx, void *);
477 : : static void compute_hash_table (struct gcse_hash_table_d *);
478 : : static void alloc_hash_table (struct gcse_hash_table_d *);
479 : : static void free_hash_table (struct gcse_hash_table_d *);
480 : : static void compute_hash_table_work (struct gcse_hash_table_d *);
481 : : static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
482 : : static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
483 : : struct gcse_hash_table_d *);
484 : : static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
485 : : static bool load_killed_in_block_p (const_basic_block, int, const_rtx, bool);
486 : : static void alloc_pre_mem (int, int);
487 : : static void free_pre_mem (void);
488 : : static struct edge_list *compute_pre_data (void);
489 : : static bool pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
490 : : basic_block);
491 : : static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
492 : : static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
493 : : static void pre_insert_copies (void);
494 : : static bool pre_delete (void);
495 : : static bool pre_gcse (struct edge_list *);
496 : : static bool one_pre_gcse_pass (void);
497 : : static void add_label_notes (rtx, rtx_insn *);
498 : : static void alloc_code_hoist_mem (int, int);
499 : : static void free_code_hoist_mem (void);
500 : : static void compute_code_hoist_vbeinout (void);
501 : : static void compute_code_hoist_data (void);
502 : : static bool should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
503 : : basic_block,
504 : : sbitmap, HOST_WIDE_INT, int *,
505 : : enum reg_class,
506 : : int *, bitmap, rtx_insn *);
507 : : static bool hoist_code (void);
508 : : static enum reg_class get_regno_pressure_class (int regno, int *nregs);
509 : : static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
510 : : static bool one_code_hoisting_pass (void);
511 : : static rtx_insn *process_insert_insn (struct gcse_expr *);
512 : : static bool pre_edge_insert (struct edge_list *, struct gcse_expr **);
513 : : static bool pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
514 : : basic_block, char *);
515 : : static struct ls_expr * ldst_entry (rtx);
516 : : static void free_ldst_entry (struct ls_expr *);
517 : : static void free_ld_motion_mems (void);
518 : : static void print_ldst_list (FILE *);
519 : : static struct ls_expr * find_rtx_in_ldst (rtx);
520 : : static bool simple_mem (const_rtx);
521 : : static void invalidate_any_buried_refs (rtx);
522 : : static void compute_ld_motion_mems (void);
523 : : static void trim_ld_motion_mems (void);
524 : : static void update_ld_motion_stores (struct gcse_expr *);
525 : : static void clear_modify_mem_tables (void);
526 : : static void free_modify_mem_tables (void);
527 : :
528 : : #define GNEW(T) ((T *) gmalloc (sizeof (T)))
529 : : #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
530 : :
531 : : #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
532 : : #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
533 : :
534 : : #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
535 : : #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
536 : :
537 : : #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
538 : : #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
539 : :
540 : : /* Misc. utilities. */
541 : :
542 : : #define can_copy \
543 : : (this_target_gcse->x_can_copy)
544 : : #define can_copy_init_p \
545 : : (this_target_gcse->x_can_copy_init_p)
546 : :
547 : : /* Compute which modes support reg/reg copy operations. */
548 : :
549 : : static void
550 : 83733 : compute_can_copy (void)
551 : : {
552 : 83733 : int i;
553 : : #ifndef AVOID_CCMODE_COPIES
554 : : rtx reg;
555 : : rtx_insn *insn;
556 : : #endif
557 : 83733 : memset (can_copy, 0, NUM_MACHINE_MODES);
558 : :
559 : 83733 : start_sequence ();
560 : 11052756 : for (i = 0; i < NUM_MACHINE_MODES; i++)
561 : 10885290 : if (GET_MODE_CLASS (i) == MODE_CC)
562 : : {
563 : : #ifdef AVOID_CCMODE_COPIES
564 : 1004796 : can_copy[i] = 0;
565 : : #else
566 : : reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
567 : : insn = emit_insn (gen_rtx_SET (reg, reg));
568 : : if (recog (PATTERN (insn), insn, NULL) >= 0)
569 : : can_copy[i] = 1;
570 : : #endif
571 : : }
572 : : else
573 : 9880494 : can_copy[i] = 1;
574 : :
575 : 83733 : end_sequence ();
576 : 83733 : }
577 : :
578 : : /* Returns whether the mode supports reg/reg copy operations. */
579 : :
580 : : bool
581 : 101023958 : can_copy_p (machine_mode mode)
582 : : {
583 : 101023958 : if (! can_copy_init_p)
584 : : {
585 : 83733 : compute_can_copy ();
586 : 83733 : can_copy_init_p = true;
587 : : }
588 : :
589 : 101023958 : return can_copy[mode] != 0;
590 : : }
591 : :
592 : : /* Cover function to xmalloc to record bytes allocated. */
593 : :
594 : : static void *
595 : 1007132 : gmalloc (size_t size)
596 : : {
597 : 1007132 : bytes_used += size;
598 : 0 : return xmalloc (size);
599 : : }
600 : :
601 : : /* Cover function to xcalloc to record bytes allocated. */
602 : :
603 : : static void *
604 : 1838306 : gcalloc (size_t nelem, size_t elsize)
605 : : {
606 : 1838306 : bytes_used += nelem * elsize;
607 : 1838306 : return xcalloc (nelem, elsize);
608 : : }
609 : :
610 : : /* Cover function to obstack_alloc. */
611 : :
612 : : static void *
613 : 25924241 : gcse_alloc (unsigned long size)
614 : : {
615 : 25924241 : bytes_used += size;
616 : 25924241 : return obstack_alloc (&gcse_obstack, size);
617 : : }
618 : :
619 : : /* Allocate memory for the reg/memory set tracking tables.
620 : : This is called at the start of each pass. */
621 : :
622 : : static void
623 : 503566 : alloc_gcse_mem (void)
624 : : {
625 : : /* Allocate vars to track sets of regs. */
626 : 503566 : reg_set_bitmap = ALLOC_REG_SET (NULL);
627 : :
628 : : /* Allocate array to keep a list of insns which modify memory in each
629 : : basic block. The two typedefs are needed to work around the
630 : : pre-processor limitation with template types in macro arguments. */
631 : 503566 : typedef vec<rtx_insn *> vec_rtx_heap;
632 : 503566 : typedef vec<modify_pair> vec_modify_pair_heap;
633 : 503566 : modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
634 : 503566 : canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
635 : : last_basic_block_for_fn (cfun));
636 : 503566 : modify_mem_list_set = BITMAP_ALLOC (NULL);
637 : 503566 : blocks_with_calls = BITMAP_ALLOC (NULL);
638 : 503566 : }
639 : :
640 : : /* Free memory allocated by alloc_gcse_mem. */
641 : :
642 : : static void
643 : 503566 : free_gcse_mem (void)
644 : : {
645 : 503566 : FREE_REG_SET (reg_set_bitmap);
646 : :
647 : 503566 : free_modify_mem_tables ();
648 : 503566 : BITMAP_FREE (modify_mem_list_set);
649 : 503566 : BITMAP_FREE (blocks_with_calls);
650 : 503566 : }
651 : :
652 : : /* Compute the local properties of each recorded expression.
653 : :
654 : : Local properties are those that are defined by the block, irrespective of
655 : : other blocks.
656 : :
657 : : An expression is transparent in a block if its operands are not modified
658 : : in the block.
659 : :
660 : : An expression is computed (locally available) in a block if it is computed
661 : : at least once and expression would contain the same value if the
662 : : computation was moved to the end of the block.
663 : :
664 : : An expression is locally anticipatable in a block if it is computed at
665 : : least once and expression would contain the same value if the computation
666 : : was moved to the beginning of the block.
667 : :
668 : : We call this routine for pre and code hoisting. They all compute
669 : : basically the same information and thus can easily share this code.
670 : :
671 : : TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
672 : : properties. If NULL, then it is not necessary to compute or record that
673 : : particular property.
674 : :
675 : : TABLE controls which hash table to look at. */
676 : :
677 : : static void
678 : 440774 : compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
679 : : struct gcse_hash_table_d *table)
680 : : {
681 : 440774 : unsigned int i;
682 : :
683 : : /* Initialize any bitmaps that were passed in. */
684 : 440774 : if (transp)
685 : : {
686 : 440774 : bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
687 : : }
688 : :
689 : 440774 : if (comp)
690 : 440774 : bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
691 : 440774 : if (antloc)
692 : 440774 : bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));
693 : :
694 : 21266016 : for (i = 0; i < table->size; i++)
695 : : {
696 : 20825242 : struct gcse_expr *expr;
697 : :
698 : 30675787 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
699 : : {
700 : 9850545 : int indx = expr->bitmap_index;
701 : 9850545 : struct gcse_occr *occr;
702 : :
703 : : /* In most cases, the expression is transparent in the block if it is
704 : : not killed. The exception to this is during hardreg PRE, in which
705 : : uses of the hardreg prevent transparency but do not kill the
706 : : expression.
707 : :
708 : : We start by assuming all expressions are transparent [none are
709 : : killed], and then reset the bits for those that are. */
710 : 9850545 : if (transp)
711 : : {
712 : 9850545 : compute_transp (expr->expr, indx, transp,
713 : : blocks_with_calls,
714 : : modify_mem_list_set,
715 : : canon_modify_mem_list);
716 : :
717 : 9850545 : if (doing_hardreg_pre_p)
718 : : {
719 : : /* We also need to check whether the destination hardreg is
720 : : set or call-clobbered in each BB. We'll check for hardreg
721 : : uses later. */
722 : 0 : df_ref def;
723 : 0 : for (def = DF_REG_DEF_CHAIN (current_hardreg_regno);
724 : 0 : def;
725 : 0 : def = DF_REF_NEXT_REG (def))
726 : 0 : bitmap_clear_bit (transp[DF_REF_BB (def)->index], indx);
727 : : }
728 : : }
729 : :
730 : : /* The occurrences recorded in antic_occr are exactly those that
731 : : we want to set to nonzero in ANTLOC. */
732 : 9850545 : if (antloc)
733 : 16926350 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
734 : : {
735 : 7075805 : bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
736 : :
737 : : /* While we're scanning the table, this is a good place to
738 : : initialize this. */
739 : 7075805 : occr->deleted_p = 0;
740 : : }
741 : :
742 : : /* The occurrences recorded in avail_occr are exactly those that
743 : : we want to set to nonzero in COMP. */
744 : 9850545 : if (comp)
745 : 18848436 : for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
746 : : {
747 : 8997891 : bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
748 : :
749 : : /* While we're scanning the table, this is a good place to
750 : : initialize this. */
751 : 8997891 : occr->copied_p = 0;
752 : : }
753 : :
754 : : /* While we're scanning the table, this is a good place to
755 : : initialize this. */
756 : 9850545 : expr->reaching_reg = 0;
757 : : }
758 : : }
759 : 440774 : }
760 : :
761 : : /* A hardreg set is not transparent in a block if there are any uses of that
762 : : hardreg. This filters the results of compute_local_properties, after the
763 : : result of that function has been used to define the kills bitmap.
764 : :
765 : : TRANSP is the destination sbitmap to be updated.
766 : :
767 : : TABLE controls which hash table to look at. */
768 : :
769 : : static void
770 : 0 : prune_hardreg_uses (sbitmap *transp, struct gcse_hash_table_d *table)
771 : : {
772 : 0 : unsigned int i;
773 : 0 : gcc_assert (doing_hardreg_pre_p);
774 : :
775 : 0 : for (i = 0; i < table->size; i++)
776 : : {
777 : 0 : struct gcse_expr *expr;
778 : :
779 : 0 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
780 : : {
781 : 0 : int indx = expr->bitmap_index;
782 : 0 : df_ref def;
783 : :
784 : 0 : for (def = DF_REG_USE_CHAIN (current_hardreg_regno);
785 : 0 : def;
786 : 0 : def = DF_REF_NEXT_REG (def))
787 : 0 : bitmap_clear_bit (transp[DF_REF_BB (def)->index], indx);
788 : : }
789 : : }
790 : 0 : }
791 : :
792 : : /* Hash table support. */
793 : :
794 : : struct reg_avail_info
795 : : {
796 : : basic_block last_bb;
797 : : int first_set;
798 : : int last_set;
799 : : };
800 : :
801 : : static struct reg_avail_info *reg_avail_info;
802 : : static basic_block current_bb;
803 : :
804 : : /* See whether X, the source of a set, is something we want to consider for
805 : : GCSE. */
806 : :
807 : : static bool
808 : 21940007 : want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
809 : : {
810 : : #ifdef STACK_REGS
811 : : /* On register stack architectures, don't GCSE constants from the
812 : : constant pool, as the benefits are often swamped by the overhead
813 : : of shuffling the register stack between basic blocks. */
814 : 21940007 : if (IS_STACK_MODE (GET_MODE (x)))
815 : 268956 : x = avoid_constant_pool_reference (x);
816 : : #endif
817 : :
818 : : /* GCSE'ing constants:
819 : :
820 : : We do not specifically distinguish between constant and non-constant
821 : : expressions in PRE and Hoist. We use set_src_cost below to limit
822 : : the maximum distance simple expressions can travel.
823 : :
824 : : Nevertheless, constants are much easier to GCSE, and, hence,
825 : : it is easy to overdo the optimizations. Usually, excessive PRE and
826 : : Hoisting of constant leads to increased register pressure.
827 : :
828 : : RA can deal with this by rematerialing some of the constants.
829 : : Therefore, it is important that the back-end generates sets of constants
830 : : in a way that allows reload rematerialize them under high register
831 : : pressure, i.e., a pseudo register with REG_EQUAL to constant
832 : : is set only once. Failing to do so will result in IRA/reload
833 : : spilling such constants under high register pressure instead of
834 : : rematerializing them.
835 : :
836 : : For hardreg PRE, register pressure is not a concern, and we also want to
837 : : apply GCSE to simple moves. */
838 : :
839 : 21940007 : switch (GET_CODE (x))
840 : : {
841 : 3253719 : case REG:
842 : 3253719 : case SUBREG:
843 : 3253719 : return doing_hardreg_pre_p;
844 : :
845 : : case CALL:
846 : : return false;
847 : :
848 : 2713877 : CASE_CONST_ANY:
849 : 2713877 : if (doing_hardreg_pre_p)
850 : : return true;
851 : 2713877 : else if (!doing_code_hoisting_p)
852 : : /* Do not PRE constants. */
853 : : return false;
854 : :
855 : : /* FALLTHRU */
856 : :
857 : 16106276 : default:
858 : 16106276 : if (doing_code_hoisting_p)
859 : : /* PRE doesn't implement max_distance restriction. */
860 : : {
861 : 626735 : int cost;
862 : 626735 : HOST_WIDE_INT max_distance;
863 : :
864 : 626735 : gcc_assert (!optimize_function_for_speed_p (cfun)
865 : : && optimize_function_for_size_p (cfun));
866 : 626735 : cost = set_src_cost (x, mode, 0);
867 : :
868 : 626735 : if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
869 : : {
870 : 595155 : max_distance
871 : 595155 : = ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10;
872 : 595155 : if (max_distance == 0)
873 : : return false;
874 : :
875 : 530976 : gcc_assert (max_distance > 0);
876 : : }
877 : : else
878 : : max_distance = 0;
879 : :
880 : 562556 : if (max_distance_ptr)
881 : 504784 : *max_distance_ptr = max_distance;
882 : : }
883 : :
884 : 16042097 : return can_assign_to_reg_without_clobbers_p (x, mode);
885 : : }
886 : : }
887 : :
888 : : /* Used internally by can_assign_to_reg_without_clobbers_p. */
889 : :
890 : : static GTY(()) rtx_insn *test_insn;
891 : :
892 : : /* Return true if we can assign X to a pseudo register of mode MODE
893 : : such that the resulting insn does not result in clobbering a hard
894 : : register as a side-effect.
895 : :
896 : : Additionally, if the target requires it, check that the resulting insn
897 : : can be copied. If it cannot, this means that X is special and probably
898 : : has hidden side-effects we don't want to mess with.
899 : :
900 : : This function is typically used by code motion passes, to verify
901 : : that it is safe to insert an insn without worrying about clobbering
902 : : maybe live hard regs. */
903 : :
904 : : bool
905 : 20492283 : can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
906 : : {
907 : 20492283 : int num_clobbers = 0;
908 : 20492283 : int icode;
909 : 20492283 : bool can_assign = false;
910 : :
911 : : /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
912 : 20492283 : if (general_operand (x, mode))
913 : : return true;
914 : 9888007 : else if (GET_MODE (x) == VOIDmode)
915 : : return false;
916 : :
917 : : /* Otherwise, check if we can make a valid insn from it. First initialize
918 : : our test insn if we haven't already. */
919 : 9888007 : if (test_insn == 0)
920 : : {
921 : 67167 : test_insn
922 : 67167 : = make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
923 : : FIRST_PSEUDO_REGISTER * 2),
924 : : const0_rtx));
925 : 67167 : SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
926 : 67167 : INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
927 : : }
928 : :
929 : : /* Now make an insn like the one we would make when GCSE'ing and see if
930 : : valid. */
931 : 9888007 : PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
932 : 9888007 : SET_SRC (PATTERN (test_insn)) = x;
933 : :
934 : 9888007 : icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
935 : :
936 : : /* If the test insn is valid and doesn't need clobbers, and the target also
937 : : has no objections, we're good. */
938 : 9888007 : if (icode >= 0
939 : 8949144 : && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
940 : 16726290 : && ! (targetm.cannot_copy_insn_p
941 : 0 : && targetm.cannot_copy_insn_p (test_insn)))
942 : : can_assign = true;
943 : :
944 : : /* Make sure test_insn doesn't have any pointers into GC space. */
945 : 9888007 : SET_SRC (PATTERN (test_insn)) = NULL_RTX;
946 : :
947 : 9888007 : return can_assign;
948 : : }
949 : :
950 : : /* Return true if the operands of expression X are unchanged from the
951 : : start of INSN's basic block up to but not including INSN
952 : : (if AVAIL_P == false), or from INSN to the end of INSN's basic block
953 : : (if AVAIL_P == true). */
954 : :
955 : : static bool
956 : 72589434 : oprs_unchanged_p (const_rtx x, const rtx_insn *insn, bool avail_p)
957 : : {
958 : 72589434 : int i, j;
959 : 72589434 : enum rtx_code code;
960 : 72589434 : const char *fmt;
961 : :
962 : 72589434 : if (x == 0)
963 : : return true;
964 : :
965 : 72589434 : code = GET_CODE (x);
966 : 72589434 : switch (code)
967 : : {
968 : 22510805 : case REG:
969 : 22510805 : {
970 : 22510805 : struct reg_avail_info *info = ®_avail_info[REGNO (x)];
971 : :
972 : 22510805 : if (info->last_bb != current_bb)
973 : : return true;
974 : 10927426 : if (avail_p)
975 : 5807225 : return info->last_set < DF_INSN_LUID (insn);
976 : : else
977 : 5120201 : return info->first_set >= DF_INSN_LUID (insn);
978 : : }
979 : :
980 : 11744791 : case MEM:
981 : 11744791 : if (! do_load_motion ()
982 : 11744769 : || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
983 : : x, avail_p))
984 : 3571366 : return false;
985 : : else
986 : 8173425 : return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
987 : :
988 : : case PRE_DEC:
989 : : case PRE_INC:
990 : : case POST_DEC:
991 : : case POST_INC:
992 : : case PRE_MODIFY:
993 : : case POST_MODIFY:
994 : : return false;
995 : :
996 : : case PC:
997 : : case CONST:
998 : : CASE_CONST_ANY:
999 : : case SYMBOL_REF:
1000 : : case LABEL_REF:
1001 : : case ADDR_VEC:
1002 : : case ADDR_DIFF_VEC:
1003 : : return true;
1004 : :
1005 : 21182336 : default:
1006 : 21182336 : break;
1007 : : }
1008 : :
1009 : 38793021 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1010 : : {
1011 : 38358221 : if (fmt[i] == 'e')
1012 : : {
1013 : : /* If we are about to do the last recursive call needed at this
1014 : : level, change it into iteration. This function is called enough
1015 : : to be worth it. */
1016 : 37210373 : if (i == 0)
1017 : 19234933 : return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1018 : :
1019 : 17975440 : else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1020 : : return false;
1021 : : }
1022 : 1147848 : else if (fmt[i] == 'E')
1023 : 1975756 : for (j = 0; j < XVECLEN (x, i); j++)
1024 : 1541013 : if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1025 : : return false;
1026 : : }
1027 : :
1028 : : return true;
1029 : : }
1030 : :
1031 : : /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
1032 : :
1033 : : struct mem_conflict_info
1034 : : {
1035 : : /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
1036 : : see if a memory store conflicts with this memory load. */
1037 : : const_rtx mem;
1038 : :
1039 : : /* True if mems_conflict_for_gcse_p finds a conflict between two memory
1040 : : references. */
1041 : : bool conflict;
1042 : : };
1043 : :
1044 : : /* DEST is the output of an instruction. If it is a memory reference and
1045 : : possibly conflicts with the load found in DATA, then communicate this
1046 : : information back through DATA. */
1047 : :
1048 : : static void
1049 : 13333636 : mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
1050 : : void *data)
1051 : : {
1052 : 13333636 : struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
1053 : :
1054 : 13333636 : while (GET_CODE (dest) == SUBREG
1055 : 13333636 : || GET_CODE (dest) == ZERO_EXTRACT
1056 : 26667272 : || GET_CODE (dest) == STRICT_LOW_PART)
1057 : 0 : dest = XEXP (dest, 0);
1058 : :
1059 : : /* If DEST is not a MEM, then it will not conflict with the load. Note
1060 : : that function calls are assumed to clobber memory, but are handled
1061 : : elsewhere. */
1062 : 13333636 : if (! MEM_P (dest))
1063 : : return;
1064 : :
1065 : : /* If we are setting a MEM in our list of specially recognized MEMs,
1066 : : don't mark as killed this time. */
1067 : 25984242 : if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
1068 : : {
1069 : 108059 : if (!find_rtx_in_ldst (dest))
1070 : 42074 : mci->conflict = true;
1071 : 108059 : return;
1072 : : }
1073 : :
1074 : 13083942 : if (true_dependence (dest, GET_MODE (dest), mci->mem))
1075 : 1413564 : mci->conflict = true;
1076 : : }
1077 : :
1078 : : /* Return true if the expression in X (a memory reference) is killed
1079 : : in block BB before or after the insn with the LUID in UID_LIMIT.
1080 : : AVAIL_P is true for kills after UID_LIMIT, and zero for kills
1081 : : before UID_LIMIT.
1082 : :
1083 : : To check the entire block, set UID_LIMIT to max_uid + 1 and
1084 : : AVAIL_P to false. */
1085 : :
1086 : : static bool
1087 : 11744769 : load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
1088 : : bool avail_p)
1089 : : {
1090 : 11744769 : vec<rtx_insn *> list = modify_mem_list[bb->index];
1091 : 11744769 : rtx_insn *setter;
1092 : 11744769 : unsigned ix;
1093 : :
1094 : : /* If this is a readonly then we aren't going to be changing it. */
1095 : 11744769 : if (MEM_READONLY_P (x))
1096 : : return false;
1097 : :
1098 : 51953785 : FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
1099 : : {
1100 : 38277748 : struct mem_conflict_info mci;
1101 : :
1102 : : /* Ignore entries in the list that do not apply. */
1103 : 61247810 : if ((avail_p
1104 : 15829717 : && DF_INSN_LUID (setter) < uid_limit)
1105 : 38277748 : || (! avail_p
1106 : 22448031 : && DF_INSN_LUID (setter) > uid_limit))
1107 : 22970062 : continue;
1108 : :
1109 : : /* If SETTER is a call everything is clobbered. Note that calls
1110 : : to pure functions are never put on the list, so we need not
1111 : : worry about them. */
1112 : 15307686 : if (CALL_P (setter))
1113 : 3571344 : return true;
1114 : :
1115 : : /* SETTER must be an INSN of some kind that sets memory. Call
1116 : : note_stores to examine each hunk of memory that is modified. */
1117 : 13191978 : mci.mem = x;
1118 : 13191978 : mci.conflict = false;
1119 : 13191978 : note_stores (setter, mems_conflict_for_gcse_p, &mci);
1120 : 13191978 : if (mci.conflict)
1121 : : return true;
1122 : : }
1123 : : return false;
1124 : : }
1125 : :
1126 : : /* Return true if the operands of expression X are unchanged from
1127 : : the start of INSN's basic block up to but not including INSN. */
1128 : :
1129 : : static bool
1130 : 12832257 : oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
1131 : : {
1132 : 0 : return oprs_unchanged_p (x, insn, false);
1133 : : }
1134 : :
1135 : : /* Return true if the operands of expression X are unchanged from
1136 : : INSN to the end of INSN's basic block. */
1137 : :
1138 : : static bool
1139 : 12832366 : oprs_available_p (const_rtx x, const rtx_insn *insn)
1140 : : {
1141 : 0 : return oprs_unchanged_p (x, insn, true);
1142 : : }
1143 : :
1144 : : /* Hash expression X.
1145 : :
1146 : : MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1147 : : indicating if a volatile operand is found or if the expression contains
1148 : : something we don't want to insert in the table. HASH_TABLE_SIZE is
1149 : : the current size of the hash table to be probed. */
1150 : :
1151 : : static unsigned int
1152 : 12832366 : hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p,
1153 : : int hash_table_size)
1154 : : {
1155 : 12832366 : unsigned int hash;
1156 : :
1157 : 12832366 : *do_not_record_p = 0;
1158 : :
1159 : 12832366 : hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1160 : 12832366 : return hash % hash_table_size;
1161 : : }
1162 : :
1163 : : /* Return true if exp1 is equivalent to exp2. */
1164 : :
1165 : : static bool
1166 : 146182556 : expr_equiv_p (const_rtx x, const_rtx y)
1167 : : {
1168 : 132664592 : return exp_equiv_p (x, y, 0, true);
1169 : : }
1170 : :
1171 : : /* Insert expression X in INSN in the hash TABLE.
1172 : : If it is already present, record it as the last occurrence in INSN's
1173 : : basic block.
1174 : :
1175 : : MODE is the mode of the value X is being stored into.
1176 : : It is only used if X is a CONST_INT.
1177 : :
1178 : : ANTIC_P is true if X is an anticipatable expression.
1179 : : AVAIL_P is true if X is an available expression.
1180 : :
1181 : : MAX_DISTANCE is the maximum distance in instructions this expression can
1182 : : be moved. */
1183 : :
1184 : : static void
1185 : 12832366 : insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
1186 : : bool antic_p, bool avail_p, HOST_WIDE_INT max_distance,
1187 : : struct gcse_hash_table_d *table)
1188 : : {
1189 : 12832366 : bool found;
1190 : 12832366 : int do_not_record_p;
1191 : 12832366 : unsigned int hash;
1192 : 12832366 : struct gcse_expr *cur_expr, *last_expr = NULL;
1193 : 12832366 : struct gcse_occr *antic_occr, *avail_occr;
1194 : :
1195 : 12832366 : hash = hash_expr (x, mode, &do_not_record_p, table->size);
1196 : :
1197 : : /* Do not insert expression in table if it contains volatile operands,
1198 : : or if hash_expr determines the expression is something we don't want
1199 : : to or can't handle. */
1200 : 12832366 : if (do_not_record_p)
1201 : 303429 : return;
1202 : :
1203 : 12528937 : cur_expr = table->table[hash];
1204 : 12528937 : found = false;
1205 : :
1206 : 16557722 : while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0)
1207 : : {
1208 : : /* If the expression isn't found, save a pointer to the end of
1209 : : the list. */
1210 : 4028785 : last_expr = cur_expr;
1211 : 4028785 : cur_expr = cur_expr->next_same_hash;
1212 : : }
1213 : :
1214 : 12528937 : if (! found)
1215 : : {
1216 : 9850545 : cur_expr = GOBNEW (struct gcse_expr);
1217 : 9850545 : bytes_used += sizeof (struct gcse_expr);
1218 : 9850545 : if (table->table[hash] == NULL)
1219 : : /* This is the first pattern that hashed to this index. */
1220 : 7282786 : table->table[hash] = cur_expr;
1221 : : else
1222 : : /* Add EXPR to end of this hash chain. */
1223 : 2567759 : last_expr->next_same_hash = cur_expr;
1224 : :
1225 : : /* Set the fields of the expr element. */
1226 : 9850545 : cur_expr->expr = x;
1227 : 9850545 : cur_expr->bitmap_index = table->n_elems++;
1228 : 9850545 : cur_expr->next_same_hash = NULL;
1229 : 9850545 : cur_expr->antic_occr = NULL;
1230 : 9850545 : cur_expr->avail_occr = NULL;
1231 : 9850545 : gcc_assert (max_distance >= 0);
1232 : 9850545 : cur_expr->max_distance = max_distance;
1233 : : }
1234 : : else
1235 : 2678392 : gcc_assert (cur_expr->max_distance == max_distance);
1236 : :
1237 : : /* Now record the occurrence(s). */
1238 : 12528937 : if (antic_p)
1239 : : {
1240 : 7085905 : antic_occr = cur_expr->antic_occr;
1241 : :
1242 : 7085905 : if (antic_occr
1243 : 7085905 : && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1244 : : antic_occr = NULL;
1245 : :
1246 : 5079089 : if (antic_occr)
1247 : : /* Found another instance of the expression in the same basic block.
1248 : : Prefer the currently recorded one. We want the first one in the
1249 : : block and the block is scanned from start to end. */
1250 : : ; /* nothing to do */
1251 : : else
1252 : : {
1253 : : /* First occurrence of this expression in this basic block. */
1254 : 7075805 : antic_occr = GOBNEW (struct gcse_occr);
1255 : 7075805 : bytes_used += sizeof (struct gcse_occr);
1256 : 7075805 : antic_occr->insn = insn;
1257 : 7075805 : antic_occr->next = cur_expr->antic_occr;
1258 : 7075805 : antic_occr->deleted_p = 0;
1259 : 7075805 : cur_expr->antic_occr = antic_occr;
1260 : : }
1261 : : }
1262 : :
1263 : 12528937 : if (avail_p)
1264 : : {
1265 : 9009185 : avail_occr = cur_expr->avail_occr;
1266 : :
1267 : 9009185 : if (avail_occr
1268 : 9009185 : && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1269 : : {
1270 : : /* Found another instance of the expression in the same basic block.
1271 : : Prefer this occurrence to the currently recorded one. We want
1272 : : the last one in the block and the block is scanned from start
1273 : : to end. */
1274 : 11294 : avail_occr->insn = insn;
1275 : : }
1276 : : else
1277 : : {
1278 : : /* First occurrence of this expression in this basic block. */
1279 : 8997891 : avail_occr = GOBNEW (struct gcse_occr);
1280 : 8997891 : bytes_used += sizeof (struct gcse_occr);
1281 : 8997891 : avail_occr->insn = insn;
1282 : 8997891 : avail_occr->next = cur_expr->avail_occr;
1283 : 8997891 : avail_occr->deleted_p = 0;
1284 : 8997891 : cur_expr->avail_occr = avail_occr;
1285 : : }
1286 : : }
1287 : : }
1288 : :
1289 : : /* Scan SET present in INSN and add an entry to the hash TABLE. */
1290 : :
1291 : : static void
1292 : 47345821 : hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
1293 : : {
1294 : 47345821 : rtx src = SET_SRC (set);
1295 : 47345821 : rtx dest = SET_DEST (set);
1296 : 47345821 : rtx note;
1297 : :
1298 : 47345821 : if (GET_CODE (src) == CALL)
1299 : : hash_scan_call (src, insn, table);
1300 : :
1301 : 45724687 : else if (REG_P (dest))
1302 : : {
1303 : 34016318 : unsigned int regno = REGNO (dest);
1304 : 34016318 : HOST_WIDE_INT max_distance = 0;
1305 : :
1306 : : /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1307 : :
1308 : : This allows us to do a single GCSE pass and still eliminate
1309 : : redundant constants, addresses or other expressions that are
1310 : : constructed with multiple instructions.
1311 : :
1312 : : However, keep the original SRC if INSN is a simple reg-reg move.
1313 : : In this case, there will almost always be a REG_EQUAL note on the
1314 : : insn that sets SRC. By recording the REG_EQUAL value here as SRC
1315 : : for INSN, we miss copy propagation opportunities and we perform the
1316 : : same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1317 : : do more than one PRE GCSE pass.
1318 : :
1319 : : Note that this does not impede profitable constant propagations. We
1320 : : "look through" reg-reg sets in lookup_avail_set. */
1321 : 34016318 : note = find_reg_equal_equiv_note (insn);
1322 : 34016318 : if (note != 0
1323 : 2884323 : && REG_NOTE_KIND (note) == REG_EQUAL
1324 : 2740311 : && !REG_P (src)
1325 : 35564248 : && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
1326 : 30992 : src = XEXP (note, 0), set = gen_rtx_SET (dest, src);
1327 : :
1328 : : /* Only record sets of pseudo-regs in the hash table, unless we're
1329 : : currently doing hardreg switching. */
1330 : 34016318 : if ((doing_hardreg_pre_p ? regno == current_hardreg_regno
1331 : : : regno >= FIRST_PSEUDO_REGISTER)
1332 : : /* Don't GCSE something if we can't do a reg/reg copy. */
1333 : 20475057 : && can_copy_p (GET_MODE (dest))
1334 : : /* GCSE commonly inserts instruction after the insn. We can't
1335 : : do that easily for EH edges so disable GCSE on these for now. */
1336 : : /* ??? We can now easily create new EH landing pads at the
1337 : : gimple level, for splitting edges; there's no reason we
1338 : : can't do the same thing at the rtl level. */
1339 : 20475057 : && !can_throw_internal (insn)
1340 : : /* Is SET_SRC something we want to gcse? */
1341 : 20391968 : && want_to_gcse_p (src, GET_MODE (dest), &max_distance)
1342 : : /* Don't CSE a nop. */
1343 : 12969530 : && ! set_noop_p (set)
1344 : : /* Don't GCSE if it has attached REG_EQUIV note.
1345 : : At this point this only function parameters should have
1346 : : REG_EQUIV notes and if the argument slot is used somewhere
1347 : : explicitly, it means address of parameter has been taken,
1348 : : so we should not extend the lifetime of the pseudo. */
1349 : 46985848 : && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1350 : : {
1351 : : /* An expression is not anticipatable if its operands are
1352 : : modified before this insn or if this is not the only SET in
1353 : : this insn. The latter condition does not have to mean that
1354 : : SRC itself is not anticipatable, but we just will not be
1355 : : able to handle code motion of insns with multiple sets. */
1356 : 12832257 : bool antic_p = (oprs_anticipatable_p (src, insn)
1357 : 12832257 : && !multiple_sets (insn));
1358 : 12832257 : if (doing_hardreg_pre_p)
1359 : : {
1360 : : /* An hardreg assignment is anticipatable only if the hardreg is
1361 : : neither set nor used prior to this assignment. */
1362 : 0 : auto info = reg_avail_info[current_hardreg_regno];
1363 : 0 : if ((info.last_bb == current_bb
1364 : 0 : && info.first_set < DF_INSN_LUID (insn))
1365 : 0 : || bitmap_bit_p (DF_LR_IN (current_bb),
1366 : : current_hardreg_regno))
1367 : : antic_p = false;
1368 : : }
1369 : :
1370 : : /* An expression is not available if its operands are
1371 : : subsequently modified, including this insn. It's also not
1372 : : available if this is a branch, because we can't insert
1373 : : a set after the branch. */
1374 : 12832257 : bool avail_p = (oprs_available_p (src, insn)
1375 : 12832257 : && ! JUMP_P (insn));
1376 : 12832257 : if (doing_hardreg_pre_p)
1377 : : {
1378 : : /* An hardreg assignment is only available if the hardreg is
1379 : : not set later in the BB. Uses of the hardreg are allowed. */
1380 : 0 : auto info = reg_avail_info[current_hardreg_regno];
1381 : 0 : if (info.last_bb == current_bb
1382 : 0 : && info.last_set > DF_INSN_LUID (insn))
1383 : : avail_p = false;
1384 : : }
1385 : :
1386 : 12832257 : insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1387 : : max_distance, table);
1388 : : }
1389 : : }
1390 : : /* In case of store we want to consider the memory value as available in
1391 : : the REG stored in that memory. This makes it possible to remove
1392 : : redundant loads from due to stores to the same location. */
1393 : 11708369 : else if (flag_gcse_las
1394 : 129 : && !doing_hardreg_pre_p
1395 : 129 : && REG_P (src)
1396 : 109 : && MEM_P (dest))
1397 : : {
1398 : 109 : unsigned int regno = REGNO (src);
1399 : 109 : HOST_WIDE_INT max_distance = 0;
1400 : :
1401 : : /* Only record sets of pseudo-regs in the hash table. */
1402 : 109 : if (regno >= FIRST_PSEUDO_REGISTER
1403 : : /* Don't GCSE something if we can't do a reg/reg copy. */
1404 : 109 : && can_copy_p (GET_MODE (src))
1405 : : /* GCSE commonly inserts instruction after the insn. We can't
1406 : : do that easily for EH edges so disable GCSE on these for now. */
1407 : 109 : && !can_throw_internal (insn)
1408 : : /* Is SET_DEST something we want to gcse? */
1409 : 109 : && want_to_gcse_p (dest, GET_MODE (dest), &max_distance)
1410 : : /* Don't CSE a nop. */
1411 : 109 : && ! set_noop_p (set)
1412 : : /* Don't GCSE if it has attached REG_EQUIV note.
1413 : : At this point this only function parameters should have
1414 : : REG_EQUIV notes and if the argument slot is used somewhere
1415 : : explicitly, it means address of parameter has been taken,
1416 : : so we should not extend the lifetime of the pseudo. */
1417 : 218 : && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1418 : 0 : || ! MEM_P (XEXP (note, 0))))
1419 : : {
1420 : : /* Stores are never anticipatable. */
1421 : 109 : bool antic_p = 0;
1422 : : /* An expression is not available if its operands are
1423 : : subsequently modified, including this insn. It's also not
1424 : : available if this is a branch, because we can't insert
1425 : : a set after the branch. */
1426 : 109 : bool avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn);
1427 : :
1428 : : /* Record the memory expression (DEST) in the hash table. */
1429 : 109 : insert_expr_in_table (dest, GET_MODE (dest), insn,
1430 : : antic_p, avail_p, max_distance, table);
1431 : : }
1432 : : }
1433 : 47345821 : }
1434 : :
1435 : : static void
1436 : 0 : hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1437 : : struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1438 : : {
1439 : : /* Currently nothing to do. */
1440 : 0 : }
1441 : :
1442 : : static void
1443 : 0 : hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1444 : : struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1445 : : {
1446 : : /* Currently nothing to do. */
1447 : 0 : }
1448 : :
1449 : : /* Process INSN and add hash table entries as appropriate. */
1450 : :
1451 : : static void
1452 : 49644805 : hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
1453 : : {
1454 : 49644805 : rtx pat = PATTERN (insn);
1455 : 49644805 : int i;
1456 : :
1457 : : /* Pick out the sets of INSN and for other forms of instructions record
1458 : : what's been modified. */
1459 : :
1460 : 49644805 : if (GET_CODE (pat) == SET)
1461 : 38936955 : hash_scan_set (pat, insn, table);
1462 : :
1463 : 10707850 : else if (GET_CODE (pat) == CLOBBER)
1464 : : hash_scan_clobber (pat, insn, table);
1465 : :
1466 : 10689878 : else if (GET_CODE (pat) == CALL)
1467 : : hash_scan_call (pat, insn, table);
1468 : :
1469 : 8579429 : else if (GET_CODE (pat) == PARALLEL)
1470 : 24840987 : for (i = 0; i < XVECLEN (pat, 0); i++)
1471 : : {
1472 : 16676359 : rtx x = XVECEXP (pat, 0, i);
1473 : :
1474 : 16676359 : if (GET_CODE (x) == SET)
1475 : 8408866 : hash_scan_set (x, insn, table);
1476 : : else if (GET_CODE (x) == CLOBBER)
1477 : : hash_scan_clobber (x, insn, table);
1478 : : else if (GET_CODE (x) == CALL)
1479 : : hash_scan_call (x, insn, table);
1480 : : }
1481 : 49644805 : }
1482 : :
1483 : : /* Dump the hash table TABLE to file FILE under the name NAME. */
1484 : :
1485 : : static void
1486 : 24 : dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
1487 : : {
1488 : 24 : int i;
1489 : : /* Flattened out table, so it's printed in proper order. */
1490 : 24 : struct gcse_expr **flat_table;
1491 : 24 : unsigned int *hash_val;
1492 : 24 : struct gcse_expr *expr;
1493 : :
1494 : 24 : flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
1495 : 24 : hash_val = XNEWVEC (unsigned int, table->n_elems);
1496 : :
1497 : 424 : for (i = 0; i < (int) table->size; i++)
1498 : 632 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1499 : : {
1500 : 256 : flat_table[expr->bitmap_index] = expr;
1501 : 256 : hash_val[expr->bitmap_index] = i;
1502 : : }
1503 : :
1504 : 24 : fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1505 : : name, table->size, table->n_elems);
1506 : :
1507 : 304 : for (i = 0; i < (int) table->n_elems; i++)
1508 : 256 : if (flat_table[i] != 0)
1509 : : {
1510 : 256 : expr = flat_table[i];
1511 : 256 : fprintf (file, "Index %d (hash value %d; max distance "
1512 : : HOST_WIDE_INT_PRINT_DEC ")\n ",
1513 : 256 : expr->bitmap_index, hash_val[i], expr->max_distance);
1514 : 256 : print_rtl (file, expr->expr);
1515 : 256 : fprintf (file, "\n");
1516 : : }
1517 : :
1518 : 24 : fprintf (file, "\n");
1519 : :
1520 : 24 : free (flat_table);
1521 : 24 : free (hash_val);
1522 : 24 : }
1523 : :
1524 : : /* Record register first/last/block set information for REGNO in INSN.
1525 : :
1526 : : first_set records the first place in the block where the register
1527 : : is set and is used to compute "anticipatability".
1528 : :
1529 : : last_set records the last place in the block where the register
1530 : : is set and is used to compute "availability".
1531 : :
1532 : : last_bb records the block for which first_set and last_set are
1533 : : valid, as a quick test to invalidate them. */
1534 : :
1535 : : static void
1536 : 357627088 : record_last_reg_set_info (rtx_insn *insn, int regno)
1537 : : {
1538 : 357627088 : struct reg_avail_info *info = ®_avail_info[regno];
1539 : 357627088 : int luid = DF_INSN_LUID (insn);
1540 : :
1541 : 357627088 : info->last_set = luid;
1542 : 357627088 : if (info->last_bb != current_bb)
1543 : : {
1544 : 268618344 : info->last_bb = current_bb;
1545 : 268618344 : info->first_set = luid;
1546 : : }
1547 : 357627088 : }
1548 : :
1549 : : /* Record memory modification information for INSN. We do not actually care
1550 : : about the memory location(s) that are set, or even how they are set (consider
1551 : : a CALL_INSN). We merely need to record which insns modify memory. */
1552 : :
1553 : : static void
1554 : 9017056 : record_last_mem_set_info (rtx_insn *insn)
1555 : : {
1556 : 9017056 : if (! do_load_motion ())
1557 : : return;
1558 : :
1559 : 9017042 : record_last_mem_set_info_common (insn, modify_mem_list,
1560 : : canon_modify_mem_list,
1561 : : modify_mem_list_set,
1562 : : blocks_with_calls);
1563 : : }
1564 : :
1565 : : /* Called from compute_hash_table via note_stores to handle one
1566 : : SET or CLOBBER in an insn. DATA is really the instruction in which
1567 : : the SET is taking place. */
1568 : :
1569 : : static void
1570 : 55474602 : record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1571 : : {
1572 : 55474602 : rtx_insn *last_set_insn = (rtx_insn *) data;
1573 : :
1574 : 55474602 : if (GET_CODE (dest) == SUBREG)
1575 : 0 : dest = SUBREG_REG (dest);
1576 : :
1577 : 55474602 : if (REG_P (dest))
1578 : 43945396 : record_last_reg_set_info (last_set_insn, REGNO (dest));
1579 : 11529206 : else if (MEM_P (dest)
1580 : : /* Ignore pushes, they clobber nothing. */
1581 : 11529206 : && ! push_operand (dest, GET_MODE (dest)))
1582 : 5537823 : record_last_mem_set_info (last_set_insn);
1583 : 55474602 : }
1584 : :
1585 : : /* Top level function to create an expression hash table.
1586 : :
1587 : : Expression entries are placed in the hash table if
1588 : : - they are of the form (set (pseudo-reg) src),
1589 : : - src is something we want to perform GCSE on,
1590 : : - none of the operands are subsequently modified in the block
1591 : :
1592 : : Currently src must be a pseudo-reg or a const_int.
1593 : :
1594 : : TABLE is the table computed. */
1595 : :
1596 : : static void
1597 : 503566 : compute_hash_table_work (struct gcse_hash_table_d *table)
1598 : : {
1599 : 503566 : int i;
1600 : :
1601 : : /* re-Cache any INSN_LIST nodes we have allocated. */
1602 : 503566 : clear_modify_mem_tables ();
1603 : : /* Some working arrays used to track first and last set in each block. */
1604 : 503566 : reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1605 : :
1606 : 81332142 : for (i = 0; i < max_reg_num (); ++i)
1607 : 80828576 : reg_avail_info[i].last_bb = NULL;
1608 : :
1609 : 10240166 : FOR_EACH_BB_FN (current_bb, cfun)
1610 : : {
1611 : 9736600 : rtx_insn *insn;
1612 : 9736600 : unsigned int regno;
1613 : :
1614 : : /* First pass over the instructions records information used to
1615 : : determine when registers and memory are first and last set. */
1616 : 120144533 : FOR_BB_INSNS (current_bb, insn)
1617 : : {
1618 : 110407933 : if (!NONDEBUG_INSN_P (insn))
1619 : 60763128 : continue;
1620 : :
1621 : 49644805 : if (CALL_P (insn))
1622 : : {
1623 : 3851568 : hard_reg_set_iterator hrsi;
1624 : :
1625 : : /* We don't track modes of hard registers, so we need
1626 : : to be conservative and assume that partial kills
1627 : : are full kills. */
1628 : 3851568 : HARD_REG_SET callee_clobbers
1629 : 3851568 : = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
1630 : 317533260 : EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
1631 : 313681692 : record_last_reg_set_info (insn, regno);
1632 : :
1633 : 7563768 : if (! RTL_CONST_OR_PURE_CALL_P (insn)
1634 : 397309 : || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1635 : 4225375 : || can_throw_external (insn))
1636 : 3479233 : record_last_mem_set_info (insn);
1637 : : }
1638 : :
1639 : 49644805 : note_stores (insn, record_last_set_info, insn);
1640 : : }
1641 : :
1642 : : /* The next pass builds the hash table. */
1643 : 120144533 : FOR_BB_INSNS (current_bb, insn)
1644 : 110407933 : if (NONDEBUG_INSN_P (insn))
1645 : 49644805 : hash_scan_insn (insn, table);
1646 : : }
1647 : :
1648 : 503566 : free (reg_avail_info);
1649 : 503566 : reg_avail_info = NULL;
1650 : 503566 : }
1651 : :
1652 : : /* Allocate space for the set/expr hash TABLE.
1653 : : It is used to determine the number of buckets to use. */
1654 : :
1655 : : static void
1656 : 503566 : alloc_hash_table (struct gcse_hash_table_d *table)
1657 : : {
1658 : 503566 : int n;
1659 : :
1660 : 503566 : n = get_max_insn_count ();
1661 : :
1662 : 503566 : table->size = n / 4;
1663 : 503566 : if (table->size < 11)
1664 : 192431 : table->size = 11;
1665 : :
1666 : : /* Attempt to maintain efficient use of hash table.
1667 : : Making it an odd number is simplest for now.
1668 : : ??? Later take some measurements. */
1669 : 503566 : table->size |= 1;
1670 : 503566 : n = table->size * sizeof (struct gcse_expr *);
1671 : 503566 : table->table = GNEWVAR (struct gcse_expr *, n);
1672 : 503566 : }
1673 : :
1674 : : /* Free things allocated by alloc_hash_table. */
1675 : :
1676 : : static void
1677 : 503566 : free_hash_table (struct gcse_hash_table_d *table)
1678 : : {
1679 : 503566 : free (table->table);
1680 : 0 : }
1681 : :
1682 : : /* Compute the expression hash table TABLE. */
1683 : :
1684 : : static void
1685 : 503566 : compute_hash_table (struct gcse_hash_table_d *table)
1686 : : {
1687 : : /* Initialize count of number of entries in hash table. */
1688 : 503566 : table->n_elems = 0;
1689 : 503566 : memset (table->table, 0, table->size * sizeof (struct gcse_expr *));
1690 : :
1691 : 503566 : compute_hash_table_work (table);
1692 : 503566 : }
1693 : :
1694 : : /* Expression tracking support. */
1695 : :
1696 : : /* Clear canon_modify_mem_list and modify_mem_list tables. */
1697 : : static void
1698 : 1007132 : clear_modify_mem_tables (void)
1699 : : {
1700 : 1007132 : unsigned i;
1701 : 1007132 : bitmap_iterator bi;
1702 : :
1703 : 4966120 : EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1704 : : {
1705 : 3958988 : modify_mem_list[i].release ();
1706 : 3958988 : canon_modify_mem_list[i].release ();
1707 : : }
1708 : 1007132 : bitmap_clear (modify_mem_list_set);
1709 : 1007132 : bitmap_clear (blocks_with_calls);
1710 : 1007132 : }
1711 : :
1712 : : /* Release memory used by modify_mem_list_set. */
1713 : :
1714 : : static void
1715 : 503566 : free_modify_mem_tables (void)
1716 : : {
1717 : 503566 : clear_modify_mem_tables ();
1718 : 503566 : free (modify_mem_list);
1719 : 503566 : free (canon_modify_mem_list);
1720 : 503566 : modify_mem_list = 0;
1721 : 503566 : canon_modify_mem_list = 0;
1722 : 503566 : }
1723 : :
1724 : : /* Compute PRE+LCM working variables. */
1725 : :
1726 : : /* Local properties of expressions. */
1727 : :
1728 : : /* Nonzero for expressions that are transparent in the block. */
1729 : : static sbitmap *transp;
1730 : :
1731 : : /* Nonzero for expressions that are computed (available) in the block. */
1732 : : static sbitmap *comp;
1733 : :
1734 : : /* Nonzero for expressions that are locally anticipatable in the block. */
1735 : : static sbitmap *antloc;
1736 : :
1737 : : /* Nonzero for expressions where this block is an optimal computation
1738 : : point. */
1739 : : static sbitmap *pre_optimal;
1740 : :
1741 : : /* Nonzero for expressions which are redundant in a particular block. */
1742 : : static sbitmap *pre_redundant;
1743 : :
1744 : : /* Nonzero for expressions which should be inserted on a specific edge. */
1745 : : static sbitmap *pre_insert_map;
1746 : :
1747 : : /* Nonzero for expressions which should be deleted in a specific block. */
1748 : : static sbitmap *pre_delete_map;
1749 : :
1750 : : /* Allocate vars used for PRE analysis. */
1751 : :
1752 : : static void
1753 : 415587 : alloc_pre_mem (int n_blocks, int n_exprs)
1754 : : {
1755 : 415587 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1756 : 415587 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1757 : 415587 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1758 : :
1759 : 415587 : pre_optimal = NULL;
1760 : 415587 : pre_redundant = NULL;
1761 : 415587 : pre_insert_map = NULL;
1762 : 415587 : pre_delete_map = NULL;
1763 : 415587 : ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 : :
1765 : : /* pre_insert and pre_delete are allocated later. */
1766 : 415587 : }
1767 : :
1768 : : /* Free vars used for PRE analysis. */
1769 : :
1770 : : static void
1771 : 415587 : free_pre_mem (void)
1772 : : {
1773 : 415587 : sbitmap_vector_free (transp);
1774 : 415587 : sbitmap_vector_free (comp);
1775 : :
1776 : : /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1777 : :
1778 : 415587 : if (pre_optimal)
1779 : 0 : sbitmap_vector_free (pre_optimal);
1780 : 415587 : if (pre_redundant)
1781 : 0 : sbitmap_vector_free (pre_redundant);
1782 : 415587 : if (pre_insert_map)
1783 : 415587 : sbitmap_vector_free (pre_insert_map);
1784 : 415587 : if (pre_delete_map)
1785 : 415587 : sbitmap_vector_free (pre_delete_map);
1786 : :
1787 : 415587 : transp = comp = NULL;
1788 : 415587 : pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1789 : 415587 : }
1790 : :
1791 : : /* Remove certain expressions from anticipatable and transparent
1792 : : sets of basic blocks that have incoming abnormal edge.
1793 : : For PRE remove potentially trapping expressions to avoid placing
1794 : : them on abnormal edges. For hoisting remove memory references that
1795 : : can be clobbered by calls. */
1796 : :
1797 : : static void
1798 : 440774 : prune_expressions (bool pre_p)
1799 : : {
1800 : 440774 : struct gcse_expr *expr;
1801 : 440774 : unsigned int ui;
1802 : 440774 : basic_block bb;
1803 : :
1804 : 440774 : auto_sbitmap prune_exprs (expr_hash_table.n_elems);
1805 : 440774 : bitmap_clear (prune_exprs);
1806 : 21266016 : for (ui = 0; ui < expr_hash_table.size; ui++)
1807 : : {
1808 : 30675787 : for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1809 : : {
1810 : : /* Note potentially trapping expressions. */
1811 : 9850545 : if (may_trap_p (expr->expr))
1812 : : {
1813 : 2725430 : bitmap_set_bit (prune_exprs, expr->bitmap_index);
1814 : 2725430 : continue;
1815 : : }
1816 : :
1817 : 7125115 : if (!pre_p && contains_mem_rtx_p (expr->expr))
1818 : : /* Note memory references that can be clobbered by a call.
1819 : : We do not split abnormal edges in hoisting, so would
1820 : : a memory reference get hoisted along an abnormal edge,
1821 : : it would be placed /before/ the call. Therefore, only
1822 : : constant memory references can be hoisted along abnormal
1823 : : edges. */
1824 : : {
1825 : 36674 : rtx x = expr->expr;
1826 : :
1827 : : /* Common cases where we might find the MEM which may allow us
1828 : : to avoid pruning the expression. */
1829 : 37025 : while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
1830 : 351 : x = XEXP (x, 0);
1831 : :
1832 : : /* If we found the MEM, go ahead and look at it to see if it has
1833 : : properties that allow us to avoid pruning its expression out
1834 : : of the tables. */
1835 : 36674 : if (MEM_P (x))
1836 : : {
1837 : 37348 : if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1838 : 36614 : && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
1839 : 734 : continue;
1840 : :
1841 : 37450 : if (MEM_READONLY_P (x)
1842 : 1570 : && !MEM_VOLATILE_P (x)
1843 : 37450 : && MEM_NOTRAP_P (x))
1844 : : /* Constant memory reference, e.g., a PIC address. */
1845 : 1570 : continue;
1846 : : }
1847 : :
1848 : : /* ??? Optimally, we would use interprocedural alias
1849 : : analysis to determine if this mem is actually killed
1850 : : by this call. */
1851 : :
1852 : 34370 : bitmap_set_bit (prune_exprs, expr->bitmap_index);
1853 : : }
1854 : : }
1855 : : }
1856 : :
1857 : 9865322 : FOR_EACH_BB_FN (bb, cfun)
1858 : : {
1859 : 9424548 : edge e;
1860 : 9424548 : edge_iterator ei;
1861 : :
1862 : : /* If the current block is the destination of an abnormal edge, we
1863 : : kill all trapping (for PRE) and memory (for hoist) expressions
1864 : : because we won't be able to properly place the instruction on
1865 : : the edge. So make them neither anticipatable nor transparent.
1866 : : This is fairly conservative.
1867 : :
1868 : : ??? For hoisting it may be necessary to check for set-and-jump
1869 : : instructions here, not just for abnormal edges. The general problem
1870 : : is that when an expression cannot not be placed right at the end of
1871 : : a basic block we should account for any side-effects of a subsequent
1872 : : jump instructions that could clobber the expression. It would
1873 : : be best to implement this check along the lines of
1874 : : should_hoist_expr_to_dom where the target block is already known
1875 : : and, hence, there's no need to conservatively prune expressions on
1876 : : "intermediate" set-and-jump instructions. */
1877 : 22749265 : FOR_EACH_EDGE (e, ei, bb->preds)
1878 : 13515782 : if ((e->flags & EDGE_ABNORMAL)
1879 : 191155 : && (pre_p || CALL_P (BB_END (e->src))))
1880 : : {
1881 : 191065 : bitmap_and_compl (antloc[bb->index],
1882 : 191065 : antloc[bb->index], prune_exprs);
1883 : 191065 : bitmap_and_compl (transp[bb->index],
1884 : 191065 : transp[bb->index], prune_exprs);
1885 : 191065 : break;
1886 : : }
1887 : : }
1888 : 440774 : }
1889 : :
1890 : : /* It may be necessary to insert a large number of insns on edges to
1891 : : make the existing occurrences of expressions fully redundant. This
1892 : : routine examines the set of insertions and deletions and if the ratio
1893 : : of insertions to deletions is too high for a particular expression, then
1894 : : the expression is removed from the insertion/deletion sets.
1895 : :
1896 : : N_ELEMS is the number of elements in the hash table. */
1897 : :
1898 : : static void
1899 : 415587 : prune_insertions_deletions (int n_elems)
1900 : : {
1901 : 415587 : sbitmap_iterator sbi;
1902 : :
1903 : : /* We always use I to iterate over blocks/edges and J to iterate over
1904 : : expressions. */
1905 : 415587 : unsigned int i, j;
1906 : :
1907 : : /* Counts for the number of times an expression needs to be inserted and
1908 : : number of times an expression can be removed as a result. */
1909 : 415587 : int *insertions = GCNEWVEC (int, n_elems);
1910 : 415587 : int *deletions = GCNEWVEC (int, n_elems);
1911 : :
1912 : : /* Set of expressions which require too many insertions relative to
1913 : : the number of deletions achieved. We will prune these out of the
1914 : : insertion/deletion sets. */
1915 : 415587 : auto_sbitmap prune_exprs (n_elems);
1916 : 415587 : bitmap_clear (prune_exprs);
1917 : :
1918 : : /* Iterate over the edges counting the number of times each expression
1919 : : needs to be inserted. */
1920 : 15226575 : for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1921 : : {
1922 : 29113998 : EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
1923 : 323196 : insertions[j]++;
1924 : : }
1925 : :
1926 : : /* Similarly for deletions, but those occur in blocks rather than on
1927 : : edges. */
1928 : 11185878 : for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1929 : : {
1930 : 22523893 : EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
1931 : 983311 : deletions[j]++;
1932 : : }
1933 : :
1934 : : /* Now that we have accurate counts, iterate over the elements in the
1935 : : hash table and see if any need too many insertions relative to the
1936 : : number of evaluations that can be removed. If so, mark them in
1937 : : PRUNE_EXPRS. */
1938 : 9981872 : for (j = 0; j < (unsigned) n_elems; j++)
1939 : 9566285 : if (deletions[j]
1940 : 364676 : && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio)
1941 : 168 : bitmap_set_bit (prune_exprs, j);
1942 : :
1943 : : /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1944 : 831342 : EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
1945 : : {
1946 : 151268 : for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1947 : 151100 : bitmap_clear_bit (pre_insert_map[i], j);
1948 : :
1949 : 99077 : for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1950 : 98909 : bitmap_clear_bit (pre_delete_map[i], j);
1951 : : }
1952 : :
1953 : 415587 : free (insertions);
1954 : 415587 : free (deletions);
1955 : 415587 : }
1956 : :
1957 : : /* Top level routine to do the dataflow analysis needed by PRE. */
1958 : :
1959 : : static struct edge_list *
1960 : 415587 : compute_pre_data (void)
1961 : : {
1962 : 415587 : struct edge_list *edge_list;
1963 : 415587 : basic_block bb;
1964 : :
1965 : 415587 : compute_local_properties (transp, comp, antloc, &expr_hash_table);
1966 : 415587 : prune_expressions (true);
1967 : 415587 : bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));
1968 : :
1969 : : /* Compute ae_kill for each basic block using:
1970 : :
1971 : : ~(TRANSP | COMP)
1972 : : */
1973 : :
1974 : 9528278 : FOR_EACH_BB_FN (bb, cfun)
1975 : : {
1976 : 9112691 : bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1977 : 9112691 : bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1978 : : }
1979 : :
1980 : 415587 : if (doing_hardreg_pre_p)
1981 : 0 : prune_hardreg_uses (transp, &expr_hash_table);
1982 : :
1983 : 415587 : edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1984 : : ae_kill, &pre_insert_map, &pre_delete_map);
1985 : 415587 : sbitmap_vector_free (antloc);
1986 : 415587 : antloc = NULL;
1987 : 415587 : sbitmap_vector_free (ae_kill);
1988 : 415587 : ae_kill = NULL;
1989 : :
1990 : 415587 : prune_insertions_deletions (expr_hash_table.n_elems);
1991 : :
1992 : 415587 : return edge_list;
1993 : : }
1994 : :
1995 : : /* PRE utilities */
1996 : :
1997 : : /* Return true if an occurrence of expression EXPR in OCCR_BB would reach
1998 : : block BB.
1999 : :
2000 : : VISITED is a pointer to a working buffer for tracking which BB's have
2001 : : been visited. It is NULL for the top-level call.
2002 : :
2003 : : We treat reaching expressions that go through blocks containing the same
2004 : : reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2005 : : 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2006 : : 2 as not reaching. The intent is to improve the probability of finding
2007 : : only one reaching expression and to reduce register lifetimes by picking
2008 : : the closest such expression. */
2009 : :
2010 : : static bool
2011 : 20302088 : pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
2012 : : basic_block bb, char *visited)
2013 : : {
2014 : 20302088 : edge pred;
2015 : 20302088 : edge_iterator ei;
2016 : :
2017 : 43378187 : FOR_EACH_EDGE (pred, ei, bb->preds)
2018 : : {
2019 : 25714952 : basic_block pred_bb = pred->src;
2020 : :
2021 : 25714952 : if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
2022 : : /* Has predecessor has already been visited? */
2023 : 25378762 : || visited[pred_bb->index])
2024 : : ;/* Nothing to do. */
2025 : :
2026 : : /* Does this predecessor generate this expression? */
2027 : 21345862 : else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
2028 : : {
2029 : : /* Is this the occurrence we're looking for?
2030 : : Note that there's only one generating occurrence per block
2031 : : so we just need to check the block number. */
2032 : 2513142 : if (occr_bb == pred_bb)
2033 : : return true;
2034 : :
2035 : 2195400 : visited[pred_bb->index] = 1;
2036 : : }
2037 : : /* Ignore this predecessor if it kills the expression.
2038 : :
2039 : : If this were used for hardreg pre, then it would need to use the kills
2040 : : bitmap. */
2041 : 18832720 : else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
2042 : 193421 : visited[pred_bb->index] = 1;
2043 : :
2044 : : /* Neither gen nor kill. */
2045 : : else
2046 : : {
2047 : 18639299 : visited[pred_bb->index] = 1;
2048 : 18639299 : if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2049 : : return true;
2050 : : }
2051 : : }
2052 : :
2053 : : /* All paths have been checked. */
2054 : : return false;
2055 : : }
2056 : :
2057 : : /* The wrapper for pre_expr_reaches_here_work that ensures that any
2058 : : memory allocated for that function is returned. */
2059 : :
2060 : : static bool
2061 : 1662789 : pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr,
2062 : : basic_block bb)
2063 : : {
2064 : 1662789 : bool rval;
2065 : 1662789 : char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
2066 : :
2067 : 1662789 : rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2068 : :
2069 : 1662789 : free (visited);
2070 : 1662789 : return rval;
2071 : : }
2072 : :
2073 : : /* Generate RTL to copy an EXP to REG and return it. */
2074 : :
2075 : : rtx_insn *
2076 : 317710 : prepare_copy_insn (rtx reg, rtx exp)
2077 : : {
2078 : 317710 : rtx_insn *pat;
2079 : :
2080 : 317710 : start_sequence ();
2081 : :
2082 : : /* If the expression is something that's an operand, like a constant,
2083 : : just copy it to a register. */
2084 : 317710 : if (general_operand (exp, GET_MODE (reg)))
2085 : 106677 : emit_move_insn (reg, exp);
2086 : :
2087 : : /* Otherwise, make a new insn to compute this expression and make sure the
2088 : : insn will be recognized (this also adds any needed CLOBBERs). */
2089 : : else
2090 : : {
2091 : 211033 : rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));
2092 : :
2093 : 211033 : if (insn_invalid_p (insn, false))
2094 : 0 : gcc_unreachable ();
2095 : : }
2096 : :
2097 : 317710 : pat = end_sequence ();
2098 : :
2099 : 317710 : return pat;
2100 : : }
2101 : :
2102 : : /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2103 : :
2104 : : static rtx_insn *
2105 : 317689 : process_insert_insn (struct gcse_expr *expr)
2106 : : {
2107 : 317689 : rtx reg = expr->reaching_reg;
2108 : : /* Copy the expression to make sure we don't have any sharing issues. */
2109 : 317689 : rtx exp = copy_rtx (expr->expr);
2110 : :
2111 : 317689 : return prepare_copy_insn (reg, exp);
2112 : : }
2113 : :
2114 : : /* Return the INSN which is added at the end of the block BB with
2115 : : same instruction pattern with PAT. */
2116 : :
2117 : : rtx_insn *
2118 : 66273 : insert_insn_end_basic_block (rtx_insn *pat, basic_block bb)
2119 : : {
2120 : 66273 : rtx_insn *insn = BB_END (bb);
2121 : 66273 : rtx_insn *new_insn;
2122 : 66273 : rtx_insn *pat_end;
2123 : :
2124 : 66273 : gcc_assert (pat && INSN_P (pat));
2125 : :
2126 : : pat_end = pat;
2127 : 66814 : while (NEXT_INSN (pat_end) != NULL_RTX)
2128 : : pat_end = NEXT_INSN (pat_end);
2129 : :
2130 : : /* If the last insn is a jump, insert EXPR in front. Similarly we need to
2131 : : take care of trapping instructions in presence of non-call exceptions. */
2132 : :
2133 : 66273 : if (JUMP_P (insn)
2134 : 66273 : || (NONJUMP_INSN_P (insn)
2135 : 18310 : && (!single_succ_p (bb)
2136 : 3344 : || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2137 : : {
2138 : : /* FIXME: What if something in jump uses value set in new insn? */
2139 : 23948 : new_insn = emit_insn_before_noloc (pat, insn, bb);
2140 : : }
2141 : :
2142 : : /* Likewise if the last insn is a call, as will happen in the presence
2143 : : of exception handling. */
2144 : 42325 : else if (CALL_P (insn)
2145 : 81300 : && (!single_succ_p (bb)
2146 : 4040 : || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2147 : : {
2148 : : /* Keeping in mind targets with small register classes and parameters
2149 : : in registers, we search backward and place the instructions before
2150 : : the first parameter is loaded. Do this for everyone for consistency
2151 : : and a presumption that we'll get better code elsewhere as well. */
2152 : :
2153 : : /* Since different machines initialize their parameter registers
2154 : : in different orders, assume nothing. Collect the set of all
2155 : : parameter registers. */
2156 : 38900 : insn = find_first_parameter_load (insn, BB_HEAD (bb));
2157 : :
2158 : : /* If we found all the parameter loads, then we want to insert
2159 : : before the first parameter load.
2160 : :
2161 : : If we did not find all the parameter loads, then we might have
2162 : : stopped on the head of the block, which could be a CODE_LABEL.
2163 : : If we inserted before the CODE_LABEL, then we would be putting
2164 : : the insn in the wrong basic block. In that case, put the insn
2165 : : after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2166 : 38900 : while (LABEL_P (insn)
2167 : 38900 : || NOTE_INSN_BASIC_BLOCK_P (insn))
2168 : 0 : insn = NEXT_INSN (insn);
2169 : :
2170 : 38900 : new_insn = emit_insn_before_noloc (pat, insn, bb);
2171 : : }
2172 : : else
2173 : 3425 : new_insn = emit_insn_after_noloc (pat, insn, bb);
2174 : :
2175 : 541 : while (1)
2176 : : {
2177 : 66814 : if (INSN_P (pat))
2178 : 66814 : add_label_notes (PATTERN (pat), new_insn);
2179 : 66814 : if (pat == pat_end)
2180 : : break;
2181 : 541 : pat = NEXT_INSN (pat);
2182 : : }
2183 : 66273 : return new_insn;
2184 : : }
2185 : :
2186 : : /* Add EXPR to the end of basic block BB.
2187 : :
2188 : : This is used by both the PRE and code hoisting. */
2189 : :
2190 : : static void
2191 : 66273 : insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
2192 : : {
2193 : 66273 : rtx reg = expr->reaching_reg;
2194 : 66273 : int regno = REGNO (reg);
2195 : :
2196 : 66273 : rtx_insn *insn = process_insert_insn (expr);
2197 : 66273 : rtx_insn *new_insn = insert_insn_end_basic_block (insn, bb);
2198 : :
2199 : 66273 : gcse_create_count++;
2200 : :
2201 : 66273 : if (dump_file)
2202 : : {
2203 : 3 : fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2204 : 3 : bb->index, INSN_UID (new_insn));
2205 : 3 : fprintf (dump_file, "copying expression %d to reg %d\n",
2206 : : expr->bitmap_index, regno);
2207 : : }
2208 : 66273 : }
2209 : :
2210 : : /* Return the INSN which is added at the start of the block BB with
2211 : : same instruction pattern with PAT. */
2212 : :
2213 : : rtx_insn *
2214 : 0 : insert_insn_start_basic_block (rtx_insn *pat, basic_block bb)
2215 : : {
2216 : 0 : rtx_insn *insn = BB_HEAD (bb);
2217 : 0 : rtx_insn *next_insn;
2218 : :
2219 : 0 : gcc_assert (pat && INSN_P (pat));
2220 : :
2221 : : /* Insert after the last initial CODE_LABEL or NOTE_INSN_BASIC_BLOCK, before
2222 : : any other instructions. */
2223 : 0 : while ((next_insn = NEXT_INSN (insn))
2224 : 0 : && (LABEL_P (next_insn) || NOTE_INSN_BASIC_BLOCK_P (insn)))
2225 : : insn = next_insn;
2226 : :
2227 : 0 : rtx_insn *new_insn = emit_insn_after_noloc (pat, insn, bb);
2228 : :
2229 : 0 : while (pat != NULL_RTX)
2230 : : {
2231 : 0 : if (INSN_P (pat))
2232 : 0 : add_label_notes (PATTERN (pat), new_insn);
2233 : 0 : pat = NEXT_INSN (pat);
2234 : : }
2235 : :
2236 : 0 : return new_insn;
2237 : : }
2238 : :
2239 : : /* Add EXPR to the start of basic block BB.
2240 : :
2241 : : This is used by hardreg PRE. */
2242 : :
2243 : : static void
2244 : 0 : insert_insn_start_basic_block (struct gcse_expr *expr, basic_block bb)
2245 : : {
2246 : 0 : rtx reg = expr->reaching_reg;
2247 : 0 : int regno = REGNO (reg);
2248 : :
2249 : 0 : rtx_insn *insn = process_insert_insn (expr);
2250 : 0 : rtx_insn *new_insn = insert_insn_start_basic_block (insn, bb);
2251 : :
2252 : 0 : gcse_create_count++;
2253 : :
2254 : 0 : if (dump_file)
2255 : : {
2256 : 0 : fprintf (dump_file, "hardreg PRE: start of bb %d, insn %d, ",
2257 : 0 : bb->index, INSN_UID (new_insn));
2258 : 0 : fprintf (dump_file, "copying expression %d to reg %d\n",
2259 : : expr->bitmap_index, regno);
2260 : : }
2261 : 0 : }
2262 : :
2263 : : /* Insert partially redundant expressions on edges in the CFG to make
2264 : : the expressions fully redundant. */
2265 : :
2266 : : static bool
2267 : 415587 : pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
2268 : : {
2269 : 415587 : int e, i, j, num_edges, set_size;
2270 : 415587 : bool did_insert = false;
2271 : 415587 : sbitmap *inserted;
2272 : :
2273 : : /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2274 : : if it reaches any of the deleted expressions. */
2275 : :
2276 : 415587 : set_size = pre_insert_map[0]->size;
2277 : 415587 : num_edges = NUM_EDGES (edge_list);
2278 : 415587 : inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2279 : 415587 : bitmap_vector_clear (inserted, num_edges);
2280 : :
2281 : 14810988 : for (e = 0; e < num_edges; e++)
2282 : : {
2283 : 14395401 : int indx;
2284 : 14395401 : basic_block pred_bb = INDEX_EDGE_PRED_BB (edge_list, e);
2285 : 14395401 : basic_block succ_bb = INDEX_EDGE_SUCC_BB (edge_list, e);
2286 : :
2287 : 51888746 : for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2288 : : {
2289 : 37493345 : SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2290 : :
2291 : 37493345 : for (j = indx;
2292 : 42159268 : insert && j < (int) expr_hash_table.n_elems;
2293 : 4665923 : j++, insert >>= 1)
2294 : 4665923 : if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2295 : : {
2296 : 304968 : struct gcse_expr *expr = index_map[j];
2297 : 304968 : struct gcse_occr *occr;
2298 : :
2299 : : /* Now look at each deleted occurrence of this expression. */
2300 : 2117425 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2301 : : {
2302 : 1812457 : if (! occr->deleted_p)
2303 : 705116 : continue;
2304 : :
2305 : : /* Insert this expression on this edge if it would
2306 : : reach the deleted occurrence in BB. */
2307 : 1107341 : if (!bitmap_bit_p (inserted[e], j))
2308 : : {
2309 : 304968 : rtx_insn *insn;
2310 : 304968 : edge eg = INDEX_EDGE (edge_list, e);
2311 : :
2312 : : /* We can't insert anything on an abnormal and
2313 : : critical edge, so we insert the insn at the end of
2314 : : the previous block. There are several alternatives
2315 : : detailed in Morgans book P277 (sec 10.5) for
2316 : : handling this situation. This one is easiest for
2317 : : now.
2318 : :
2319 : : For hardreg PRE this would add an unwanted clobber
2320 : : of the hardreg, so we instead insert in the
2321 : : successor block. This may be partially redundant,
2322 : : but it is at least correct. */
2323 : 304968 : if (eg->flags & EDGE_ABNORMAL)
2324 : : {
2325 : 53552 : if (doing_hardreg_pre_p)
2326 : 0 : insert_insn_start_basic_block (index_map[j],
2327 : : succ_bb);
2328 : : else
2329 : 53552 : insert_insn_end_basic_block (index_map[j],
2330 : : pred_bb);
2331 : : }
2332 : : else
2333 : : {
2334 : 251416 : insn = process_insert_insn (index_map[j]);
2335 : 251416 : insert_insn_on_edge (insn, eg);
2336 : : }
2337 : :
2338 : 304968 : if (dump_file)
2339 : : {
2340 : 8 : fprintf (dump_file, "PRE: edge (%d,%d), ",
2341 : : pred_bb->index,
2342 : : succ_bb->index);
2343 : 8 : fprintf (dump_file, "copy expression %d\n",
2344 : : expr->bitmap_index);
2345 : : }
2346 : :
2347 : 304968 : update_ld_motion_stores (expr);
2348 : 304968 : bitmap_set_bit (inserted[e], j);
2349 : 304968 : did_insert = true;
2350 : 304968 : gcse_create_count++;
2351 : : }
2352 : : }
2353 : : }
2354 : : }
2355 : : }
2356 : :
2357 : 415587 : sbitmap_vector_free (inserted);
2358 : 415587 : return did_insert;
2359 : : }
2360 : :
2361 : : /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2362 : : Given "old_reg <- expr" (INSN), instead of adding after it
2363 : : reaching_reg <- old_reg
2364 : : it's better to do the following:
2365 : : reaching_reg <- expr
2366 : : old_reg <- reaching_reg
2367 : : because this way copy propagation can discover additional PRE
2368 : : opportunities. But if this fails, we try the old way.
2369 : : When "expr" is a store, i.e.
2370 : : given "MEM <- old_reg", instead of adding after it
2371 : : reaching_reg <- old_reg
2372 : : it's better to add it before as follows:
2373 : : reaching_reg <- old_reg
2374 : : MEM <- reaching_reg. */
2375 : :
2376 : : static void
2377 : 317742 : pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
2378 : : {
2379 : 317742 : rtx reg = expr->reaching_reg;
2380 : 317742 : int regno = REGNO (reg);
2381 : 317742 : int indx = expr->bitmap_index;
2382 : 317742 : rtx pat = PATTERN (insn);
2383 : 317742 : rtx set, first_set;
2384 : 317742 : rtx_insn *new_insn;
2385 : 317742 : rtx old_reg;
2386 : 317742 : int i;
2387 : :
2388 : : /* This block matches the logic in hash_scan_insn. */
2389 : 317742 : switch (GET_CODE (pat))
2390 : : {
2391 : : case SET:
2392 : : set = pat;
2393 : : break;
2394 : :
2395 : : case PARALLEL:
2396 : : /* Search through the parallel looking for the set whose
2397 : : source was the expression that we're interested in. */
2398 : : first_set = NULL_RTX;
2399 : 216453 : set = NULL_RTX;
2400 : 216453 : for (i = 0; i < XVECLEN (pat, 0); i++)
2401 : : {
2402 : 216365 : rtx x = XVECEXP (pat, 0, i);
2403 : 216365 : if (GET_CODE (x) == SET)
2404 : : {
2405 : : /* If the source was a REG_EQUAL or REG_EQUIV note, we
2406 : : may not find an equivalent expression, but in this
2407 : : case the PARALLEL will have a single set. */
2408 : 216277 : if (first_set == NULL_RTX)
2409 : 216125 : first_set = x;
2410 : 216277 : if (expr_equiv_p (SET_SRC (x), expr->expr))
2411 : : {
2412 : : set = x;
2413 : : break;
2414 : : }
2415 : : }
2416 : : }
2417 : :
2418 : 216125 : gcc_assert (first_set);
2419 : 216125 : if (set == NULL_RTX)
2420 : 88 : set = first_set;
2421 : : break;
2422 : :
2423 : 0 : default:
2424 : 0 : gcc_unreachable ();
2425 : : }
2426 : :
2427 : 317742 : if (REG_P (SET_DEST (set)))
2428 : : {
2429 : 317689 : old_reg = SET_DEST (set);
2430 : : /* Check if we can modify the set destination in the original insn. */
2431 : 317689 : if (validate_change (insn, &SET_DEST (set), reg, 0))
2432 : : {
2433 : 317689 : new_insn = gen_move_insn (old_reg, reg);
2434 : 317689 : new_insn = emit_insn_after (new_insn, insn);
2435 : : }
2436 : : else
2437 : : {
2438 : 0 : new_insn = gen_move_insn (reg, old_reg);
2439 : 0 : new_insn = emit_insn_after (new_insn, insn);
2440 : : }
2441 : : }
2442 : : else /* This is possible only in case of a store to memory. */
2443 : : {
2444 : 53 : old_reg = SET_SRC (set);
2445 : 53 : new_insn = gen_move_insn (reg, old_reg);
2446 : :
2447 : : /* Check if we can modify the set source in the original insn. */
2448 : 53 : if (validate_change (insn, &SET_SRC (set), reg, 0))
2449 : 53 : new_insn = emit_insn_before (new_insn, insn);
2450 : : else
2451 : 0 : new_insn = emit_insn_after (new_insn, insn);
2452 : : }
2453 : :
2454 : 317742 : gcse_create_count++;
2455 : :
2456 : 317742 : if (dump_file)
2457 : 24 : fprintf (dump_file,
2458 : : "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2459 : 8 : BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2460 : 8 : INSN_UID (insn), regno);
2461 : 317742 : }
2462 : :
2463 : : /* Copy available expressions that reach the redundant expression
2464 : : to `reaching_reg'. */
2465 : :
2466 : : static void
2467 : 415587 : pre_insert_copies (void)
2468 : : {
2469 : 415587 : unsigned int i;
2470 : 415587 : bool added_copy;
2471 : 415587 : struct gcse_expr *expr;
2472 : 415587 : struct gcse_occr *occr;
2473 : 415587 : struct gcse_occr *avail;
2474 : :
2475 : : /* For each available expression in the table, copy the result to
2476 : : `reaching_reg' if the expression reaches a deleted one.
2477 : :
2478 : : ??? The current algorithm is rather brute force.
2479 : : Need to do some profiling. */
2480 : :
2481 : 20455242 : for (i = 0; i < expr_hash_table.size; i++)
2482 : 29605940 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2483 : : {
2484 : : /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2485 : : we don't want to insert a copy here because the expression may not
2486 : : really be redundant. So only insert an insn if the expression was
2487 : : deleted. This test also avoids further processing if the
2488 : : expression wasn't deleted anywhere. */
2489 : 9566285 : if (expr->reaching_reg == NULL)
2490 : 9201778 : continue;
2491 : :
2492 : : /* Set when we add a copy for that expression. */
2493 : 364507 : added_copy = false;
2494 : :
2495 : 1729374 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2496 : : {
2497 : 1364867 : if (! occr->deleted_p)
2498 : 382050 : continue;
2499 : :
2500 : 16308204 : for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2501 : : {
2502 : 15325387 : rtx_insn *insn = avail->insn;
2503 : :
2504 : : /* No need to handle this one if handled already. */
2505 : 15325387 : if (avail->copied_p)
2506 : 438230 : continue;
2507 : :
2508 : : /* Don't handle this one if it's a redundant one. */
2509 : 14887157 : if (insn->deleted ())
2510 : 13224368 : continue;
2511 : :
2512 : : /* Or if the expression doesn't reach the deleted one. */
2513 : 1662789 : if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2514 : : expr,
2515 : 1662789 : BLOCK_FOR_INSN (occr->insn)))
2516 : 1345047 : continue;
2517 : :
2518 : 317742 : added_copy = true;
2519 : :
2520 : : /* Copy the result of avail to reaching_reg. */
2521 : 317742 : pre_insert_copy_insn (expr, insn);
2522 : 317742 : avail->copied_p = 1;
2523 : : }
2524 : : }
2525 : :
2526 : 364507 : if (added_copy)
2527 : 254955 : update_ld_motion_stores (expr);
2528 : : }
2529 : 415587 : }
2530 : :
2531 : : struct set_data
2532 : : {
2533 : : rtx_insn *insn;
2534 : : const_rtx set;
2535 : : int nsets;
2536 : : };
2537 : :
2538 : : /* Increment number of sets and record set in DATA. */
2539 : :
2540 : : static void
2541 : 1804926 : record_set_data (rtx dest, const_rtx set, void *data)
2542 : : {
2543 : 1804926 : struct set_data *s = (struct set_data *)data;
2544 : :
2545 : 1804926 : if (GET_CODE (set) == SET)
2546 : : {
2547 : : /* We allow insns having multiple sets, where all but one are
2548 : : dead as single set insns. In the common case only a single
2549 : : set is present, so we want to avoid checking for REG_UNUSED
2550 : : notes unless necessary. */
2551 : 902463 : if (s->nsets == 1
2552 : 0 : && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
2553 : 902463 : && !side_effects_p (s->set))
2554 : 0 : s->nsets = 0;
2555 : :
2556 : 902463 : if (!s->nsets)
2557 : : {
2558 : : /* Record this set. */
2559 : 902463 : s->nsets += 1;
2560 : 902463 : s->set = set;
2561 : : }
2562 : 0 : else if (!find_reg_note (s->insn, REG_UNUSED, dest)
2563 : 0 : || side_effects_p (set))
2564 : 0 : s->nsets += 1;
2565 : : }
2566 : 1804926 : }
2567 : :
2568 : : static const_rtx
2569 : 1735952 : single_set_gcse (rtx_insn *insn)
2570 : : {
2571 : 1735952 : struct set_data s;
2572 : 1735952 : rtx pattern;
2573 : :
2574 : 1735952 : gcc_assert (INSN_P (insn));
2575 : :
2576 : : /* Optimize common case. */
2577 : 1735952 : pattern = PATTERN (insn);
2578 : 1735952 : if (GET_CODE (pattern) == SET)
2579 : : return pattern;
2580 : :
2581 : 902463 : s.insn = insn;
2582 : 902463 : s.nsets = 0;
2583 : 902463 : note_pattern_stores (pattern, record_set_data, &s);
2584 : :
2585 : : /* Considered invariant insns have exactly one set. */
2586 : 902463 : gcc_assert (s.nsets == 1);
2587 : 902463 : return s.set;
2588 : : }
2589 : :
2590 : : /* Emit move from SRC to DEST noting the equivalence with expression computed
2591 : : in INSN. */
2592 : :
2593 : : static rtx_insn *
2594 : 1016042 : gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
2595 : : {
2596 : 1016042 : rtx_insn *new_rtx;
2597 : 1016042 : const_rtx set = single_set_gcse (insn);
2598 : 1016042 : rtx set2;
2599 : 1016042 : rtx note;
2600 : 1016042 : rtx eqv = NULL_RTX;
2601 : :
2602 : : /* This should never fail since we're creating a reg->reg copy
2603 : : we've verified to be valid. */
2604 : :
2605 : 1016042 : new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2606 : :
2607 : : /* Note the equivalence for local CSE pass. Take the note from the old
2608 : : set if there was one. Otherwise record the SET_SRC from the old set
2609 : : unless DEST is also an operand of the SET_SRC. */
2610 : 1016042 : set2 = single_set (new_rtx);
2611 : 1016042 : if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2612 : 0 : return new_rtx;
2613 : 1016042 : if ((note = find_reg_equal_equiv_note (insn)))
2614 : 174335 : eqv = XEXP (note, 0);
2615 : 841707 : else if (! REG_P (dest)
2616 : 841707 : || ! reg_mentioned_p (dest, SET_SRC (set)))
2617 : 839459 : eqv = SET_SRC (set);
2618 : :
2619 : 1013794 : if (eqv != NULL_RTX)
2620 : 1013794 : set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2621 : :
2622 : : return new_rtx;
2623 : : }
2624 : :
2625 : : /* Delete redundant computations.
2626 : : Deletion is done by changing the insn to copy the `reaching_reg' of
2627 : : the expression into the result of the SET. It is left to later passes
2628 : : to propagate the copy or eliminate it.
2629 : :
2630 : : Return true if a change is made. */
2631 : :
2632 : : static bool
2633 : 415587 : pre_delete (void)
2634 : : {
2635 : 415587 : unsigned int i;
2636 : 415587 : bool changed = false;
2637 : 415587 : struct gcse_expr *expr;
2638 : 415587 : struct gcse_occr *occr;
2639 : :
2640 : 20455242 : for (i = 0; i < expr_hash_table.size; i++)
2641 : 29605940 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2642 : : {
2643 : 9566285 : int indx = expr->bitmap_index;
2644 : :
2645 : : /* We only need to search antic_occr since we require ANTLOC != 0. */
2646 : 16428825 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2647 : : {
2648 : 6862540 : rtx_insn *insn = occr->insn;
2649 : 6862540 : rtx set;
2650 : 6862540 : basic_block bb = BLOCK_FOR_INSN (insn);
2651 : :
2652 : : /* We only delete insns that have a single_set. */
2653 : 6862540 : if (bitmap_bit_p (pre_delete_map[bb->index], indx)
2654 : 982818 : && (set = single_set (insn)) != 0
2655 : 7845357 : && dbg_cnt (pre_insn))
2656 : : {
2657 : 982817 : rtx dest = SET_DEST (set);
2658 : 982817 : if (expr->reaching_reg == NULL)
2659 : : {
2660 : 364507 : if (doing_hardreg_pre_p)
2661 : : /* Use the hardreg as the reaching register. The
2662 : : deleted sets will be replaced with noop moves.
2663 : :
2664 : : This may change the value of the hardreg in some debug
2665 : : instructions, so we will need to reset any debug uses
2666 : : of the hardreg. */
2667 : 0 : expr->reaching_reg = dest;
2668 : : else
2669 : : /* Create a pseudo-reg to store the result of reaching
2670 : : expressions into. Get the mode for the new pseudo from
2671 : : the mode of the original destination pseudo. */
2672 : 364507 : expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2673 : : }
2674 : :
2675 : 982817 : gcse_emit_move_after (dest, expr->reaching_reg, insn);
2676 : 982817 : delete_insn (insn);
2677 : 982817 : occr->deleted_p = 1;
2678 : 982817 : changed = true;
2679 : 982817 : gcse_subst_count++;
2680 : :
2681 : 982817 : if (dump_file)
2682 : : {
2683 : 8 : fprintf (dump_file,
2684 : : "PRE: redundant insn %d (expression %d) in ",
2685 : 8 : INSN_UID (insn), indx);
2686 : 8 : fprintf (dump_file, "bb %d, reaching reg is %d\n",
2687 : 8 : bb->index, REGNO (expr->reaching_reg));
2688 : : }
2689 : : }
2690 : : }
2691 : : }
2692 : :
2693 : 415587 : return changed;
2694 : : }
2695 : :
2696 : : /* Since hardreg PRE reuses the hardreg as the reaching register, we need to
2697 : : eliminate any existing uses in debug insns. This is overly conservative,
2698 : : but there's currently no benefit to preserving the debug insns, so there's
2699 : : no point doing the work to retain them. */
2700 : :
2701 : : static void
2702 : 0 : reset_hardreg_debug_uses ()
2703 : : {
2704 : 0 : df_ref def;
2705 : 0 : for (def = DF_REG_USE_CHAIN (current_hardreg_regno);
2706 : 0 : def;
2707 : 0 : def = DF_REF_NEXT_REG (def))
2708 : : {
2709 : 0 : rtx_insn *insn = DF_REF_INSN (def);
2710 : 0 : if (DEBUG_INSN_P (insn))
2711 : 0 : delete_insn (insn);
2712 : : }
2713 : 0 : }
2714 : :
2715 : : /* Perform GCSE optimizations using PRE.
2716 : : This is called by one_pre_gcse_pass after all the dataflow analysis
2717 : : has been done.
2718 : :
2719 : : This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2720 : : lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2721 : : Compiler Design and Implementation.
2722 : :
2723 : : ??? A new pseudo reg is created to hold the reaching expression. The nice
2724 : : thing about the classical approach is that it would try to use an existing
2725 : : reg. If the register can't be adequately optimized [i.e. we introduce
2726 : : reload problems], one could add a pass here to propagate the new register
2727 : : through the block.
2728 : :
2729 : : ??? We don't handle single sets in PARALLELs because we're [currently] not
2730 : : able to copy the rest of the parallel when we insert copies to create full
2731 : : redundancies from partial redundancies. However, there's no reason why we
2732 : : can't handle PARALLELs in the cases where there are no partial
2733 : : redundancies. */
2734 : :
2735 : : static bool
2736 : 415587 : pre_gcse (struct edge_list *edge_list)
2737 : : {
2738 : 415587 : unsigned int i;
2739 : 415587 : bool did_insert, changed;
2740 : 415587 : struct gcse_expr **index_map;
2741 : 415587 : struct gcse_expr *expr;
2742 : :
2743 : : /* Compute a mapping from expression number (`bitmap_index') to
2744 : : hash table entry. */
2745 : :
2746 : 415587 : index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
2747 : 20870829 : for (i = 0; i < expr_hash_table.size; i++)
2748 : 29605940 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2749 : 9566285 : index_map[expr->bitmap_index] = expr;
2750 : :
2751 : : /* Delete the redundant insns first so that
2752 : : - we know what register to use for the new insns and for the other
2753 : : ones with reaching expressions
2754 : : - we know which insns are redundant when we go to create copies */
2755 : :
2756 : 415587 : changed = pre_delete ();
2757 : 415587 : did_insert = pre_edge_insert (edge_list, index_map);
2758 : : /* In other places with reaching expressions, copy the expression to the
2759 : : specially allocated pseudo-reg that reaches the redundant expr. This
2760 : : isn't needed for hardreg PRE. */
2761 : 415587 : if (!doing_hardreg_pre_p)
2762 : 415587 : pre_insert_copies ();
2763 : :
2764 : 415587 : if (did_insert)
2765 : : {
2766 : 75236 : if (doing_hardreg_pre_p)
2767 : 0 : reset_hardreg_debug_uses ();
2768 : 75236 : commit_edge_insertions ();
2769 : 75236 : changed = true;
2770 : : }
2771 : :
2772 : 415587 : free (index_map);
2773 : 415587 : return changed;
2774 : : }
2775 : :
2776 : : /* Top level routine to perform one PRE GCSE pass.
2777 : :
2778 : : Return true if a change was made. */
2779 : :
2780 : : static bool
2781 : 885252 : one_pre_gcse_pass (void)
2782 : : {
2783 : 885252 : bool changed = false;
2784 : :
2785 : 885252 : gcse_subst_count = 0;
2786 : 885252 : gcse_create_count = 0;
2787 : :
2788 : : /* Return if there's nothing to do, or it is too expensive. */
2789 : 885252 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
2790 : 885252 : || gcse_or_cprop_is_too_expensive (_("PRE disabled")))
2791 : 410312 : return false;
2792 : :
2793 : : /* We need alias. */
2794 : 474940 : init_alias_analysis ();
2795 : :
2796 : 474940 : bytes_used = 0;
2797 : 474940 : gcc_obstack_init (&gcse_obstack);
2798 : 474940 : alloc_gcse_mem ();
2799 : :
2800 : 474940 : alloc_hash_table (&expr_hash_table);
2801 : 474940 : add_noreturn_fake_exit_edges ();
2802 : 474940 : if (do_load_motion ())
2803 : 474938 : compute_ld_motion_mems ();
2804 : :
2805 : 474940 : compute_hash_table (&expr_hash_table);
2806 : 474940 : if (do_load_motion ())
2807 : 474938 : trim_ld_motion_mems ();
2808 : 474940 : if (dump_file)
2809 : 17 : dump_hash_table (dump_file, "Expression", &expr_hash_table);
2810 : :
2811 : 474940 : if (expr_hash_table.n_elems > 0)
2812 : : {
2813 : 415587 : struct edge_list *edge_list;
2814 : 415587 : alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
2815 : 415587 : edge_list = compute_pre_data ();
2816 : 415587 : if (pre_gcse (edge_list))
2817 : : changed = true;
2818 : 415587 : free_edge_list (edge_list);
2819 : 415587 : free_pre_mem ();
2820 : : }
2821 : :
2822 : 474940 : if (do_load_motion ())
2823 : 474938 : free_ld_motion_mems ();
2824 : 474940 : remove_fake_exit_edges ();
2825 : 474940 : free_hash_table (&expr_hash_table);
2826 : :
2827 : 474940 : free_gcse_mem ();
2828 : 474940 : obstack_free (&gcse_obstack, NULL);
2829 : :
2830 : : /* We are finished with alias. */
2831 : 474940 : end_alias_analysis ();
2832 : :
2833 : 474940 : if (dump_file)
2834 : : {
2835 : 17 : fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2836 : 17 : current_function_name (), n_basic_blocks_for_fn (cfun),
2837 : : bytes_used);
2838 : 17 : fprintf (dump_file, "%d substs, %d insns created\n",
2839 : : gcse_subst_count, gcse_create_count);
2840 : : }
2841 : :
2842 : : return changed;
2843 : : }
2844 : :
2845 : : /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2846 : : to INSN. If such notes are added to an insn which references a
2847 : : CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2848 : : that note, because the following loop optimization pass requires
2849 : : them. */
2850 : :
2851 : : /* ??? If there was a jump optimization pass after gcse and before loop,
2852 : : then we would not need to do this here, because jump would add the
2853 : : necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2854 : :
2855 : : static void
2856 : 329084 : add_label_notes (rtx x, rtx_insn *insn)
2857 : : {
2858 : 329084 : enum rtx_code code = GET_CODE (x);
2859 : 329084 : int i, j;
2860 : 329084 : const char *fmt;
2861 : :
2862 : 329084 : if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2863 : : {
2864 : : /* This code used to ignore labels that referred to dispatch tables to
2865 : : avoid flow generating (slightly) worse code.
2866 : :
2867 : : We no longer ignore such label references (see LABEL_REF handling in
2868 : : mark_jump_label for additional information). */
2869 : :
2870 : : /* There's no reason for current users to emit jump-insns with
2871 : : such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2872 : : notes. */
2873 : 0 : gcc_assert (!JUMP_P (insn));
2874 : 0 : add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
2875 : :
2876 : 0 : if (LABEL_P (label_ref_label (x)))
2877 : 0 : LABEL_NUSES (label_ref_label (x))++;
2878 : :
2879 : 0 : return;
2880 : : }
2881 : :
2882 : 796699 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2883 : : {
2884 : 467615 : if (fmt[i] == 'e')
2885 : 262216 : add_label_notes (XEXP (x, i), insn);
2886 : 205399 : else if (fmt[i] == 'E')
2887 : 81 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2888 : 54 : add_label_notes (XVECEXP (x, i, j), insn);
2889 : : }
2890 : : }
2891 : :
2892 : : /* Code Hoisting variables and subroutines. */
2893 : :
2894 : : /* Very busy expressions. */
2895 : : static sbitmap *hoist_vbein;
2896 : : static sbitmap *hoist_vbeout;
2897 : :
2898 : : /* ??? We could compute post dominators and run this algorithm in
2899 : : reverse to perform tail merging, doing so would probably be
2900 : : more effective than the tail merging code in jump.cc.
2901 : :
2902 : : It's unclear if tail merging could be run in parallel with
2903 : : code hoisting. It would be nice. */
2904 : :
2905 : : /* Allocate vars used for code hoisting analysis. */
2906 : :
2907 : : static void
2908 : 25187 : alloc_code_hoist_mem (int n_blocks, int n_exprs)
2909 : : {
2910 : 25187 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2911 : 25187 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2912 : 25187 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2913 : :
2914 : 25187 : hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2915 : 25187 : hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2916 : 25187 : }
2917 : :
2918 : : /* Free vars used for code hoisting analysis. */
2919 : :
2920 : : static void
2921 : 25187 : free_code_hoist_mem (void)
2922 : : {
2923 : 25187 : sbitmap_vector_free (antloc);
2924 : 25187 : sbitmap_vector_free (transp);
2925 : 25187 : sbitmap_vector_free (comp);
2926 : :
2927 : 25187 : sbitmap_vector_free (hoist_vbein);
2928 : 25187 : sbitmap_vector_free (hoist_vbeout);
2929 : :
2930 : 25187 : free_dominance_info (CDI_DOMINATORS);
2931 : 25187 : }
2932 : :
2933 : : /* Compute the very busy expressions at entry/exit from each block.
2934 : :
2935 : : An expression is very busy if all paths from a given point
2936 : : compute the expression. */
2937 : :
2938 : : static void
2939 : 25187 : compute_code_hoist_vbeinout (void)
2940 : : {
2941 : 25187 : int changed, passes;
2942 : 25187 : basic_block bb;
2943 : :
2944 : 25187 : bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
2945 : 25187 : bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));
2946 : :
2947 : 25187 : passes = 0;
2948 : 25187 : changed = 1;
2949 : :
2950 : 103077 : while (changed)
2951 : : {
2952 : 52703 : changed = 0;
2953 : :
2954 : : /* We scan the blocks in the reverse order to speed up
2955 : : the convergence. */
2956 : 869028 : FOR_EACH_BB_REVERSE_FN (bb, cfun)
2957 : : {
2958 : 816325 : if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2959 : : {
2960 : 763622 : bitmap_intersection_of_succs (hoist_vbeout[bb->index],
2961 : : hoist_vbein, bb);
2962 : :
2963 : : /* Include expressions in VBEout that are calculated
2964 : : in BB and available at its end. */
2965 : 763622 : bitmap_ior (hoist_vbeout[bb->index],
2966 : 763622 : hoist_vbeout[bb->index], comp[bb->index]);
2967 : : }
2968 : :
2969 : 816325 : changed |= bitmap_or_and (hoist_vbein[bb->index],
2970 : 816325 : antloc[bb->index],
2971 : 816325 : hoist_vbeout[bb->index],
2972 : 816325 : transp[bb->index]);
2973 : : }
2974 : :
2975 : 52703 : passes++;
2976 : : }
2977 : :
2978 : 25187 : if (dump_file)
2979 : : {
2980 : 7 : fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2981 : :
2982 : 35 : FOR_EACH_BB_FN (bb, cfun)
2983 : : {
2984 : 28 : fprintf (dump_file, "vbein (%d): ", bb->index);
2985 : 28 : dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
2986 : 28 : fprintf (dump_file, "vbeout(%d): ", bb->index);
2987 : 28 : dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
2988 : : }
2989 : : }
2990 : 25187 : }
2991 : :
2992 : : /* Top level routine to do the dataflow analysis needed by code hoisting. */
2993 : :
2994 : : static void
2995 : 25187 : compute_code_hoist_data (void)
2996 : : {
2997 : 25187 : compute_local_properties (transp, comp, antloc, &expr_hash_table);
2998 : 25187 : prune_expressions (false);
2999 : 25187 : compute_code_hoist_vbeinout ();
3000 : 25187 : calculate_dominance_info (CDI_DOMINATORS);
3001 : 25187 : if (dump_file)
3002 : 7 : fprintf (dump_file, "\n");
3003 : 25187 : }
3004 : :
3005 : : /* Update register pressure for BB when hoisting an expression from
3006 : : instruction FROM, if live ranges of inputs are shrunk. Also
3007 : : maintain live_in information if live range of register referred
3008 : : in FROM is shrunk.
3009 : :
3010 : : Return 0 if register pressure doesn't change, otherwise return
3011 : : the number by which register pressure is decreased.
3012 : :
3013 : : NOTE: Register pressure won't be increased in this function. */
3014 : :
3015 : : static int
3016 : 12335834 : update_bb_reg_pressure (basic_block bb, rtx_insn *from)
3017 : : {
3018 : 12335834 : rtx dreg;
3019 : 12335834 : rtx_insn *insn;
3020 : 12335834 : basic_block succ_bb;
3021 : 12335834 : df_ref use, op_ref;
3022 : 12335834 : edge succ;
3023 : 12335834 : edge_iterator ei;
3024 : 12335834 : int decreased_pressure = 0;
3025 : 12335834 : int nregs;
3026 : 12335834 : enum reg_class pressure_class;
3027 : :
3028 : 15058226 : FOR_EACH_INSN_USE (use, from)
3029 : : {
3030 : 2722392 : dreg = DF_REF_REAL_REG (use);
3031 : : /* The live range of register is shrunk only if it isn't:
3032 : : 1. referred on any path from the end of this block to EXIT, or
3033 : : 2. referred by insns other than FROM in this block. */
3034 : 3779102 : FOR_EACH_EDGE (succ, ei, bb->succs)
3035 : : {
3036 : 3221990 : succ_bb = succ->dest;
3037 : 3221990 : if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3038 : 3819 : continue;
3039 : :
3040 : 3218171 : if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
3041 : : break;
3042 : : }
3043 : 2722392 : if (succ != NULL)
3044 : 2165280 : continue;
3045 : :
3046 : 557112 : op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
3047 : 1776551 : for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
3048 : : {
3049 : 1232406 : if (!DF_REF_INSN_INFO (op_ref))
3050 : 177712 : continue;
3051 : :
3052 : 1054694 : insn = DF_REF_INSN (op_ref);
3053 : 1054694 : if (BLOCK_FOR_INSN (insn) == bb
3054 : 1054694 : && NONDEBUG_INSN_P (insn) && insn != from)
3055 : : break;
3056 : : }
3057 : :
3058 : 557112 : pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
3059 : : /* Decrease register pressure and update live_in information for
3060 : : this block. */
3061 : 557112 : if (!op_ref && pressure_class != NO_REGS)
3062 : : {
3063 : 543477 : decreased_pressure += nregs;
3064 : 543477 : BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
3065 : 543477 : bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
3066 : : }
3067 : : }
3068 : 12335834 : return decreased_pressure;
3069 : : }
3070 : :
3071 : : /* Determine if the expression EXPR should be hoisted to EXPR_BB up in
3072 : : flow graph, if it can reach BB unimpared. Stop the search if the
3073 : : expression would need to be moved more than DISTANCE instructions.
3074 : :
3075 : : DISTANCE is the number of instructions through which EXPR can be
3076 : : hoisted up in flow graph.
3077 : :
3078 : : BB_SIZE points to an array which contains the number of instructions
3079 : : for each basic block.
3080 : :
3081 : : PRESSURE_CLASS and NREGS are register class and number of hard registers
3082 : : for storing EXPR.
3083 : :
3084 : : HOISTED_BBS points to a bitmap indicating basic blocks through which
3085 : : EXPR is hoisted.
3086 : :
3087 : : FROM is the instruction from which EXPR is hoisted.
3088 : :
3089 : : It's unclear exactly what Muchnick meant by "unimpared". It seems
3090 : : to me that the expression must either be computed or transparent in
3091 : : *every* block in the path(s) from EXPR_BB to BB. Any other definition
3092 : : would allow the expression to be hoisted out of loops, even if
3093 : : the expression wasn't a loop invariant.
3094 : :
3095 : : Contrast this to reachability for PRE where an expression is
3096 : : considered reachable if *any* path reaches instead of *all*
3097 : : paths. */
3098 : :
3099 : : static bool
3100 : 12335834 : should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
3101 : : basic_block bb, sbitmap visited,
3102 : : HOST_WIDE_INT distance,
3103 : : int *bb_size, enum reg_class pressure_class,
3104 : : int *nregs, bitmap hoisted_bbs, rtx_insn *from)
3105 : : {
3106 : 12335834 : unsigned int i;
3107 : 12335834 : edge pred;
3108 : 12335834 : edge_iterator ei;
3109 : 12335834 : sbitmap_iterator sbi;
3110 : 12335834 : bool visited_allocated_locally = false;
3111 : 12335834 : int decreased_pressure = 0;
3112 : :
3113 : 12335834 : if (flag_ira_hoist_pressure)
3114 : : {
3115 : : /* Record old information of basic block BB when it is visited
3116 : : at the first time. */
3117 : 12335834 : if (!bitmap_bit_p (hoisted_bbs, bb->index))
3118 : : {
3119 : 6811969 : struct bb_data *data = BB_DATA (bb);
3120 : 6811969 : bitmap_copy (data->backup, data->live_in);
3121 : 6811969 : data->old_pressure = data->max_reg_pressure[pressure_class];
3122 : : }
3123 : 12335834 : decreased_pressure = update_bb_reg_pressure (bb, from);
3124 : : }
3125 : : /* Terminate the search if distance, for which EXPR is allowed to move,
3126 : : is exhausted. */
3127 : 12335834 : if (distance > 0)
3128 : : {
3129 : 12333304 : if (flag_ira_hoist_pressure)
3130 : : {
3131 : : /* Prefer to hoist EXPR if register pressure is decreased. */
3132 : 12333304 : if (decreased_pressure > *nregs)
3133 : 1980 : distance += bb_size[bb->index];
3134 : : /* Let EXPR be hoisted through basic block at no cost if one
3135 : : of following conditions is satisfied:
3136 : :
3137 : : 1. The basic block has low register pressure.
3138 : : 2. Register pressure won't be increases after hoisting EXPR.
3139 : :
3140 : : Constant expressions is handled conservatively, because
3141 : : hoisting constant expression aggressively results in worse
3142 : : code. This decision is made by the observation of CSiBE
3143 : : on ARM target, while it has no obvious effect on other
3144 : : targets like x86, x86_64, mips and powerpc. */
3145 : 12331324 : else if (CONST_INT_P (expr->expr)
3146 : 12154030 : || (BB_DATA (bb)->max_reg_pressure[pressure_class]
3147 : 12154030 : >= ira_class_hard_regs_num[pressure_class]
3148 : 146639 : && decreased_pressure < *nregs))
3149 : 321641 : distance -= bb_size[bb->index];
3150 : : }
3151 : : else
3152 : 0 : distance -= bb_size[bb->index];
3153 : :
3154 : 12333304 : if (distance <= 0)
3155 : : return 0;
3156 : : }
3157 : : else
3158 : 2530 : gcc_assert (distance == 0);
3159 : :
3160 : 12144108 : if (visited == NULL)
3161 : : {
3162 : 604883 : visited_allocated_locally = true;
3163 : 604883 : visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
3164 : 604883 : bitmap_clear (visited);
3165 : : }
3166 : :
3167 : 27302353 : FOR_EACH_EDGE (pred, ei, bb->preds)
3168 : : {
3169 : 16124880 : basic_block pred_bb = pred->src;
3170 : :
3171 : 16124880 : if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3172 : : break;
3173 : 16124880 : else if (pred_bb == expr_bb)
3174 : 607663 : continue;
3175 : 15517217 : else if (bitmap_bit_p (visited, pred_bb->index))
3176 : 3830491 : continue;
3177 : 11686726 : else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
3178 : : break;
3179 : : /* Not killed. */
3180 : : else
3181 : : {
3182 : 11649149 : bitmap_set_bit (visited, pred_bb->index);
3183 : 11649149 : if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
3184 : : visited, distance, bb_size,
3185 : : pressure_class, nregs,
3186 : : hoisted_bbs, from))
3187 : : break;
3188 : : }
3189 : : }
3190 : 12144108 : if (visited_allocated_locally)
3191 : : {
3192 : : /* If EXPR can be hoisted to expr_bb, record basic blocks through
3193 : : which EXPR is hoisted in hoisted_bbs. */
3194 : 604883 : if (flag_ira_hoist_pressure && !pred)
3195 : : {
3196 : : /* Record the basic block from which EXPR is hoisted. */
3197 : 457382 : bitmap_set_bit (visited, bb->index);
3198 : 12024160 : EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
3199 : 11109396 : bitmap_set_bit (hoisted_bbs, i);
3200 : : }
3201 : 604883 : sbitmap_free (visited);
3202 : : }
3203 : :
3204 : 12144108 : return (pred == NULL);
3205 : : }
3206 : :
3207 : : /* Find occurrence in BB. */
3208 : :
3209 : : static struct gcse_occr *
3210 : 1211996 : find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
3211 : : {
3212 : : /* Find the right occurrence of this expression. */
3213 : 16330628 : while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3214 : 15118632 : occr = occr->next;
3215 : :
3216 : 1211996 : return occr;
3217 : : }
3218 : :
3219 : : /* Actually perform code hoisting.
3220 : :
3221 : : The code hoisting pass can hoist multiple computations of the same
3222 : : expression along dominated path to a dominating basic block, like
3223 : : from b2/b3 to b1 as depicted below:
3224 : :
3225 : : b1 ------
3226 : : /\ |
3227 : : / \ |
3228 : : bx by distance
3229 : : / \ |
3230 : : / \ |
3231 : : b2 b3 ------
3232 : :
3233 : : Unfortunately code hoisting generally extends the live range of an
3234 : : output pseudo register, which increases register pressure and hurts
3235 : : register allocation. To address this issue, an attribute MAX_DISTANCE
3236 : : is computed and attached to each expression. The attribute is computed
3237 : : from rtx cost of the corresponding expression and it's used to control
3238 : : how long the expression can be hoisted up in flow graph. As the
3239 : : expression is hoisted up in flow graph, GCC decreases its DISTANCE
3240 : : and stops the hoist if DISTANCE reaches 0. Code hoisting can decrease
3241 : : register pressure if live ranges of inputs are shrunk.
3242 : :
3243 : : Option "-fira-hoist-pressure" implements register pressure directed
3244 : : hoist based on upper method. The rationale is:
3245 : : 1. Calculate register pressure for each basic block by reusing IRA
3246 : : facility.
3247 : : 2. When expression is hoisted through one basic block, GCC checks
3248 : : the change of live ranges for inputs/output. The basic block's
3249 : : register pressure will be increased because of extended live
3250 : : range of output. However, register pressure will be decreased
3251 : : if the live ranges of inputs are shrunk.
3252 : : 3. After knowing how hoisting affects register pressure, GCC prefers
3253 : : to hoist the expression if it can decrease register pressure, by
3254 : : increasing DISTANCE of the corresponding expression.
3255 : : 4. If hoisting the expression increases register pressure, GCC checks
3256 : : register pressure of the basic block and decrease DISTANCE only if
3257 : : the register pressure is high. In other words, expression will be
3258 : : hoisted through at no cost if the basic block has low register
3259 : : pressure.
3260 : : 5. Update register pressure information for basic blocks through
3261 : : which expression is hoisted. */
3262 : :
3263 : : static bool
3264 : 25187 : hoist_code (void)
3265 : : {
3266 : 25187 : basic_block bb, dominated;
3267 : 25187 : unsigned int dom_tree_walk_index;
3268 : 25187 : unsigned int i, j, k;
3269 : 25187 : struct gcse_expr **index_map;
3270 : 25187 : struct gcse_expr *expr;
3271 : 25187 : int *to_bb_head;
3272 : 25187 : int *bb_size;
3273 : 25187 : bool changed = false;
3274 : 25187 : struct bb_data *data;
3275 : : /* Basic blocks that have occurrences reachable from BB. */
3276 : 25187 : bitmap from_bbs;
3277 : : /* Basic blocks through which expr is hoisted. */
3278 : 25187 : bitmap hoisted_bbs = NULL;
3279 : 25187 : bitmap_iterator bi;
3280 : :
3281 : : /* Compute a mapping from expression number (`bitmap_index') to
3282 : : hash table entry. */
3283 : :
3284 : 25187 : index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
3285 : 835961 : for (i = 0; i < expr_hash_table.size; i++)
3286 : 1069847 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3287 : 284260 : index_map[expr->bitmap_index] = expr;
3288 : :
3289 : : /* Calculate sizes of basic blocks and note how far
3290 : : each instruction is from the start of its block. We then use this
3291 : : data to restrict distance an expression can travel. */
3292 : :
3293 : 25187 : to_bb_head = XCNEWVEC (int, get_max_uid ());
3294 : 25187 : bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));
3295 : :
3296 : 337044 : FOR_EACH_BB_FN (bb, cfun)
3297 : : {
3298 : 311857 : rtx_insn *insn;
3299 : 311857 : int to_head;
3300 : :
3301 : 311857 : to_head = 0;
3302 : 2679251 : FOR_BB_INSNS (bb, insn)
3303 : : {
3304 : : /* Don't count debug instructions to avoid them affecting
3305 : : decision choices. */
3306 : 2367394 : if (NONDEBUG_INSN_P (insn))
3307 : 1775753 : to_bb_head[INSN_UID (insn)] = to_head++;
3308 : : }
3309 : :
3310 : 311857 : bb_size[bb->index] = to_head;
3311 : : }
3312 : :
3313 : 25187 : gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
3314 : : && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
3315 : : == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));
3316 : :
3317 : 25187 : from_bbs = BITMAP_ALLOC (NULL);
3318 : 25187 : if (flag_ira_hoist_pressure)
3319 : 25187 : hoisted_bbs = BITMAP_ALLOC (NULL);
3320 : :
3321 : 25187 : auto_vec<basic_block> dom_tree_walk
3322 : : = get_all_dominated_blocks (CDI_DOMINATORS,
3323 : 25187 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
3324 : :
3325 : : /* Walk over each basic block looking for potentially hoistable
3326 : : expressions, nothing gets hoisted from the entry block. */
3327 : 337044 : FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3328 : : {
3329 : 311857 : auto_vec<basic_block> domby
3330 : 311857 : = get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth);
3331 : :
3332 : 311857 : if (domby.length () == 0)
3333 : 0 : continue;
3334 : :
3335 : : /* Examine each expression that is very busy at the exit of this
3336 : : block. These are the potentially hoistable expressions. */
3337 : 48313688 : for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3338 : : {
3339 : 48001831 : if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3340 : : {
3341 : 3621514 : int nregs = 0;
3342 : 3621514 : enum reg_class pressure_class = NO_REGS;
3343 : : /* Current expression. */
3344 : 3621514 : struct gcse_expr *expr = index_map[i];
3345 : : /* Number of occurrences of EXPR that can be hoisted to BB. */
3346 : 3621514 : int hoistable = 0;
3347 : : /* Occurrences reachable from BB. */
3348 : 3621514 : vec<occr_t> occrs_to_hoist = vNULL;
3349 : : /* We want to insert the expression into BB only once, so
3350 : : note when we've inserted it. */
3351 : 3621514 : int insn_inserted_p;
3352 : 3621514 : occr_t occr;
3353 : :
3354 : : /* If an expression is computed in BB and is available at end of
3355 : : BB, hoist all occurrences dominated by BB to BB. */
3356 : 3621514 : if (bitmap_bit_p (comp[bb->index], i))
3357 : : {
3358 : 254635 : occr = find_occr_in_bb (expr->antic_occr, bb);
3359 : :
3360 : 254635 : if (occr)
3361 : : {
3362 : : /* An occurrence might've been already deleted
3363 : : while processing a dominator of BB. */
3364 : 150714 : if (!occr->deleted_p)
3365 : : {
3366 : 118316 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3367 : : hoistable++;
3368 : : }
3369 : : }
3370 : : else
3371 : : hoistable++;
3372 : : }
3373 : :
3374 : : /* We've found a potentially hoistable expression, now
3375 : : we look at every block BB dominates to see if it
3376 : : computes the expression. */
3377 : 46435971 : FOR_EACH_VEC_ELT (domby, j, dominated)
3378 : : {
3379 : 42814457 : HOST_WIDE_INT max_distance;
3380 : :
3381 : : /* Ignore self dominance. */
3382 : 42814457 : if (bb == dominated)
3383 : 3621514 : continue;
3384 : : /* We've found a dominated block, now see if it computes
3385 : : the busy expression and whether or not moving that
3386 : : expression to the "beginning" of that block is safe. */
3387 : 39192943 : if (!bitmap_bit_p (antloc[dominated->index], i))
3388 : 38235582 : continue;
3389 : :
3390 : 957361 : occr = find_occr_in_bb (expr->antic_occr, dominated);
3391 : 957361 : gcc_assert (occr);
3392 : :
3393 : : /* An occurrence might've been already deleted
3394 : : while processing a dominator of BB. */
3395 : 957361 : if (occr->deleted_p)
3396 : 270676 : continue;
3397 : 686685 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3398 : :
3399 : 686685 : max_distance = expr->max_distance;
3400 : 686685 : if (max_distance > 0)
3401 : : /* Adjust MAX_DISTANCE to account for the fact that
3402 : : OCCR won't have to travel all of DOMINATED, but
3403 : : only part of it. */
3404 : 1370870 : max_distance += (bb_size[dominated->index]
3405 : 685435 : - to_bb_head[INSN_UID (occr->insn)]);
3406 : :
3407 : 686685 : pressure_class = get_pressure_class_and_nregs (occr->insn,
3408 : : &nregs);
3409 : :
3410 : : /* Note if the expression should be hoisted from the dominated
3411 : : block to BB if it can reach DOMINATED unimpared.
3412 : :
3413 : : Keep track of how many times this expression is hoistable
3414 : : from a dominated block into BB. */
3415 : 686685 : if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3416 : : max_distance, bb_size,
3417 : : pressure_class, &nregs,
3418 : : hoisted_bbs, occr->insn))
3419 : : {
3420 : 457382 : hoistable++;
3421 : 457382 : occrs_to_hoist.safe_push (occr);
3422 : 457382 : bitmap_set_bit (from_bbs, dominated->index);
3423 : : }
3424 : : }
3425 : :
3426 : : /* If we found more than one hoistable occurrence of this
3427 : : expression, then note it in the vector of expressions to
3428 : : hoist. It makes no sense to hoist things which are computed
3429 : : in only one BB, and doing so tends to pessimize register
3430 : : allocation. One could increase this value to try harder
3431 : : to avoid any possible code expansion due to register
3432 : : allocation issues; however experiments have shown that
3433 : : the vast majority of hoistable expressions are only movable
3434 : : from two successors, so raising this threshold is likely
3435 : : to nullify any benefit we get from code hoisting. */
3436 : 3621514 : if (hoistable > 1 && dbg_cnt (hoist_insn))
3437 : : {
3438 : : /* If (hoistable != vec::length), then there is
3439 : : an occurrence of EXPR in BB itself. Don't waste
3440 : : time looking for LCA in this case. */
3441 : 190378 : if ((unsigned) hoistable == occrs_to_hoist.length ())
3442 : : {
3443 : 83466 : basic_block lca;
3444 : :
3445 : 83466 : lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3446 : : from_bbs);
3447 : 83466 : if (lca != bb)
3448 : : /* Punt, it's better to hoist these occurrences to
3449 : : LCA. */
3450 : 82468 : occrs_to_hoist.release ();
3451 : : }
3452 : : }
3453 : : else
3454 : : /* Punt, no point hoisting a single occurrence. */
3455 : 3526325 : occrs_to_hoist.release ();
3456 : :
3457 : 3621514 : if (flag_ira_hoist_pressure
3458 : 3621514 : && !occrs_to_hoist.is_empty ())
3459 : : {
3460 : : /* Increase register pressure of basic blocks to which
3461 : : expr is hoisted because of extended live range of
3462 : : output. */
3463 : 12721 : data = BB_DATA (bb);
3464 : 12721 : data->max_reg_pressure[pressure_class] += nregs;
3465 : 284642 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3466 : : {
3467 : 271921 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3468 : 271921 : data->max_reg_pressure[pressure_class] += nregs;
3469 : : }
3470 : : }
3471 : 3608793 : else if (flag_ira_hoist_pressure)
3472 : : {
3473 : : /* Restore register pressure and live_in info for basic
3474 : : blocks recorded in hoisted_bbs when expr will not be
3475 : : hoisted. */
3476 : 8934860 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3477 : : {
3478 : 5326067 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3479 : 5326067 : bitmap_copy (data->live_in, data->backup);
3480 : 5326067 : data->max_reg_pressure[pressure_class]
3481 : 5326067 : = data->old_pressure;
3482 : : }
3483 : : }
3484 : :
3485 : 3621514 : if (flag_ira_hoist_pressure)
3486 : 3621514 : bitmap_clear (hoisted_bbs);
3487 : :
3488 : : insn_inserted_p = 0;
3489 : :
3490 : : /* Walk through occurrences of I'th expressions we want
3491 : : to hoist to BB and make the transformations. */
3492 : 3654739 : FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3493 : : {
3494 : 33225 : rtx_insn *insn;
3495 : 33225 : const_rtx set;
3496 : :
3497 : 33225 : gcc_assert (!occr->deleted_p);
3498 : :
3499 : 33225 : insn = occr->insn;
3500 : 33225 : set = single_set_gcse (insn);
3501 : :
3502 : : /* Create a pseudo-reg to store the result of reaching
3503 : : expressions into. Get the mode for the new pseudo
3504 : : from the mode of the original destination pseudo.
3505 : :
3506 : : It is important to use new pseudos whenever we
3507 : : emit a set. This will allow reload to use
3508 : : rematerialization for such registers. */
3509 : 33225 : if (!insn_inserted_p)
3510 : 12721 : expr->reaching_reg
3511 : 12721 : = gen_reg_rtx_and_attrs (SET_DEST (set));
3512 : :
3513 : 33225 : gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3514 : : insn);
3515 : 33225 : delete_insn (insn);
3516 : 33225 : occr->deleted_p = 1;
3517 : 33225 : changed = true;
3518 : 33225 : gcse_subst_count++;
3519 : :
3520 : 33225 : if (!insn_inserted_p)
3521 : : {
3522 : 12721 : insert_insn_end_basic_block (expr, bb);
3523 : 12721 : insn_inserted_p = 1;
3524 : : }
3525 : : }
3526 : :
3527 : 3621514 : occrs_to_hoist.release ();
3528 : 3621514 : bitmap_clear (from_bbs);
3529 : : }
3530 : : }
3531 : 311857 : }
3532 : :
3533 : 25187 : BITMAP_FREE (from_bbs);
3534 : 25187 : if (flag_ira_hoist_pressure)
3535 : 25187 : BITMAP_FREE (hoisted_bbs);
3536 : :
3537 : 25187 : free (bb_size);
3538 : 25187 : free (to_bb_head);
3539 : 25187 : free (index_map);
3540 : :
3541 : 25187 : return changed;
3542 : 25187 : }
3543 : :
3544 : : /* Return pressure class and number of needed hard registers (through
3545 : : *NREGS) of register REGNO. */
3546 : : static enum reg_class
3547 : 5659929 : get_regno_pressure_class (int regno, int *nregs)
3548 : : {
3549 : 5659929 : if (regno >= FIRST_PSEUDO_REGISTER)
3550 : : {
3551 : 3147456 : enum reg_class pressure_class;
3552 : :
3553 : 3147456 : pressure_class = reg_allocno_class (regno);
3554 : 3147456 : pressure_class = ira_pressure_class_translate[pressure_class];
3555 : 3147456 : *nregs
3556 : 3147456 : = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3557 : 3147456 : return pressure_class;
3558 : : }
3559 : 2512473 : else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3560 : 2512473 : && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3561 : : {
3562 : 765768 : *nregs = 1;
3563 : 765768 : return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3564 : : }
3565 : : else
3566 : : {
3567 : 1746705 : *nregs = 0;
3568 : 1746705 : return NO_REGS;
3569 : : }
3570 : : }
3571 : :
3572 : : /* Return pressure class and number of hard registers (through *NREGS)
3573 : : for destination of INSN. */
3574 : : static enum reg_class
3575 : 686685 : get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
3576 : : {
3577 : 686685 : rtx reg;
3578 : 686685 : enum reg_class pressure_class;
3579 : 686685 : const_rtx set = single_set_gcse (insn);
3580 : :
3581 : 686685 : reg = SET_DEST (set);
3582 : 686685 : if (GET_CODE (reg) == SUBREG)
3583 : 0 : reg = SUBREG_REG (reg);
3584 : 686685 : if (MEM_P (reg))
3585 : : {
3586 : 0 : *nregs = 0;
3587 : 0 : pressure_class = NO_REGS;
3588 : : }
3589 : : else
3590 : : {
3591 : 686685 : gcc_assert (REG_P (reg));
3592 : 686685 : pressure_class = reg_allocno_class (REGNO (reg));
3593 : 686685 : pressure_class = ira_pressure_class_translate[pressure_class];
3594 : 686685 : *nregs
3595 : 686685 : = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3596 : : }
3597 : 686685 : return pressure_class;
3598 : : }
3599 : :
3600 : : /* Increase (if INCR_P) or decrease current register pressure for
3601 : : register REGNO. */
3602 : : static void
3603 : 5102817 : change_pressure (int regno, bool incr_p)
3604 : : {
3605 : 5102817 : int nregs;
3606 : 5102817 : enum reg_class pressure_class;
3607 : :
3608 : 5102817 : pressure_class = get_regno_pressure_class (regno, &nregs);
3609 : 5102817 : if (! incr_p)
3610 : 1235797 : curr_reg_pressure[pressure_class] -= nregs;
3611 : : else
3612 : : {
3613 : 3867020 : curr_reg_pressure[pressure_class] += nregs;
3614 : 3867020 : if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3615 : : < curr_reg_pressure[pressure_class])
3616 : 1763112 : BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3617 : 1763112 : = curr_reg_pressure[pressure_class];
3618 : : }
3619 : 5102817 : }
3620 : :
3621 : : /* Calculate register pressure for each basic block by walking insns
3622 : : from last to first. */
3623 : : static void
3624 : 28626 : calculate_bb_reg_pressure (void)
3625 : : {
3626 : 28626 : int i;
3627 : 28626 : unsigned int j;
3628 : 28626 : rtx_insn *insn;
3629 : 28626 : basic_block bb;
3630 : 28626 : bitmap curr_regs_live;
3631 : 28626 : bitmap_iterator bi;
3632 : :
3633 : :
3634 : 28626 : ira_setup_eliminable_regset ();
3635 : 28626 : curr_regs_live = BITMAP_ALLOC (®_obstack);
3636 : 356660 : FOR_EACH_BB_FN (bb, cfun)
3637 : : {
3638 : 328034 : curr_bb = bb;
3639 : 328034 : BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3640 : 328034 : BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3641 : 328034 : bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3642 : 328034 : bitmap_copy (curr_regs_live, df_get_live_out (bb));
3643 : 1970329 : for (i = 0; i < ira_pressure_classes_num; i++)
3644 : 1314261 : curr_reg_pressure[ira_pressure_classes[i]] = 0;
3645 : 3001973 : EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3646 : 2673939 : change_pressure (j, true);
3647 : :
3648 : 2780198 : FOR_BB_INSNS_REVERSE (bb, insn)
3649 : : {
3650 : 2452164 : rtx dreg;
3651 : 2452164 : int regno;
3652 : 2452164 : df_ref def, use;
3653 : :
3654 : 2452164 : if (! NONDEBUG_INSN_P (insn))
3655 : 622330 : continue;
3656 : :
3657 : 15858726 : FOR_EACH_INSN_DEF (def, insn)
3658 : : {
3659 : 14028892 : dreg = DF_REF_REAL_REG (def);
3660 : 14028892 : gcc_assert (REG_P (dreg));
3661 : 14028892 : regno = REGNO (dreg);
3662 : 14028892 : if (!(DF_REF_FLAGS (def)
3663 : : & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3664 : : {
3665 : 14026885 : if (bitmap_clear_bit (curr_regs_live, regno))
3666 : 1235797 : change_pressure (regno, false);
3667 : : }
3668 : : }
3669 : :
3670 : 3952986 : FOR_EACH_INSN_USE (use, insn)
3671 : : {
3672 : 2123152 : dreg = DF_REF_REAL_REG (use);
3673 : 2123152 : gcc_assert (REG_P (dreg));
3674 : 2123152 : regno = REGNO (dreg);
3675 : 2123152 : if (bitmap_set_bit (curr_regs_live, regno))
3676 : 1193081 : change_pressure (regno, true);
3677 : : }
3678 : : }
3679 : : }
3680 : 28626 : BITMAP_FREE (curr_regs_live);
3681 : :
3682 : 28626 : if (dump_file == NULL)
3683 : 28619 : return;
3684 : :
3685 : 7 : fprintf (dump_file, "\nRegister Pressure: \n");
3686 : 35 : FOR_EACH_BB_FN (bb, cfun)
3687 : : {
3688 : 28 : fprintf (dump_file, " Basic block %d: \n", bb->index);
3689 : 140 : for (i = 0; (int) i < ira_pressure_classes_num; i++)
3690 : : {
3691 : 112 : enum reg_class pressure_class;
3692 : :
3693 : 112 : pressure_class = ira_pressure_classes[i];
3694 : 112 : if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3695 : 84 : continue;
3696 : :
3697 : 28 : fprintf (dump_file, " %s=%d\n", reg_class_names[pressure_class],
3698 : : BB_DATA (bb)->max_reg_pressure[pressure_class]);
3699 : : }
3700 : : }
3701 : 7 : fprintf (dump_file, "\n");
3702 : : }
3703 : :
3704 : : /* Top level routine to perform one code hoisting (aka unification) pass
3705 : :
3706 : : Return true if a change was made. */
3707 : :
3708 : : static bool
3709 : 61938 : one_code_hoisting_pass (void)
3710 : : {
3711 : 61938 : bool changed = false;
3712 : :
3713 : 61938 : gcse_subst_count = 0;
3714 : 61938 : gcse_create_count = 0;
3715 : :
3716 : : /* Return if there's nothing to do, or it is too expensive. */
3717 : 61938 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
3718 : 61938 : || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
3719 : 33312 : return false;
3720 : :
3721 : 28626 : doing_code_hoisting_p = true;
3722 : :
3723 : : /* Calculate register pressure for each basic block. */
3724 : 28626 : if (flag_ira_hoist_pressure)
3725 : : {
3726 : 28626 : regstat_init_n_sets_and_refs ();
3727 : 28626 : ira_set_pseudo_classes (false, dump_file);
3728 : 28626 : alloc_aux_for_blocks (sizeof (struct bb_data));
3729 : 28626 : calculate_bb_reg_pressure ();
3730 : 28626 : regstat_free_n_sets_and_refs ();
3731 : : }
3732 : :
3733 : : /* We need alias. */
3734 : 28626 : init_alias_analysis ();
3735 : :
3736 : 28626 : bytes_used = 0;
3737 : 28626 : gcc_obstack_init (&gcse_obstack);
3738 : 28626 : alloc_gcse_mem ();
3739 : :
3740 : 28626 : alloc_hash_table (&expr_hash_table);
3741 : 28626 : compute_hash_table (&expr_hash_table);
3742 : 28626 : if (dump_file)
3743 : 7 : dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3744 : :
3745 : 28626 : if (expr_hash_table.n_elems > 0)
3746 : : {
3747 : 25187 : alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
3748 : : expr_hash_table.n_elems);
3749 : 25187 : compute_code_hoist_data ();
3750 : 25187 : changed = hoist_code ();
3751 : 25187 : free_code_hoist_mem ();
3752 : : }
3753 : :
3754 : 28626 : if (flag_ira_hoist_pressure)
3755 : : {
3756 : 28626 : free_aux_for_blocks ();
3757 : 28626 : free_reg_info ();
3758 : : }
3759 : 28626 : free_hash_table (&expr_hash_table);
3760 : 28626 : free_gcse_mem ();
3761 : 28626 : obstack_free (&gcse_obstack, NULL);
3762 : :
3763 : : /* We are finished with alias. */
3764 : 28626 : end_alias_analysis ();
3765 : :
3766 : 28626 : if (dump_file)
3767 : : {
3768 : 7 : fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3769 : 7 : current_function_name (), n_basic_blocks_for_fn (cfun),
3770 : : bytes_used);
3771 : 7 : fprintf (dump_file, "%d substs, %d insns created\n",
3772 : : gcse_subst_count, gcse_create_count);
3773 : : }
3774 : :
3775 : 28626 : doing_code_hoisting_p = false;
3776 : :
3777 : 28626 : return changed;
3778 : : }
3779 : :
3780 : : /* Here we provide the things required to do store motion towards the exit.
3781 : : In order for this to be effective, gcse also needed to be taught how to
3782 : : move a load when it is killed only by a store to itself.
3783 : :
3784 : : int i;
3785 : : float a[10];
3786 : :
3787 : : void foo(float scale)
3788 : : {
3789 : : for (i=0; i<10; i++)
3790 : : a[i] *= scale;
3791 : : }
3792 : :
3793 : : 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3794 : : the load out since its live around the loop, and stored at the bottom
3795 : : of the loop.
3796 : :
3797 : : The 'Load Motion' referred to and implemented in this file is
3798 : : an enhancement to gcse which when using edge based LCM, recognizes
3799 : : this situation and allows gcse to move the load out of the loop.
3800 : :
3801 : : Once gcse has hoisted the load, store motion can then push this
3802 : : load towards the exit, and we end up with no loads or stores of 'i'
3803 : : in the loop. */
3804 : :
3805 : : /* This will search the ldst list for a matching expression. If it
3806 : : doesn't find one, we create one and initialize it. */
3807 : :
3808 : : static struct ls_expr *
3809 : 14221022 : ldst_entry (rtx x)
3810 : : {
3811 : 14221022 : int do_not_record_p = 0;
3812 : 14221022 : struct ls_expr * ptr;
3813 : 14221022 : unsigned int hash;
3814 : 14221022 : ls_expr **slot;
3815 : 14221022 : struct ls_expr e;
3816 : :
3817 : 14221022 : hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3818 : : NULL, /*have_reg_qty=*/false);
3819 : :
3820 : 14221022 : e.pattern = x;
3821 : 14221022 : slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
3822 : 14221022 : if (*slot)
3823 : : return *slot;
3824 : :
3825 : 9388752 : ptr = XNEW (struct ls_expr);
3826 : :
3827 : 9388752 : ptr->next = pre_ldst_mems;
3828 : 9388752 : ptr->expr = NULL;
3829 : 9388752 : ptr->pattern = x;
3830 : 9388752 : ptr->pattern_regs = NULL_RTX;
3831 : 9388752 : ptr->stores.create (0);
3832 : 9388752 : ptr->reaching_reg = NULL_RTX;
3833 : 9388752 : ptr->invalid = 0;
3834 : 9388752 : ptr->index = 0;
3835 : 9388752 : ptr->hash_index = hash;
3836 : 9388752 : pre_ldst_mems = ptr;
3837 : 9388752 : *slot = ptr;
3838 : :
3839 : 9388752 : return ptr;
3840 : : }
3841 : :
3842 : : /* Free up an individual ldst entry. */
3843 : :
3844 : : static void
3845 : 9388752 : free_ldst_entry (struct ls_expr * ptr)
3846 : : {
3847 : 0 : ptr->stores.release ();
3848 : :
3849 : 9388752 : free (ptr);
3850 : 3111014 : }
3851 : :
3852 : : /* Free up all memory associated with the ldst list. */
3853 : :
3854 : : static void
3855 : 474938 : free_ld_motion_mems (void)
3856 : : {
3857 : 474938 : delete pre_ldst_table;
3858 : 474938 : pre_ldst_table = NULL;
3859 : :
3860 : 3585952 : while (pre_ldst_mems)
3861 : : {
3862 : 3111014 : struct ls_expr * tmp = pre_ldst_mems;
3863 : :
3864 : 3111014 : pre_ldst_mems = pre_ldst_mems->next;
3865 : :
3866 : 3111014 : free_ldst_entry (tmp);
3867 : : }
3868 : :
3869 : 474938 : pre_ldst_mems = NULL;
3870 : 474938 : }
3871 : :
3872 : : /* Dump debugging info about the ldst list. */
3873 : :
3874 : : static void
3875 : 16 : print_ldst_list (FILE * file)
3876 : : {
3877 : 16 : struct ls_expr * ptr;
3878 : :
3879 : 16 : fprintf (file, "LDST list: \n");
3880 : :
3881 : 56 : for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3882 : : {
3883 : 40 : fprintf (file, " Pattern (%3d): ", ptr->index);
3884 : :
3885 : 40 : print_rtl (file, ptr->pattern);
3886 : :
3887 : 40 : fprintf (file, "\n Stores : ");
3888 : 40 : print_rtx_insn_vec (file, ptr->stores);
3889 : :
3890 : 40 : fprintf (file, "\n\n");
3891 : : }
3892 : :
3893 : 16 : fprintf (file, "\n");
3894 : 16 : }
3895 : :
3896 : : /* Returns 1 if X is in the list of ldst only expressions. */
3897 : :
3898 : : static struct ls_expr *
3899 : 667982 : find_rtx_in_ldst (rtx x)
3900 : : {
3901 : 667982 : struct ls_expr e;
3902 : 667982 : ls_expr **slot;
3903 : 667982 : if (!pre_ldst_table)
3904 : : return NULL;
3905 : 667982 : e.pattern = x;
3906 : 667982 : slot = pre_ldst_table->find_slot (&e, NO_INSERT);
3907 : 667982 : if (!slot || (*slot)->invalid)
3908 : : return NULL;
3909 : : return *slot;
3910 : : }
3911 : :
3912 : : /* Load Motion for loads which only kill themselves. */
3913 : :
3914 : : /* Return true if x, a MEM, is a simple access with no side effects.
3915 : : These are the types of loads we consider for the ld_motion list,
3916 : : otherwise we let the usual aliasing take care of it. */
3917 : :
3918 : : static bool
3919 : 18857283 : simple_mem (const_rtx x)
3920 : : {
3921 : 18857283 : if (MEM_VOLATILE_P (x))
3922 : : return false;
3923 : :
3924 : 17995036 : if (GET_MODE (x) == BLKmode)
3925 : : return false;
3926 : :
3927 : : /* If we are handling exceptions, we must be careful with memory references
3928 : : that may trap. If we are not, the behavior is undefined, so we may just
3929 : : continue. */
3930 : 17936206 : if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3931 : : return false;
3932 : :
3933 : 15883847 : if (side_effects_p (x))
3934 : : return false;
3935 : :
3936 : : /* Do not consider function arguments passed on stack. */
3937 : 14460433 : if (reg_mentioned_p (stack_pointer_rtx, x))
3938 : : return false;
3939 : :
3940 : 14223005 : if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3941 : : return false;
3942 : :
3943 : : return true;
3944 : : }
3945 : :
3946 : : /* Make sure there isn't a buried reference in this pattern anywhere.
3947 : : If there is, invalidate the entry for it since we're not capable
3948 : : of fixing it up just yet.. We have to be sure we know about ALL
3949 : : loads since the aliasing code will allow all entries in the
3950 : : ld_motion list to not-alias itself. If we miss a load, we will get
3951 : : the wrong value since gcse might common it and we won't know to
3952 : : fix it up. */
3953 : :
3954 : : static void
3955 : 164879323 : invalidate_any_buried_refs (rtx x)
3956 : : {
3957 : 164879323 : const char * fmt;
3958 : 164879323 : int i, j;
3959 : 164879323 : struct ls_expr * ptr;
3960 : :
3961 : : /* Invalidate it in the list. */
3962 : 164879323 : if (MEM_P (x) && simple_mem (x))
3963 : : {
3964 : 5001714 : ptr = ldst_entry (x);
3965 : 5001714 : ptr->invalid = 1;
3966 : : }
3967 : :
3968 : : /* Recursively process the insn. */
3969 : 164879323 : fmt = GET_RTX_FORMAT (GET_CODE (x));
3970 : :
3971 : 385873231 : for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3972 : : {
3973 : 220993908 : if (fmt[i] == 'e')
3974 : 99362000 : invalidate_any_buried_refs (XEXP (x, i));
3975 : 121631908 : else if (fmt[i] == 'E')
3976 : 29231817 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3977 : 19849874 : invalidate_any_buried_refs (XVECEXP (x, i, j));
3978 : : }
3979 : 164879323 : }
3980 : :
3981 : : /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3982 : : being defined as MEM loads and stores to symbols, with no side effects
3983 : : and no registers in the expression. For a MEM destination, we also
3984 : : check that the insn is still valid if we replace the destination with a
3985 : : REG, as is done in update_ld_motion_stores. If there are any uses/defs
3986 : : which don't match this criteria, they are invalidated and trimmed out
3987 : : later. */
3988 : :
3989 : : static void
3990 : 474938 : compute_ld_motion_mems (void)
3991 : : {
3992 : 474938 : struct ls_expr * ptr;
3993 : 474938 : basic_block bb;
3994 : 474938 : rtx_insn *insn;
3995 : :
3996 : 474938 : pre_ldst_mems = NULL;
3997 : 474938 : pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);
3998 : :
3999 : 9883476 : FOR_EACH_BB_FN (bb, cfun)
4000 : : {
4001 : 117364145 : FOR_BB_INSNS (bb, insn)
4002 : : {
4003 : 107955607 : if (NONDEBUG_INSN_P (insn))
4004 : : {
4005 : 47814875 : if (GET_CODE (PATTERN (insn)) == SET)
4006 : : {
4007 : 37477214 : rtx src = SET_SRC (PATTERN (insn));
4008 : 37477214 : rtx dest = SET_DEST (PATTERN (insn));
4009 : :
4010 : : /* Check for a simple load. */
4011 : 37477214 : if (MEM_P (src) && simple_mem (src))
4012 : : {
4013 : 4769286 : ptr = ldst_entry (src);
4014 : 4769286 : if (!REG_P (dest))
4015 : 33116 : ptr->invalid = 1;
4016 : : }
4017 : : else
4018 : : {
4019 : : /* Make sure there isn't a buried load somewhere. */
4020 : 32707928 : invalidate_any_buried_refs (src);
4021 : : }
4022 : :
4023 : : /* Check for a simple load through a REG_EQUAL note. */
4024 : 37477214 : rtx note = find_reg_equal_equiv_note (insn), src_eq;
4025 : 37477214 : if (note
4026 : 2221527 : && REG_NOTE_KIND (note) == REG_EQUAL
4027 : 2082163 : && (src_eq = XEXP (note, 0))
4028 : 39559377 : && !(MEM_P (src_eq) && simple_mem (src_eq)))
4029 : 2082162 : invalidate_any_buried_refs (src_eq);
4030 : :
4031 : : /* Check for stores. Don't worry about aliased ones, they
4032 : : will block any movement we might do later. We only care
4033 : : about this exact pattern since those are the only
4034 : : circumstance that we will ignore the aliasing info. */
4035 : 37477214 : if (MEM_P (dest) && simple_mem (dest))
4036 : : {
4037 : 4450022 : ptr = ldst_entry (dest);
4038 : 4450022 : machine_mode src_mode = GET_MODE (src);
4039 : 4450022 : if (! MEM_P (src)
4040 : 4450022 : && GET_CODE (src) != ASM_OPERANDS
4041 : : /* Check for REG manually since want_to_gcse_p
4042 : : returns 0 for all REGs. */
4043 : 8900044 : && can_assign_to_reg_without_clobbers_p (src,
4044 : : src_mode))
4045 : 4441765 : ptr->stores.safe_push (insn);
4046 : : else
4047 : 8257 : ptr->invalid = 1;
4048 : : }
4049 : : }
4050 : : else
4051 : : {
4052 : : /* Invalidate all MEMs in the pattern and... */
4053 : 10337661 : invalidate_any_buried_refs (PATTERN (insn));
4054 : :
4055 : : /* ...in REG_EQUAL notes for PARALLELs with single SET. */
4056 : 10337661 : rtx note = find_reg_equal_equiv_note (insn), src_eq;
4057 : 10337661 : if (note
4058 : 539698 : && REG_NOTE_KIND (note) == REG_EQUAL
4059 : 10877359 : && (src_eq = XEXP (note, 0)))
4060 : 539698 : invalidate_any_buried_refs (src_eq);
4061 : : }
4062 : : }
4063 : : }
4064 : : }
4065 : 474938 : }
4066 : :
4067 : : /* Remove any references that have been either invalidated or are not in the
4068 : : expression list for pre gcse. */
4069 : :
4070 : : static void
4071 : 474938 : trim_ld_motion_mems (void)
4072 : : {
4073 : 474938 : struct ls_expr * * last = & pre_ldst_mems;
4074 : 474938 : struct ls_expr * ptr = pre_ldst_mems;
4075 : :
4076 : 9863690 : while (ptr != NULL)
4077 : : {
4078 : 9388752 : struct gcse_expr * expr;
4079 : :
4080 : : /* Delete if entry has been made invalid. */
4081 : 9388752 : if (! ptr->invalid)
4082 : : {
4083 : : /* Delete if we cannot find this mem in the expression list. */
4084 : 6660798 : unsigned int hash = ptr->hash_index % expr_hash_table.size;
4085 : :
4086 : 6660798 : for (expr = expr_hash_table.table[hash];
4087 : 10144294 : expr != NULL;
4088 : 3483496 : expr = expr->next_same_hash)
4089 : 6594510 : if (expr_equiv_p (expr->expr, ptr->pattern))
4090 : : break;
4091 : : }
4092 : : else
4093 : : expr = (struct gcse_expr *) 0;
4094 : :
4095 : 6660798 : if (expr)
4096 : : {
4097 : : /* Set the expression field if we are keeping it. */
4098 : 3111014 : ptr->expr = expr;
4099 : 3111014 : last = & ptr->next;
4100 : 3111014 : ptr = ptr->next;
4101 : : }
4102 : : else
4103 : : {
4104 : 6277738 : *last = ptr->next;
4105 : 6277738 : pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
4106 : 6277738 : free_ldst_entry (ptr);
4107 : 6277738 : ptr = * last;
4108 : : }
4109 : : }
4110 : :
4111 : : /* Show the world what we've found. */
4112 : 474938 : if (dump_file && pre_ldst_mems != NULL)
4113 : 16 : print_ldst_list (dump_file);
4114 : 474938 : }
4115 : :
4116 : : /* This routine will take an expression which we are replacing with
4117 : : a reaching register, and update any stores that are needed if
4118 : : that expression is in the ld_motion list. Stores are updated by
4119 : : copying their SRC to the reaching register, and then storing
4120 : : the reaching register into the store location. These keeps the
4121 : : correct value in the reaching register for the loads. */
4122 : :
4123 : : static void
4124 : 559923 : update_ld_motion_stores (struct gcse_expr * expr)
4125 : : {
4126 : 559923 : struct ls_expr * mem_ptr;
4127 : :
4128 : 559923 : if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
4129 : : {
4130 : : /* We can try to find just the REACHED stores, but is shouldn't
4131 : : matter to set the reaching reg everywhere... some might be
4132 : : dead and should be eliminated later. */
4133 : :
4134 : : /* We replace (set mem expr) with (set reg expr) (set mem reg)
4135 : : where reg is the reaching reg used in the load. We checked in
4136 : : compute_ld_motion_mems that we can replace (set mem expr) with
4137 : : (set reg expr) in that insn. */
4138 : 113614 : rtx_insn *insn;
4139 : 113614 : unsigned int i;
4140 : 202345 : FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
4141 : : {
4142 : 68978 : rtx pat = PATTERN (insn);
4143 : 68978 : rtx src = SET_SRC (pat);
4144 : 68978 : rtx reg = expr->reaching_reg;
4145 : :
4146 : : /* If we've already copied it, continue. */
4147 : 68978 : if (expr->reaching_reg == src)
4148 : 59429 : continue;
4149 : :
4150 : 9549 : if (dump_file)
4151 : : {
4152 : 0 : fprintf (dump_file, "PRE: store updated with reaching reg ");
4153 : 0 : print_rtl (dump_file, reg);
4154 : 0 : fprintf (dump_file, ":\n ");
4155 : 0 : print_inline_rtx (dump_file, insn, 8);
4156 : 0 : fprintf (dump_file, "\n");
4157 : : }
4158 : :
4159 : 9549 : rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
4160 : 9549 : emit_insn_before (copy, insn);
4161 : 9549 : SET_SRC (pat) = reg;
4162 : 9549 : df_insn_rescan (insn);
4163 : :
4164 : : /* un-recognize this pattern since it's probably different now. */
4165 : 9549 : INSN_CODE (insn) = -1;
4166 : 9549 : gcse_create_count++;
4167 : : }
4168 : : }
4169 : 559923 : }
4170 : :
4171 : : /* Return true if the graph is too expensive to optimize. PASS is the
4172 : : optimization about to be performed. */
4173 : :
4174 : : bool
4175 : 2091523 : gcse_or_cprop_is_too_expensive (const char *pass)
4176 : : {
4177 : 2091523 : unsigned HOST_WIDE_INT memory_request
4178 : 2091523 : = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun)
4179 : 2091523 : * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE));
4180 : :
4181 : : /* Trying to perform global optimizations on flow graphs which have
4182 : : a high connectivity will take a long time and is unlikely to be
4183 : : particularly useful.
4184 : :
4185 : : In normal circumstances a cfg should have about twice as many
4186 : : edges as blocks. But we do not want to punish small functions
4187 : : which have a couple switch statements. Rather than simply
4188 : : threshold the number of blocks, uses something with a more
4189 : : graceful degradation. */
4190 : 2091523 : if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
4191 : : {
4192 : 0 : warning (OPT_Wdisabled_optimization,
4193 : : "%s: %d basic blocks and %d edges/basic block",
4194 : : pass, n_basic_blocks_for_fn (cfun),
4195 : : n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
4196 : :
4197 : 0 : return true;
4198 : : }
4199 : :
4200 : : /* If allocating memory for the dataflow bitmaps would take up too much
4201 : : storage it's better just to disable the optimization. */
4202 : 2091523 : if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory)
4203 : : {
4204 : 0 : warning (OPT_Wdisabled_optimization,
4205 : : "%s: %d basic blocks and %d registers; "
4206 : : "increase %<--param max-gcse-memory%> above %wu",
4207 : 0 : pass, n_basic_blocks_for_fn (cfun), max_reg_num (),
4208 : : memory_request / 1024);
4209 : :
4210 : 0 : return true;
4211 : : }
4212 : :
4213 : : return false;
4214 : : }
4215 : :
4216 : : static unsigned int
4217 : 885252 : execute_rtl_pre (void)
4218 : : {
4219 : 885252 : int changed;
4220 : 885252 : delete_unreachable_blocks ();
4221 : 885252 : df_analyze ();
4222 : 885252 : changed = one_pre_gcse_pass ();
4223 : 885252 : flag_rerun_cse_after_global_opts |= changed;
4224 : 885252 : if (changed)
4225 : 118488 : cleanup_cfg (0);
4226 : 885252 : return 0;
4227 : : }
4228 : :
4229 : : static unsigned int
4230 : 0 : execute_hardreg_pre (void)
4231 : : {
4232 : : #ifdef HARDREG_PRE_REGNOS
4233 : : doing_hardreg_pre_p = true;
4234 : : unsigned int regnos[] = HARDREG_PRE_REGNOS;
4235 : : /* It's possible to avoid this loop, but it isn't worth doing so until
4236 : : hardreg PRE is used for multiple hardregs. */
4237 : : for (int i = 0; regnos[i] != 0; i++)
4238 : : {
4239 : : int changed;
4240 : : current_hardreg_regno = regnos[i];
4241 : : if (dump_file)
4242 : : fprintf(dump_file, "Entering hardreg PRE for regno %d\n",
4243 : : current_hardreg_regno);
4244 : : delete_unreachable_blocks ();
4245 : : df_analyze ();
4246 : : changed = one_pre_gcse_pass ();
4247 : : if (changed)
4248 : : cleanup_cfg (0);
4249 : : }
4250 : : doing_hardreg_pre_p = false;
4251 : : #endif
4252 : 0 : return 0;
4253 : : }
4254 : :
4255 : : static unsigned int
4256 : 61938 : execute_rtl_hoist (void)
4257 : : {
4258 : 61938 : int changed;
4259 : 61938 : delete_unreachable_blocks ();
4260 : 61938 : df_analyze ();
4261 : 61938 : changed = one_code_hoisting_pass ();
4262 : 61938 : flag_rerun_cse_after_global_opts |= changed;
4263 : 61938 : if (changed)
4264 : 2914 : cleanup_cfg (0);
4265 : 61938 : return 0;
4266 : : }
4267 : :
4268 : : namespace {
4269 : :
4270 : : const pass_data pass_data_rtl_pre =
4271 : : {
4272 : : RTL_PASS, /* type */
4273 : : "rtl pre", /* name */
4274 : : OPTGROUP_NONE, /* optinfo_flags */
4275 : : TV_PRE, /* tv_id */
4276 : : PROP_cfglayout, /* properties_required */
4277 : : 0, /* properties_provided */
4278 : : 0, /* properties_destroyed */
4279 : : 0, /* todo_flags_start */
4280 : : TODO_df_finish, /* todo_flags_finish */
4281 : : };
4282 : :
4283 : : class pass_rtl_pre : public rtl_opt_pass
4284 : : {
4285 : : public:
4286 : 285081 : pass_rtl_pre (gcc::context *ctxt)
4287 : 570162 : : rtl_opt_pass (pass_data_rtl_pre, ctxt)
4288 : : {}
4289 : :
4290 : : /* opt_pass methods: */
4291 : : bool gate (function *) final override;
4292 : 885252 : unsigned int execute (function *) final override
4293 : : {
4294 : 885252 : return execute_rtl_pre ();
4295 : : }
4296 : :
4297 : : }; // class pass_rtl_pre
4298 : :
4299 : : /* We do not construct an accurate cfg in functions which call
4300 : : setjmp, so none of these passes runs if the function calls
4301 : : setjmp.
4302 : : FIXME: Should just handle setjmp via REG_SETJMP notes. */
4303 : :
4304 : : bool
4305 : 1449863 : pass_rtl_pre::gate (function *fun)
4306 : : {
4307 : 1023805 : return optimize > 0 && flag_gcse
4308 : 947858 : && !fun->calls_setjmp
4309 : 947191 : && optimize_function_for_speed_p (fun)
4310 : 2335116 : && dbg_cnt (pre);
4311 : : }
4312 : :
4313 : : } // anon namespace
4314 : :
4315 : : rtl_opt_pass *
4316 : 285081 : make_pass_rtl_pre (gcc::context *ctxt)
4317 : : {
4318 : 285081 : return new pass_rtl_pre (ctxt);
4319 : : }
4320 : :
4321 : : namespace {
4322 : :
4323 : : const pass_data pass_data_hardreg_pre =
4324 : : {
4325 : : RTL_PASS, /* type */
4326 : : "hardreg_pre", /* name */
4327 : : OPTGROUP_NONE, /* optinfo_flags */
4328 : : TV_PRE, /* tv_id */
4329 : : PROP_cfglayout, /* properties_required */
4330 : : 0, /* properties_provided */
4331 : : 0, /* properties_destroyed */
4332 : : 0, /* todo_flags_start */
4333 : : TODO_df_finish, /* todo_flags_finish */
4334 : : };
4335 : :
4336 : : class pass_hardreg_pre : public rtl_opt_pass
4337 : : {
4338 : : public:
4339 : 285081 : pass_hardreg_pre (gcc::context *ctxt)
4340 : 570162 : : rtl_opt_pass (pass_data_hardreg_pre, ctxt)
4341 : : {}
4342 : :
4343 : : /* opt_pass methods: */
4344 : : bool gate (function *) final override;
4345 : 0 : unsigned int execute (function *) final override
4346 : : {
4347 : 0 : return execute_hardreg_pre ();
4348 : : }
4349 : :
4350 : : }; // class pass_rtl_pre
4351 : :
4352 : : bool
4353 : 1449863 : pass_hardreg_pre::gate (function * ARG_UNUSED (fun))
4354 : : {
4355 : : #ifdef HARDREG_PRE_REGNOS
4356 : : return optimize > 0
4357 : : && !fun->calls_setjmp;
4358 : : #else
4359 : 1449863 : return false;
4360 : : #endif
4361 : : }
4362 : :
4363 : : } // anon namespace
4364 : :
4365 : : rtl_opt_pass *
4366 : 285081 : make_pass_hardreg_pre (gcc::context *ctxt)
4367 : : {
4368 : 285081 : return new pass_hardreg_pre (ctxt);
4369 : : }
4370 : :
4371 : : namespace {
4372 : :
4373 : : const pass_data pass_data_rtl_hoist =
4374 : : {
4375 : : RTL_PASS, /* type */
4376 : : "hoist", /* name */
4377 : : OPTGROUP_NONE, /* optinfo_flags */
4378 : : TV_HOIST, /* tv_id */
4379 : : PROP_cfglayout, /* properties_required */
4380 : : 0, /* properties_provided */
4381 : : 0, /* properties_destroyed */
4382 : : 0, /* todo_flags_start */
4383 : : TODO_df_finish, /* todo_flags_finish */
4384 : : };
4385 : :
4386 : : class pass_rtl_hoist : public rtl_opt_pass
4387 : : {
4388 : : public:
4389 : 285081 : pass_rtl_hoist (gcc::context *ctxt)
4390 : 570162 : : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
4391 : : {}
4392 : :
4393 : : /* opt_pass methods: */
4394 : : bool gate (function *) final override;
4395 : 61938 : unsigned int execute (function *) final override
4396 : : {
4397 : 61938 : return execute_rtl_hoist ();
4398 : : }
4399 : :
4400 : : }; // class pass_rtl_hoist
4401 : :
4402 : : bool
4403 : 1449863 : pass_rtl_hoist::gate (function *)
4404 : : {
4405 : 1023805 : return optimize > 0 && flag_gcse
4406 : 947858 : && !cfun->calls_setjmp
4407 : : /* It does not make sense to run code hoisting unless we are optimizing
4408 : : for code size -- it rarely makes programs faster, and can make then
4409 : : bigger if we did PRE (when optimizing for space, we don't run PRE). */
4410 : 947191 : && optimize_function_for_size_p (cfun)
4411 : 1511801 : && dbg_cnt (hoist);
4412 : : }
4413 : :
4414 : : } // anon namespace
4415 : :
4416 : : rtl_opt_pass *
4417 : 285081 : make_pass_rtl_hoist (gcc::context *ctxt)
4418 : : {
4419 : 285081 : return new pass_rtl_hoist (ctxt);
4420 : : }
4421 : :
4422 : : /* Reset all state within gcse.cc so that we can rerun the compiler
4423 : : within the same process. For use by toplev::finalize. */
4424 : :
4425 : : void
4426 : 256374 : gcse_cc_finalize (void)
4427 : : {
4428 : 256374 : test_insn = NULL;
4429 : 256374 : }
4430 : :
4431 : : #include "gt-gcse.h"
|