Branch data Line data Source code
1 : : /* Partial redundancy elimination / Hoisting for RTL.
2 : : Copyright (C) 1997-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : /* TODO
21 : : - reordering of memory allocation and freeing to be more space efficient
22 : : - calc rough register pressure information and use the info to drive all
23 : : kinds of code motion (including code hoisting) in a unified way.
24 : : */
25 : :
26 : : /* References searched while implementing this.
27 : :
28 : : Compilers Principles, Techniques and Tools
29 : : Aho, Sethi, Ullman
30 : : Addison-Wesley, 1988
31 : :
32 : : Global Optimization by Suppression of Partial Redundancies
33 : : E. Morel, C. Renvoise
34 : : communications of the acm, Vol. 22, Num. 2, Feb. 1979
35 : :
36 : : A Portable Machine-Independent Global Optimizer - Design and Measurements
37 : : Frederick Chow
38 : : Stanford Ph.D. thesis, Dec. 1983
39 : :
40 : : A Fast Algorithm for Code Movement Optimization
41 : : D.M. Dhamdhere
42 : : SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
43 : :
44 : : A Solution to a Problem with Morel and Renvoise's
45 : : Global Optimization by Suppression of Partial Redundancies
46 : : K-H Drechsler, M.P. Stadel
47 : : ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
48 : :
49 : : Practical Adaptation of the Global Optimization
50 : : Algorithm of Morel and Renvoise
51 : : D.M. Dhamdhere
52 : : ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
53 : :
54 : : Efficiently Computing Static Single Assignment Form and the Control
55 : : Dependence Graph
56 : : R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
57 : : ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
58 : :
59 : : Lazy Code Motion
60 : : J. Knoop, O. Ruthing, B. Steffen
61 : : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
62 : :
63 : : What's In a Region? Or Computing Control Dependence Regions in Near-Linear
64 : : Time for Reducible Flow Control
65 : : Thomas Ball
66 : : ACM Letters on Programming Languages and Systems,
67 : : Vol. 2, Num. 1-4, Mar-Dec 1993
68 : :
69 : : An Efficient Representation for Sparse Sets
70 : : Preston Briggs, Linda Torczon
71 : : ACM Letters on Programming Languages and Systems,
72 : : Vol. 2, Num. 1-4, Mar-Dec 1993
73 : :
74 : : A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
75 : : K-H Drechsler, M.P. Stadel
76 : : ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
77 : :
78 : : Partial Dead Code Elimination
79 : : J. Knoop, O. Ruthing, B. Steffen
80 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
81 : :
82 : : Effective Partial Redundancy Elimination
83 : : P. Briggs, K.D. Cooper
84 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
85 : :
86 : : The Program Structure Tree: Computing Control Regions in Linear Time
87 : : R. Johnson, D. Pearson, K. Pingali
88 : : ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
89 : :
90 : : Optimal Code Motion: Theory and Practice
91 : : J. Knoop, O. Ruthing, B. Steffen
92 : : ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
93 : :
94 : : The power of assignment motion
95 : : J. Knoop, O. Ruthing, B. Steffen
96 : : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
97 : :
98 : : Global code motion / global value numbering
99 : : C. Click
100 : : ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
101 : :
102 : : Value Driven Redundancy Elimination
103 : : L.T. Simpson
104 : : Rice University Ph.D. thesis, Apr. 1996
105 : :
106 : : Value Numbering
107 : : L.T. Simpson
108 : : Massively Scalar Compiler Project, Rice University, Sep. 1996
109 : :
110 : : High Performance Compilers for Parallel Computing
111 : : Michael Wolfe
112 : : Addison-Wesley, 1996
113 : :
114 : : Advanced Compiler Design and Implementation
115 : : Steven Muchnick
116 : : Morgan Kaufmann, 1997
117 : :
118 : : Building an Optimizing Compiler
119 : : Robert Morgan
120 : : Digital Press, 1998
121 : :
122 : : People wishing to speed up the code here should read:
123 : : Elimination Algorithms for Data Flow Analysis
124 : : B.G. Ryder, M.C. Paull
125 : : ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
126 : :
127 : : How to Analyze Large Programs Efficiently and Informatively
128 : : D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
129 : : ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
130 : :
131 : : People wishing to do something different can find various possibilities
132 : : in the above papers and elsewhere.
133 : : */
134 : :
135 : : #include "config.h"
136 : : #include "system.h"
137 : : #include "coretypes.h"
138 : : #include "backend.h"
139 : : #include "target.h"
140 : : #include "rtl.h"
141 : : #include "tree.h"
142 : : #include "predict.h"
143 : : #include "df.h"
144 : : #include "memmodel.h"
145 : : #include "tm_p.h"
146 : : #include "insn-config.h"
147 : : #include "print-rtl.h"
148 : : #include "regs.h"
149 : : #include "ira.h"
150 : : #include "recog.h"
151 : : #include "diagnostic-core.h"
152 : : #include "cfgrtl.h"
153 : : #include "cfganal.h"
154 : : #include "lcm.h"
155 : : #include "cfgcleanup.h"
156 : : #include "expr.h"
157 : : #include "intl.h"
158 : : #include "tree-pass.h"
159 : : #include "dbgcnt.h"
160 : : #include "gcse.h"
161 : : #include "gcse-common.h"
162 : : #include "function-abi.h"
163 : :
164 : : /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
165 : : are a superset of those done by classic GCSE.
166 : :
167 : : Two passes of copy/constant propagation are done around PRE or hoisting
168 : : because the first one enables more GCSE and the second one helps to clean
169 : : up the copies that PRE and HOIST create. This is needed more for PRE than
170 : : for HOIST because code hoisting will try to use an existing register
171 : : containing the common subexpression rather than create a new one. This is
172 : : harder to do for PRE because of the code motion (which HOIST doesn't do).
173 : :
174 : : Expressions we are interested in GCSE-ing are of the form
175 : : (set (pseudo-reg) (expression)).
176 : : Function want_to_gcse_p says what these are.
177 : :
178 : : In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
179 : : This allows PRE to hoist expressions that are expressed in multiple insns,
180 : : such as complex address calculations (e.g. for PIC code, or loads with a
181 : : high part and a low part).
182 : :
183 : : PRE handles moving invariant expressions out of loops (by treating them as
184 : : partially redundant).
185 : :
186 : : **********************
187 : :
188 : : We used to support multiple passes but there are diminishing returns in
189 : : doing so. The first pass usually makes 90% of the changes that are doable.
190 : : A second pass can make a few more changes made possible by the first pass.
191 : : Experiments show any further passes don't make enough changes to justify
192 : : the expense.
193 : :
194 : : A study of spec92 using an unlimited number of passes:
195 : : [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
196 : : [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
197 : : [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
198 : :
199 : : It was found doing copy propagation between each pass enables further
200 : : substitutions.
201 : :
202 : : This study was done before expressions in REG_EQUAL notes were added as
203 : : candidate expressions for optimization, and before the GIMPLE optimizers
204 : : were added. Probably, multiple passes is even less efficient now than
205 : : at the time when the study was conducted.
206 : :
207 : : PRE is quite expensive in complicated functions because the DFA can take
208 : : a while to converge. Hence we only perform one pass.
209 : :
210 : : **********************
211 : :
212 : : The steps for PRE are:
213 : :
214 : : 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
215 : :
216 : : 2) Perform the data flow analysis for PRE.
217 : :
218 : : 3) Delete the redundant instructions
219 : :
220 : : 4) Insert the required copies [if any] that make the partially
221 : : redundant instructions fully redundant.
222 : :
223 : : 5) For other reaching expressions, insert an instruction to copy the value
224 : : to a newly created pseudo that will reach the redundant instruction.
225 : :
226 : : The deletion is done first so that when we do insertions we
227 : : know which pseudo reg to use.
228 : :
229 : : Various papers have argued that PRE DFA is expensive (O(n^2)) and others
230 : : argue it is not. The number of iterations for the algorithm to converge
231 : : is typically 2-4 so I don't view it as that expensive (relatively speaking).
232 : :
233 : : PRE GCSE depends heavily on the second CPROP pass to clean up the copies
234 : : we create. To make an expression reach the place where it's redundant,
235 : : the result of the expression is copied to a new register, and the redundant
236 : : expression is deleted by replacing it with this new register. Classic GCSE
237 : : doesn't have this problem as much as it computes the reaching defs of
238 : : each register in each block and thus can try to use an existing
239 : : register. */
240 : :
241 : : /* GCSE global vars. */
242 : :
243 : : struct target_gcse default_target_gcse;
244 : : #if SWITCHABLE_TARGET
245 : : struct target_gcse *this_target_gcse = &default_target_gcse;
246 : : #endif
247 : :
248 : : /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
249 : : int flag_rerun_cse_after_global_opts;
250 : :
251 : : /* An obstack for our working variables. */
252 : : static struct obstack gcse_obstack;
253 : :
/* Hash table of expressions.  */

struct gcse_expr
{
  /* The expression.  */
  rtx expr;
  /* Index in the available expression bitmaps.  Used as the column
     for this expression in the ANTLOC/COMP/TRANSP sbitmap vectors
     (see compute_local_properties).  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct gcse_expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct gcse_occr *antic_occr;
  /* List of available occurrence in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct gcse_occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.
     Reset to 0 by compute_local_properties at the start of each pass.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  HOST_WIDE_INT max_distance;
};
286 : :
/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct gcse_occr
{
  /* Next occurrence of this expression.  */
  struct gcse_occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.
     (Re)initialized to 0 by compute_local_properties.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  (Re)initialized to 0 by compute_local_properties.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

/* Convenience alias used where occurrence pointers are stored in vectors.  */
typedef struct gcse_occr *occr_t;
307 : :
/* Expression hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

struct gcse_hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  Each element
     heads a chain linked through gcse_expr::next_same_hash.  */
  struct gcse_expr **table;

  /* Size of the hash table, in elements (number of buckets).  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;
};
329 : :
330 : : /* Expression hash table. */
331 : : static struct gcse_hash_table_d expr_hash_table;
332 : :
/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by anything except itself,
   i.e. loads and stores to a single location.
   We can then allow movement of these MEM refs with a little special
   allowance.  (all stores copy the same value to the reaching reg used
   for the loads).  This means all values used to store into memory must have
   no side effects so we can re-issue the setter value.  */

struct ls_expr
{
  struct gcse_expr * expr;	/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem (the hash/equality key,
				   see pre_ldst_expr_hasher).  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  vec<rtx_insn *> stores;	/* INSN list of stores seen.  */
  struct ls_expr * next;	/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};
354 : :
355 : : /* Head of the list of load/store memory refs. */
356 : : static struct ls_expr * pre_ldst_mems = NULL;
357 : :
/* Hasher for the load/store table: ls_expr entries are keyed on their
   MEM pattern (see the hash and equal definitions below).  */
struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
{
  /* Lookups are done with an ls_expr* as well, so the compare type is
     the value type itself.  */
  typedef value_type compare_type;
  static inline hashval_t hash (const ls_expr *);
  static inline bool equal (const ls_expr *, const ls_expr *);
};
364 : :
365 : : /* Hashtable helpers. */
366 : : inline hashval_t
367 : 92571023 : pre_ldst_expr_hasher::hash (const ls_expr *x)
368 : : {
369 : 92571023 : int do_not_record_p = 0;
370 : 92571023 : return
371 : 92571023 : hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
372 : : }
373 : :
374 : : static bool expr_equiv_p (const_rtx, const_rtx);
375 : :
376 : : inline bool
377 : 110168674 : pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
378 : : const ls_expr *ptr2)
379 : : {
380 : 110168674 : return expr_equiv_p (ptr1->pattern, ptr2->pattern);
381 : : }
382 : :
383 : : /* Hashtable for the load/store memory refs. */
384 : : static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;
385 : :
386 : : /* Bitmap containing one bit for each register in the program.
387 : : Used when performing GCSE to track which registers have been set since
388 : : the start of the basic block. */
389 : : static regset reg_set_bitmap;
390 : :
391 : : /* Array, indexed by basic block number for a list of insns which modify
392 : : memory within that block. */
393 : : static vec<rtx_insn *> *modify_mem_list;
394 : : static bitmap modify_mem_list_set;
395 : :
396 : : /* This array parallels modify_mem_list, except that it stores MEMs
397 : : being set and their canonicalized memory addresses. */
398 : : static vec<modify_pair> *canon_modify_mem_list;
399 : :
400 : : /* Bitmap indexed by block numbers to record which blocks contain
401 : : function calls. */
402 : : static bitmap blocks_with_calls;
403 : :
404 : : /* Various variables for statistics gathering. */
405 : :
406 : : /* Memory used in a pass.
407 : : This isn't intended to be absolutely precise. Its intent is only
408 : : to keep an eye on memory usage. */
409 : : static int bytes_used;
410 : :
411 : : /* GCSE substitutions made. */
412 : : static int gcse_subst_count;
413 : : /* Number of copy instructions created. */
414 : : static int gcse_create_count;
415 : :
416 : : /* Doing code hoisting. */
417 : : static bool doing_code_hoisting_p = false;
418 : :
419 : : /* For available exprs */
420 : : static sbitmap *ae_kill;
421 : :
/* Data stored for each basic block (hung off bb->aux, see BB_DATA)
   while performing code hoisting.  */
struct bb_data
{
  /* Maximal register pressure inside basic block for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Recorded register pressure of basic block before trying to hoist
     an expression.  Will be used to restore the register pressure
     if the expression should not be hoisted.  */
  int old_pressure;
  /* Recorded register live_in info of basic block during code hoisting
     process.  BACKUP is used to record live_in info before trying to
     hoist an expression, and will be used to restore LIVE_IN if the
     expression should not be hoisted.  */
  bitmap live_in, backup;
};
438 : :
439 : : #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
440 : :
441 : : static basic_block curr_bb;
442 : :
443 : : /* Current register pressure for each pressure class. */
444 : : static int curr_reg_pressure[N_REG_CLASSES];
445 : :
446 : :
447 : : static void compute_can_copy (void);
448 : : static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
449 : : static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
450 : : static void *gcse_alloc (unsigned long);
451 : : static void alloc_gcse_mem (void);
452 : : static void free_gcse_mem (void);
453 : : static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
454 : : static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
455 : : static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
456 : : static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
457 : : static bool oprs_unchanged_p (const_rtx, const rtx_insn *, bool);
458 : : static bool oprs_anticipatable_p (const_rtx, const rtx_insn *);
459 : : static bool oprs_available_p (const_rtx, const rtx_insn *);
460 : : static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, bool, bool,
461 : : HOST_WIDE_INT, struct gcse_hash_table_d *);
462 : : static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
463 : : static void record_last_reg_set_info (rtx_insn *, int);
464 : : static void record_last_mem_set_info (rtx_insn *);
465 : : static void record_last_set_info (rtx, const_rtx, void *);
466 : : static void compute_hash_table (struct gcse_hash_table_d *);
467 : : static void alloc_hash_table (struct gcse_hash_table_d *);
468 : : static void free_hash_table (struct gcse_hash_table_d *);
469 : : static void compute_hash_table_work (struct gcse_hash_table_d *);
470 : : static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
471 : : static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
472 : : struct gcse_hash_table_d *);
473 : : static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
474 : : static bool load_killed_in_block_p (const_basic_block, int, const_rtx, bool);
475 : : static void alloc_pre_mem (int, int);
476 : : static void free_pre_mem (void);
477 : : static struct edge_list *compute_pre_data (void);
478 : : static bool pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
479 : : basic_block);
480 : : static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
481 : : static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
482 : : static void pre_insert_copies (void);
483 : : static bool pre_delete (void);
484 : : static bool pre_gcse (struct edge_list *);
485 : : static bool one_pre_gcse_pass (void);
486 : : static void add_label_notes (rtx, rtx_insn *);
487 : : static void alloc_code_hoist_mem (int, int);
488 : : static void free_code_hoist_mem (void);
489 : : static void compute_code_hoist_vbeinout (void);
490 : : static void compute_code_hoist_data (void);
491 : : static bool should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
492 : : basic_block,
493 : : sbitmap, HOST_WIDE_INT, int *,
494 : : enum reg_class,
495 : : int *, bitmap, rtx_insn *);
496 : : static bool hoist_code (void);
497 : : static enum reg_class get_regno_pressure_class (int regno, int *nregs);
498 : : static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
499 : : static bool one_code_hoisting_pass (void);
500 : : static rtx_insn *process_insert_insn (struct gcse_expr *);
501 : : static bool pre_edge_insert (struct edge_list *, struct gcse_expr **);
502 : : static bool pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
503 : : basic_block, char *);
504 : : static struct ls_expr * ldst_entry (rtx);
505 : : static void free_ldst_entry (struct ls_expr *);
506 : : static void free_ld_motion_mems (void);
507 : : static void print_ldst_list (FILE *);
508 : : static struct ls_expr * find_rtx_in_ldst (rtx);
509 : : static bool simple_mem (const_rtx);
510 : : static void invalidate_any_buried_refs (rtx);
511 : : static void compute_ld_motion_mems (void);
512 : : static void trim_ld_motion_mems (void);
513 : : static void update_ld_motion_stores (struct gcse_expr *);
514 : : static void clear_modify_mem_tables (void);
515 : : static void free_modify_mem_tables (void);
516 : :
517 : : #define GNEW(T) ((T *) gmalloc (sizeof (T)))
518 : : #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
519 : :
520 : : #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
521 : : #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
522 : :
523 : : #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
524 : : #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
525 : :
526 : : #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
527 : : #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
528 : :
529 : : /* Misc. utilities. */
530 : :
531 : : #define can_copy \
532 : : (this_target_gcse->x_can_copy)
533 : : #define can_copy_init_p \
534 : : (this_target_gcse->x_can_copy_init_p)
535 : :
536 : : /* Compute which modes support reg/reg copy operations. */
537 : :
538 : : static void
539 : 82402 : compute_can_copy (void)
540 : : {
541 : 82402 : int i;
542 : : #ifndef AVOID_CCMODE_COPIES
543 : : rtx reg;
544 : : rtx_insn *insn;
545 : : #endif
546 : 82402 : memset (can_copy, 0, NUM_MACHINE_MODES);
547 : :
548 : 82402 : start_sequence ();
549 : 10877064 : for (i = 0; i < NUM_MACHINE_MODES; i++)
550 : 10712260 : if (GET_MODE_CLASS (i) == MODE_CC)
551 : : {
552 : : #ifdef AVOID_CCMODE_COPIES
553 : 988824 : can_copy[i] = 0;
554 : : #else
555 : : reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
556 : : insn = emit_insn (gen_rtx_SET (reg, reg));
557 : : if (recog (PATTERN (insn), insn, NULL) >= 0)
558 : : can_copy[i] = 1;
559 : : #endif
560 : : }
561 : : else
562 : 9723436 : can_copy[i] = 1;
563 : :
564 : 82402 : end_sequence ();
565 : 82402 : }
566 : :
567 : : /* Returns whether the mode supports reg/reg copy operations. */
568 : :
569 : : bool
570 : 94231598 : can_copy_p (machine_mode mode)
571 : : {
572 : 94231598 : if (! can_copy_init_p)
573 : : {
574 : 82402 : compute_can_copy ();
575 : 82402 : can_copy_init_p = true;
576 : : }
577 : :
578 : 94231598 : return can_copy[mode] != 0;
579 : : }
580 : :
581 : : /* Cover function to xmalloc to record bytes allocated. */
582 : :
583 : : static void *
584 : 968762 : gmalloc (size_t size)
585 : : {
586 : 968762 : bytes_used += size;
587 : 0 : return xmalloc (size);
588 : : }
589 : :
590 : : /* Cover function to xcalloc to record bytes allocated. */
591 : :
592 : : static void *
593 : 1762058 : gcalloc (size_t nelem, size_t elsize)
594 : : {
595 : 1762058 : bytes_used += nelem * elsize;
596 : 1762058 : return xcalloc (nelem, elsize);
597 : : }
598 : :
599 : : /* Cover function to obstack_alloc. */
600 : :
601 : : static void *
602 : 24128572 : gcse_alloc (unsigned long size)
603 : : {
604 : 24128572 : bytes_used += size;
605 : 24128572 : return obstack_alloc (&gcse_obstack, size);
606 : : }
607 : :
608 : : /* Allocate memory for the reg/memory set tracking tables.
609 : : This is called at the start of each pass. */
610 : :
611 : : static void
612 : 484381 : alloc_gcse_mem (void)
613 : : {
614 : : /* Allocate vars to track sets of regs. */
615 : 484381 : reg_set_bitmap = ALLOC_REG_SET (NULL);
616 : :
617 : : /* Allocate array to keep a list of insns which modify memory in each
618 : : basic block. The two typedefs are needed to work around the
619 : : pre-processor limitation with template types in macro arguments. */
620 : 484381 : typedef vec<rtx_insn *> vec_rtx_heap;
621 : 484381 : typedef vec<modify_pair> vec_modify_pair_heap;
622 : 484381 : modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
623 : 484381 : canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
624 : : last_basic_block_for_fn (cfun));
625 : 484381 : modify_mem_list_set = BITMAP_ALLOC (NULL);
626 : 484381 : blocks_with_calls = BITMAP_ALLOC (NULL);
627 : 484381 : }
628 : :
/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem (void)
{
  FREE_REG_SET (reg_set_bitmap);

  /* NOTE(review): keep this call ahead of the BITMAP_FREEs below --
     free_modify_mem_tables presumably walks modify_mem_list_set to find
     the per-block vectors to release; verify before reordering.  */
  free_modify_mem_tables ();
  BITMAP_FREE (modify_mem_list_set);
  BITMAP_FREE (blocks_with_calls);
}
640 : :
641 : : /* Compute the local properties of each recorded expression.
642 : :
643 : : Local properties are those that are defined by the block, irrespective of
644 : : other blocks.
645 : :
646 : : An expression is transparent in a block if its operands are not modified
647 : : in the block.
648 : :
649 : : An expression is computed (locally available) in a block if it is computed
650 : : at least once and expression would contain the same value if the
651 : : computation was moved to the end of the block.
652 : :
653 : : An expression is locally anticipatable in a block if it is computed at
654 : : least once and expression would contain the same value if the computation
655 : : was moved to the beginning of the block.
656 : :
657 : : We call this routine for pre and code hoisting. They all compute
658 : : basically the same information and thus can easily share this code.
659 : :
660 : : TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
661 : : properties. If NULL, then it is not necessary to compute or record that
662 : : particular property.
663 : :
664 : : TABLE controls which hash table to look at. */
665 : :
666 : : static void
667 : 421077 : compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
668 : : struct gcse_hash_table_d *table)
669 : : {
670 : 421077 : unsigned int i;
671 : :
672 : : /* Initialize any bitmaps that were passed in. */
673 : 421077 : if (transp)
674 : : {
675 : 421077 : bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
676 : : }
677 : :
678 : 421077 : if (comp)
679 : 421077 : bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
680 : 421077 : if (antloc)
681 : 421077 : bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));
682 : :
683 : 19960634 : for (i = 0; i < table->size; i++)
684 : : {
685 : 19539557 : struct gcse_expr *expr;
686 : :
687 : 28801734 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
688 : : {
689 : 9262177 : int indx = expr->bitmap_index;
690 : 9262177 : struct gcse_occr *occr;
691 : :
692 : : /* The expression is transparent in this block if it is not killed.
693 : : We start by assuming all are transparent [none are killed], and
694 : : then reset the bits for those that are. */
695 : 9262177 : if (transp)
696 : 9262177 : compute_transp (expr->expr, indx, transp,
697 : : blocks_with_calls,
698 : : modify_mem_list_set,
699 : : canon_modify_mem_list);
700 : :
701 : : /* The occurrences recorded in antic_occr are exactly those that
702 : : we want to set to nonzero in ANTLOC. */
703 : 9262177 : if (antloc)
704 : 15793731 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
705 : : {
706 : 6531554 : bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
707 : :
708 : : /* While we're scanning the table, this is a good place to
709 : : initialize this. */
710 : 6531554 : occr->deleted_p = 0;
711 : : }
712 : :
713 : : /* The occurrences recorded in avail_occr are exactly those that
714 : : we want to set to nonzero in COMP. */
715 : 9262177 : if (comp)
716 : 17597018 : for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
717 : : {
718 : 8334841 : bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
719 : :
720 : : /* While we're scanning the table, this is a good place to
721 : : initialize this. */
722 : 8334841 : occr->copied_p = 0;
723 : : }
724 : :
725 : : /* While we're scanning the table, this is a good place to
726 : : initialize this. */
727 : 9262177 : expr->reaching_reg = 0;
728 : : }
729 : : }
730 : 421077 : }
731 : :
/* Hash table support.  */

/* Per-register information recorded while scanning basic blocks to
   build the expression hash table.  */
struct reg_avail_info
{
  /* Last basic block in which the register was seen set.  */
  basic_block last_bb;
  /* Positions of the first and last set of the register within
     LAST_BB -- presumably insn ordering numbers filled in by
     record_last_reg_set_info; verify against compute_hash_table_work.  */
  int first_set;
  int last_set;
};

/* Array of the above, indexed by register number.  */
static struct reg_avail_info *reg_avail_info;
/* Basic block currently being scanned.  */
static basic_block current_bb;
743 : :
/* See whether X, the source of a set, is something we want to consider for
   GCSE.  MODE is the mode of the value.  Returns true if X is a candidate.
   When code hoisting and a distance limit applies, the limit is stored
   through MAX_DISTANCE_PTR (if non-null); 0 there means "unrestricted"
   (see gcse_expr::max_distance).  */

static bool
want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use set_src_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constant leads to increased register pressure.

     RA can deal with this by rematerialing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CALL:
      return false;

    CASE_CONST_ANY:
      if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return false;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* PRE doesn't implement max_distance restriction.  */
	{
	  int cost;
	  HOST_WIDE_INT max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = set_src_cost (x, mode, 0);

	  if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
	    {
	      max_distance
		= ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10;
	      /* An expression so cheap that its scaled distance rounds to
		 zero is never worth hoisting.  */
	      if (max_distance == 0)
		return false;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      return can_assign_to_reg_without_clobbers_p (x, mode);
    }
}
820 : :
821 : : /* Used internally by can_assign_to_reg_without_clobbers_p. */
822 : :
823 : : static GTY(()) rtx_insn *test_insn;
824 : :
825 : : /* Return true if we can assign X to a pseudo register of mode MODE
826 : : such that the resulting insn does not result in clobbering a hard
827 : : register as a side-effect.
828 : :
829 : : Additionally, if the target requires it, check that the resulting insn
830 : : can be copied. If it cannot, this means that X is special and probably
831 : : has hidden side-effects we don't want to mess with.
832 : :
833 : : This function is typically used by code motion passes, to verify
834 : : that it is safe to insert an insn without worrying about clobbering
835 : : maybe live hard regs. */
836 : :
837 : : bool
838 : 18943477 : can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
839 : : {
840 : 18943477 : int num_clobbers = 0;
841 : 18943477 : int icode;
842 : 18943477 : bool can_assign = false;
843 : :
844 : : /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
845 : 18943477 : if (general_operand (x, mode))
846 : : return true;
847 : 9042835 : else if (GET_MODE (x) == VOIDmode)
848 : : return false;
849 : :
850 : : /* Otherwise, check if we can make a valid insn from it. First initialize
851 : : our test insn if we haven't already. */
852 : 9042835 : if (test_insn == 0)
853 : : {
854 : 66141 : test_insn
855 : 66141 : = make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
856 : : FIRST_PSEUDO_REGISTER * 2),
857 : : const0_rtx));
858 : 66141 : SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
859 : 66141 : INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
860 : : }
861 : :
862 : : /* Now make an insn like the one we would make when GCSE'ing and see if
863 : : valid. */
864 : 9042835 : PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
865 : 9042835 : SET_SRC (PATTERN (test_insn)) = x;
866 : :
867 : 9042835 : icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
868 : :
869 : : /* If the test insn is valid and doesn't need clobbers, and the target also
870 : : has no objections, we're good. */
871 : 9042835 : if (icode >= 0
872 : 8196220 : && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
873 : 15373626 : && ! (targetm.cannot_copy_insn_p
874 : 0 : && targetm.cannot_copy_insn_p (test_insn)))
875 : : can_assign = true;
876 : :
877 : : /* Make sure test_insn doesn't have any pointers into GC space. */
878 : 9042835 : SET_SRC (PATTERN (test_insn)) = NULL_RTX;
879 : :
880 : 9042835 : return can_assign;
881 : : }
882 : :
883 : : /* Return true if the operands of expression X are unchanged from the
884 : : start of INSN's basic block up to but not including INSN
885 : : (if AVAIL_P == false), or from INSN to the end of INSN's basic block
886 : : (if AVAIL_P == true). */
887 : :
888 : : static bool
889 : 67623866 : oprs_unchanged_p (const_rtx x, const rtx_insn *insn, bool avail_p)
890 : : {
891 : 67623866 : int i, j;
892 : 67623866 : enum rtx_code code;
893 : 67623866 : const char *fmt;
894 : :
895 : 67623866 : if (x == 0)
896 : : return true;
897 : :
898 : 67623866 : code = GET_CODE (x);
899 : 67623866 : switch (code)
900 : : {
901 : 20895146 : case REG:
902 : 20895146 : struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
903 : 20895146 : struct reg_avail_info *info = ®_avail_info[REGNO (x)];
904 : :
905 : 20895146 : if (info->last_bb != current_bb)
906 : : return true;
907 : 10256569 : if (avail_p)
908 : 5461033 : return info->last_set < DF_INSN_LUID (insn);
909 : : else
910 : 4795536 : return info->first_set >= DF_INSN_LUID (insn);
911 : : }
912 : :
913 : 11075267 : case MEM:
914 : 11075267 : if (! flag_gcse_lm
915 : 11075267 : || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
916 : : x, avail_p))
917 : 3448000 : return false;
918 : : else
919 : 7627267 : return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
920 : :
921 : : case PRE_DEC:
922 : : case PRE_INC:
923 : : case POST_DEC:
924 : : case POST_INC:
925 : : case PRE_MODIFY:
926 : : case POST_MODIFY:
927 : : return false;
928 : :
929 : : case PC:
930 : : case CONST:
931 : : CASE_CONST_ANY:
932 : : case SYMBOL_REF:
933 : : case LABEL_REF:
934 : : case ADDR_VEC:
935 : : case ADDR_DIFF_VEC:
936 : : return true;
937 : :
938 : 19686969 : default:
939 : 19686969 : break;
940 : : }
941 : :
942 : 36058076 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
943 : : {
944 : 35637014 : if (fmt[i] == 'e')
945 : : {
946 : : /* If we are about to do the last recursive call needed at this
947 : : level, change it into iteration. This function is called enough
948 : : to be worth it. */
949 : 34519763 : if (i == 0)
950 : 17805503 : return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
951 : :
952 : 16714260 : else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
953 : : return false;
954 : : }
955 : 1117251 : else if (fmt[i] == 'E')
956 : 1916630 : for (j = 0; j < XVECLEN (x, i); j++)
957 : 1495625 : if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
958 : : return false;
959 : : }
960 : :
961 : : return true;
962 : : }
963 : :
964 : : /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
965 : :
966 : : struct mem_conflict_info
967 : : {
968 : : /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
969 : : see if a memory store conflicts with this memory load. */
970 : : const_rtx mem;
971 : :
972 : : /* True if mems_conflict_for_gcse_p finds a conflict between two memory
973 : : references. */
974 : : bool conflict;
975 : : };
976 : :
977 : : /* DEST is the output of an instruction. If it is a memory reference and
978 : : possibly conflicts with the load found in DATA, then communicate this
979 : : information back through DATA. */
980 : :
981 : : static void
982 : 12885284 : mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
983 : : void *data)
984 : : {
985 : 12885284 : struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
986 : :
987 : 12885284 : while (GET_CODE (dest) == SUBREG
988 : 12885284 : || GET_CODE (dest) == ZERO_EXTRACT
989 : 25770568 : || GET_CODE (dest) == STRICT_LOW_PART)
990 : 0 : dest = XEXP (dest, 0);
991 : :
992 : : /* If DEST is not a MEM, then it will not conflict with the load. Note
993 : : that function calls are assumed to clobber memory, but are handled
994 : : elsewhere. */
995 : 12885284 : if (! MEM_P (dest))
996 : : return;
997 : :
998 : : /* If we are setting a MEM in our list of specially recognized MEMs,
999 : : don't mark as killed this time. */
1000 : 25095832 : if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
1001 : : {
1002 : 99185 : if (!find_rtx_in_ldst (dest))
1003 : 38137 : mci->conflict = true;
1004 : 99185 : return;
1005 : : }
1006 : :
1007 : 12644877 : if (true_dependence (dest, GET_MODE (dest), mci->mem))
1008 : 1407786 : mci->conflict = true;
1009 : : }
1010 : :
1011 : : /* Return true if the expression in X (a memory reference) is killed
1012 : : in block BB before or after the insn with the LUID in UID_LIMIT.
1013 : : AVAIL_P is true for kills after UID_LIMIT, and zero for kills
1014 : : before UID_LIMIT.
1015 : :
1016 : : To check the entire block, set UID_LIMIT to max_uid + 1 and
1017 : : AVAIL_P to false. */
1018 : :
1019 : : static bool
1020 : 11075245 : load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
1021 : : bool avail_p)
1022 : : {
1023 : 11075245 : vec<rtx_insn *> list = modify_mem_list[bb->index];
1024 : 11075245 : rtx_insn *setter;
1025 : 11075245 : unsigned ix;
1026 : :
1027 : : /* If this is a readonly then we aren't going to be changing it. */
1028 : 11075245 : if (MEM_READONLY_P (x))
1029 : : return false;
1030 : :
1031 : 49912868 : FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
1032 : : {
1033 : 37090070 : struct mem_conflict_info mci;
1034 : :
1035 : : /* Ignore entries in the list that do not apply. */
1036 : 59434044 : if ((avail_p
1037 : 15273825 : && DF_INSN_LUID (setter) < uid_limit)
1038 : 37090070 : || (! avail_p
1039 : 21816245 : && DF_INSN_LUID (setter) > uid_limit))
1040 : 22343974 : continue;
1041 : :
1042 : : /* If SETTER is a call everything is clobbered. Note that calls
1043 : : to pure functions are never put on the list, so we need not
1044 : : worry about them. */
1045 : 14746096 : if (CALL_P (setter))
1046 : 3447978 : return true;
1047 : :
1048 : : /* SETTER must be an INSN of some kind that sets memory. Call
1049 : : note_stores to examine each hunk of memory that is modified. */
1050 : 12744039 : mci.mem = x;
1051 : 12744039 : mci.conflict = false;
1052 : 12744039 : note_stores (setter, mems_conflict_for_gcse_p, &mci);
1053 : 12744039 : if (mci.conflict)
1054 : : return true;
1055 : : }
1056 : : return false;
1057 : : }
1058 : :
1059 : : /* Return true if the operands of expression X are unchanged from
1060 : : the start of INSN's basic block up to but not including INSN. */
1061 : :
1062 : : static bool
1063 : 11990551 : oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
1064 : : {
1065 : 0 : return oprs_unchanged_p (x, insn, false);
1066 : : }
1067 : :
1068 : : /* Return true if the operands of expression X are unchanged from
1069 : : INSN to the end of INSN's basic block. */
1070 : :
1071 : : static bool
1072 : 11990660 : oprs_available_p (const_rtx x, const rtx_insn *insn)
1073 : : {
1074 : 0 : return oprs_unchanged_p (x, insn, true);
1075 : : }
1076 : :
1077 : : /* Hash expression X.
1078 : :
1079 : : MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1080 : : indicating if a volatile operand is found or if the expression contains
1081 : : something we don't want to insert in the table. HASH_TABLE_SIZE is
1082 : : the current size of the hash table to be probed. */
1083 : :
1084 : : static unsigned int
1085 : 11990660 : hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p,
1086 : : int hash_table_size)
1087 : : {
1088 : 11990660 : unsigned int hash;
1089 : :
1090 : 11990660 : *do_not_record_p = 0;
1091 : :
1092 : 11990660 : hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1093 : 11990660 : return hash % hash_table_size;
1094 : : }
1095 : :
1096 : : /* Return true if exp1 is equivalent to exp2. */
1097 : :
1098 : : static bool
1099 : 135041779 : expr_equiv_p (const_rtx x, const_rtx y)
1100 : : {
1101 : 122520444 : return exp_equiv_p (x, y, 0, true);
1102 : : }
1103 : :
1104 : : /* Insert expression X in INSN in the hash TABLE.
1105 : : If it is already present, record it as the last occurrence in INSN's
1106 : : basic block.
1107 : :
1108 : : MODE is the mode of the value X is being stored into.
1109 : : It is only used if X is a CONST_INT.
1110 : :
1111 : : ANTIC_P is true if X is an anticipatable expression.
1112 : : AVAIL_P is true if X is an available expression.
1113 : :
1114 : : MAX_DISTANCE is the maximum distance in instructions this expression can
1115 : : be moved. */
1116 : :
1117 : : static void
1118 : 11990660 : insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
1119 : : bool antic_p, bool avail_p, HOST_WIDE_INT max_distance,
1120 : : struct gcse_hash_table_d *table)
1121 : : {
1122 : 11990660 : bool found;
1123 : 11990660 : int do_not_record_p;
1124 : 11990660 : unsigned int hash;
1125 : 11990660 : struct gcse_expr *cur_expr, *last_expr = NULL;
1126 : 11990660 : struct gcse_occr *antic_occr, *avail_occr;
1127 : :
1128 : 11990660 : hash = hash_expr (x, mode, &do_not_record_p, table->size);
1129 : :
1130 : : /* Do not insert expression in table if it contains volatile operands,
1131 : : or if hash_expr determines the expression is something we don't want
1132 : : to or can't handle. */
1133 : 11990660 : if (do_not_record_p)
1134 : 301805 : return;
1135 : :
1136 : 11688855 : cur_expr = table->table[hash];
1137 : 11688855 : found = false;
1138 : :
1139 : 15418656 : while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0)
1140 : : {
1141 : : /* If the expression isn't found, save a pointer to the end of
1142 : : the list. */
1143 : 3729801 : last_expr = cur_expr;
1144 : 3729801 : cur_expr = cur_expr->next_same_hash;
1145 : : }
1146 : :
1147 : 11688855 : if (! found)
1148 : : {
1149 : 9262177 : cur_expr = GOBNEW (struct gcse_expr);
1150 : 9262177 : bytes_used += sizeof (struct gcse_expr);
1151 : 9262177 : if (table->table[hash] == NULL)
1152 : : /* This is the first pattern that hashed to this index. */
1153 : 6856722 : table->table[hash] = cur_expr;
1154 : : else
1155 : : /* Add EXPR to end of this hash chain. */
1156 : 2405455 : last_expr->next_same_hash = cur_expr;
1157 : :
1158 : : /* Set the fields of the expr element. */
1159 : 9262177 : cur_expr->expr = x;
1160 : 9262177 : cur_expr->bitmap_index = table->n_elems++;
1161 : 9262177 : cur_expr->next_same_hash = NULL;
1162 : 9262177 : cur_expr->antic_occr = NULL;
1163 : 9262177 : cur_expr->avail_occr = NULL;
1164 : 9262177 : gcc_assert (max_distance >= 0);
1165 : 9262177 : cur_expr->max_distance = max_distance;
1166 : : }
1167 : : else
1168 : 2426678 : gcc_assert (cur_expr->max_distance == max_distance);
1169 : :
1170 : : /* Now record the occurrence(s). */
1171 : 11688855 : if (antic_p)
1172 : : {
1173 : 6541013 : antic_occr = cur_expr->antic_occr;
1174 : :
1175 : 6541013 : if (antic_occr
1176 : 6541013 : && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1177 : : antic_occr = NULL;
1178 : :
1179 : 4732423 : if (antic_occr)
1180 : : /* Found another instance of the expression in the same basic block.
1181 : : Prefer the currently recorded one. We want the first one in the
1182 : : block and the block is scanned from start to end. */
1183 : : ; /* nothing to do */
1184 : : else
1185 : : {
1186 : : /* First occurrence of this expression in this basic block. */
1187 : 6531554 : antic_occr = GOBNEW (struct gcse_occr);
1188 : 6531554 : bytes_used += sizeof (struct gcse_occr);
1189 : 6531554 : antic_occr->insn = insn;
1190 : 6531554 : antic_occr->next = cur_expr->antic_occr;
1191 : 6531554 : antic_occr->deleted_p = 0;
1192 : 6531554 : cur_expr->antic_occr = antic_occr;
1193 : : }
1194 : : }
1195 : :
1196 : 11688855 : if (avail_p)
1197 : : {
1198 : 8345709 : avail_occr = cur_expr->avail_occr;
1199 : :
1200 : 8345709 : if (avail_occr
1201 : 8345709 : && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1202 : : {
1203 : : /* Found another instance of the expression in the same basic block.
1204 : : Prefer this occurrence to the currently recorded one. We want
1205 : : the last one in the block and the block is scanned from start
1206 : : to end. */
1207 : 10868 : avail_occr->insn = insn;
1208 : : }
1209 : : else
1210 : : {
1211 : : /* First occurrence of this expression in this basic block. */
1212 : 8334841 : avail_occr = GOBNEW (struct gcse_occr);
1213 : 8334841 : bytes_used += sizeof (struct gcse_occr);
1214 : 8334841 : avail_occr->insn = insn;
1215 : 8334841 : avail_occr->next = cur_expr->avail_occr;
1216 : 8334841 : avail_occr->deleted_p = 0;
1217 : 8334841 : cur_expr->avail_occr = avail_occr;
1218 : : }
1219 : : }
1220 : : }
1221 : :
1222 : : /* Scan SET present in INSN and add an entry to the hash TABLE. */
1223 : :
1224 : : static void
1225 : 44106336 : hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
1226 : : {
1227 : 44106336 : rtx src = SET_SRC (set);
1228 : 44106336 : rtx dest = SET_DEST (set);
1229 : 44106336 : rtx note;
1230 : :
1231 : 44106336 : if (GET_CODE (src) == CALL)
1232 : : hash_scan_call (src, insn, table);
1233 : :
1234 : 42606246 : else if (REG_P (dest))
1235 : : {
1236 : 31672646 : unsigned int regno = REGNO (dest);
1237 : 31672646 : HOST_WIDE_INT max_distance = 0;
1238 : :
1239 : : /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1240 : :
1241 : : This allows us to do a single GCSE pass and still eliminate
1242 : : redundant constants, addresses or other expressions that are
1243 : : constructed with multiple instructions.
1244 : :
1245 : : However, keep the original SRC if INSN is a simple reg-reg move.
1246 : : In this case, there will almost always be a REG_EQUAL note on the
1247 : : insn that sets SRC. By recording the REG_EQUAL value here as SRC
1248 : : for INSN, we miss copy propagation opportunities and we perform the
1249 : : same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1250 : : do more than one PRE GCSE pass.
1251 : :
1252 : : Note that this does not impede profitable constant propagations. We
1253 : : "look through" reg-reg sets in lookup_avail_set. */
1254 : 31672646 : note = find_reg_equal_equiv_note (insn);
1255 : 31672646 : if (note != 0
1256 : 2685423 : && REG_NOTE_KIND (note) == REG_EQUAL
1257 : 2542954 : && !REG_P (src)
1258 : 33120442 : && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
1259 : 30971 : src = XEXP (note, 0), set = gen_rtx_SET (dest, src);
1260 : :
1261 : : /* Only record sets of pseudo-regs in the hash table. */
1262 : 31672646 : if (regno >= FIRST_PSEUDO_REGISTER
1263 : : /* Don't GCSE something if we can't do a reg/reg copy. */
1264 : 19039956 : && can_copy_p (GET_MODE (dest))
1265 : : /* GCSE commonly inserts instruction after the insn. We can't
1266 : : do that easily for EH edges so disable GCSE on these for now. */
1267 : : /* ??? We can now easily create new EH landing pads at the
1268 : : gimple level, for splitting edges; there's no reason we
1269 : : can't do the same thing at the rtl level. */
1270 : 19039956 : && !can_throw_internal (insn)
1271 : : /* Is SET_SRC something we want to gcse? */
1272 : 18957390 : && want_to_gcse_p (src, GET_MODE (dest), &max_distance)
1273 : : /* Don't CSE a nop. */
1274 : 12126282 : && ! set_noop_p (set)
1275 : : /* Don't GCSE if it has attached REG_EQUIV note.
1276 : : At this point this only function parameters should have
1277 : : REG_EQUIV notes and if the argument slot is used somewhere
1278 : : explicitly, it means address of parameter has been taken,
1279 : : so we should not extend the lifetime of the pseudo. */
1280 : 43798928 : && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1281 : : {
1282 : : /* An expression is not anticipatable if its operands are
1283 : : modified before this insn or if this is not the only SET in
1284 : : this insn. The latter condition does not have to mean that
1285 : : SRC itself is not anticipatable, but we just will not be
1286 : : able to handle code motion of insns with multiple sets. */
1287 : 11990551 : bool antic_p = (oprs_anticipatable_p (src, insn)
1288 : 11990551 : && !multiple_sets (insn));
1289 : : /* An expression is not available if its operands are
1290 : : subsequently modified, including this insn. It's also not
1291 : : available if this is a branch, because we can't insert
1292 : : a set after the branch. */
1293 : 11990551 : bool avail_p = (oprs_available_p (src, insn)
1294 : 11990551 : && ! JUMP_P (insn));
1295 : :
1296 : 11990551 : insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1297 : : max_distance, table);
1298 : : }
1299 : : }
1300 : : /* In case of store we want to consider the memory value as available in
1301 : : the REG stored in that memory. This makes it possible to remove
1302 : : redundant loads from due to stores to the same location. */
1303 : 10933600 : else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1304 : : {
1305 : 109 : unsigned int regno = REGNO (src);
1306 : 109 : HOST_WIDE_INT max_distance = 0;
1307 : :
1308 : : /* Only record sets of pseudo-regs in the hash table. */
1309 : 109 : if (regno >= FIRST_PSEUDO_REGISTER
1310 : : /* Don't GCSE something if we can't do a reg/reg copy. */
1311 : 109 : && can_copy_p (GET_MODE (src))
1312 : : /* GCSE commonly inserts instruction after the insn. We can't
1313 : : do that easily for EH edges so disable GCSE on these for now. */
1314 : 109 : && !can_throw_internal (insn)
1315 : : /* Is SET_DEST something we want to gcse? */
1316 : 109 : && want_to_gcse_p (dest, GET_MODE (dest), &max_distance)
1317 : : /* Don't CSE a nop. */
1318 : 109 : && ! set_noop_p (set)
1319 : : /* Don't GCSE if it has attached REG_EQUIV note.
1320 : : At this point this only function parameters should have
1321 : : REG_EQUIV notes and if the argument slot is used somewhere
1322 : : explicitly, it means address of parameter has been taken,
1323 : : so we should not extend the lifetime of the pseudo. */
1324 : 218 : && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1325 : 0 : || ! MEM_P (XEXP (note, 0))))
1326 : : {
1327 : : /* Stores are never anticipatable. */
1328 : 109 : bool antic_p = 0;
1329 : : /* An expression is not available if its operands are
1330 : : subsequently modified, including this insn. It's also not
1331 : : available if this is a branch, because we can't insert
1332 : : a set after the branch. */
1333 : 109 : bool avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn);
1334 : :
1335 : : /* Record the memory expression (DEST) in the hash table. */
1336 : 109 : insert_expr_in_table (dest, GET_MODE (dest), insn,
1337 : : antic_p, avail_p, max_distance, table);
1338 : : }
1339 : : }
1340 : 44106336 : }
1341 : :
1342 : : static void
1343 : 0 : hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1344 : : struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1345 : : {
1346 : : /* Currently nothing to do. */
1347 : 0 : }
1348 : :
1349 : : static void
1350 : 0 : hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1351 : : struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1352 : : {
1353 : : /* Currently nothing to do. */
1354 : 0 : }
1355 : :
1356 : : /* Process INSN and add hash table entries as appropriate. */
1357 : :
1358 : : static void
1359 : 46310204 : hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
1360 : : {
1361 : 46310204 : rtx pat = PATTERN (insn);
1362 : 46310204 : int i;
1363 : :
1364 : : /* Pick out the sets of INSN and for other forms of instructions record
1365 : : what's been modified. */
1366 : :
1367 : 46310204 : if (GET_CODE (pat) == SET)
1368 : 36332479 : hash_scan_set (pat, insn, table);
1369 : :
1370 : 9977725 : else if (GET_CODE (pat) == CLOBBER)
1371 : : hash_scan_clobber (pat, insn, table);
1372 : :
1373 : 9960206 : else if (GET_CODE (pat) == CALL)
1374 : : hash_scan_call (pat, insn, table);
1375 : :
1376 : 7942577 : else if (GET_CODE (pat) == PARALLEL)
1377 : 22946354 : for (i = 0; i < XVECLEN (pat, 0); i++)
1378 : : {
1379 : 15409071 : rtx x = XVECEXP (pat, 0, i);
1380 : :
1381 : 15409071 : if (GET_CODE (x) == SET)
1382 : 7773857 : hash_scan_set (x, insn, table);
1383 : : else if (GET_CODE (x) == CLOBBER)
1384 : : hash_scan_clobber (x, insn, table);
1385 : : else if (GET_CODE (x) == CALL)
1386 : : hash_scan_call (x, insn, table);
1387 : : }
1388 : 46310204 : }
1389 : :
1390 : : /* Dump the hash table TABLE to file FILE under the name NAME. */
1391 : :
1392 : : static void
1393 : 24 : dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
1394 : : {
1395 : 24 : int i;
1396 : : /* Flattened out table, so it's printed in proper order. */
1397 : 24 : struct gcse_expr **flat_table;
1398 : 24 : unsigned int *hash_val;
1399 : 24 : struct gcse_expr *expr;
1400 : :
1401 : 24 : flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
1402 : 24 : hash_val = XNEWVEC (unsigned int, table->n_elems);
1403 : :
1404 : 424 : for (i = 0; i < (int) table->size; i++)
1405 : 640 : for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1406 : : {
1407 : 264 : flat_table[expr->bitmap_index] = expr;
1408 : 264 : hash_val[expr->bitmap_index] = i;
1409 : : }
1410 : :
1411 : 24 : fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1412 : : name, table->size, table->n_elems);
1413 : :
1414 : 312 : for (i = 0; i < (int) table->n_elems; i++)
1415 : 264 : if (flat_table[i] != 0)
1416 : : {
1417 : 264 : expr = flat_table[i];
1418 : 264 : fprintf (file, "Index %d (hash value %d; max distance "
1419 : : HOST_WIDE_INT_PRINT_DEC ")\n ",
1420 : 264 : expr->bitmap_index, hash_val[i], expr->max_distance);
1421 : 264 : print_rtl (file, expr->expr);
1422 : 264 : fprintf (file, "\n");
1423 : : }
1424 : :
1425 : 24 : fprintf (file, "\n");
1426 : :
1427 : 24 : free (flat_table);
1428 : 24 : free (hash_val);
1429 : 24 : }
1430 : :
1431 : : /* Record register first/last/block set information for REGNO in INSN.
1432 : :
1433 : : first_set records the first place in the block where the register
1434 : : is set and is used to compute "anticipatability".
1435 : :
1436 : : last_set records the last place in the block where the register
1437 : : is set and is used to compute "availability".
1438 : :
1439 : : last_bb records the block for which first_set and last_set are
1440 : : valid, as a quick test to invalidate them. */
1441 : :
1442 : : static void
1443 : 336800345 : record_last_reg_set_info (rtx_insn *insn, int regno)
1444 : : {
1445 : 336800345 : struct reg_avail_info *info = ®_avail_info[regno];
1446 : 336800345 : int luid = DF_INSN_LUID (insn);
1447 : :
1448 : 336800345 : info->last_set = luid;
1449 : 336800345 : if (info->last_bb != current_bb)
1450 : : {
1451 : 252070880 : info->last_bb = current_bb;
1452 : 252070880 : info->first_set = luid;
1453 : : }
1454 : 336800345 : }
1455 : :
1456 : : /* Record memory modification information for INSN. We do not actually care
1457 : : about the memory location(s) that are set, or even how they are set (consider
1458 : : a CALL_INSN). We merely need to record which insns modify memory. */
1459 : :
1460 : : static void
1461 : 8417185 : record_last_mem_set_info (rtx_insn *insn)
1462 : : {
1463 : 8417185 : if (! flag_gcse_lm)
1464 : : return;
1465 : :
1466 : 8417171 : record_last_mem_set_info_common (insn, modify_mem_list,
1467 : : canon_modify_mem_list,
1468 : : modify_mem_list_set,
1469 : : blocks_with_calls);
1470 : : }
1471 : :
1472 : : /* Called from compute_hash_table via note_stores to handle one
1473 : : SET or CLOBBER in an insn. DATA is really the instruction in which
1474 : : the SET is taking place. */
1475 : :
1476 : : static void
1477 : 51608263 : record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1478 : : {
1479 : 51608263 : rtx_insn *last_set_insn = (rtx_insn *) data;
1480 : :
1481 : 51608263 : if (GET_CODE (dest) == SUBREG)
1482 : 0 : dest = SUBREG_REG (dest);
1483 : :
1484 : 51608263 : if (REG_P (dest))
1485 : 40862694 : record_last_reg_set_info (last_set_insn, REGNO (dest));
1486 : 10745569 : else if (MEM_P (dest)
1487 : : /* Ignore pushes, they clobber nothing. */
1488 : 10745569 : && ! push_operand (dest, GET_MODE (dest)))
1489 : 5124852 : record_last_mem_set_info (last_set_insn);
1490 : 51608263 : }
1491 : :
1492 : : /* Top level function to create an expression hash table.
1493 : :
1494 : : Expression entries are placed in the hash table if
1495 : : - they are of the form (set (pseudo-reg) src),
1496 : : - src is something we want to perform GCSE on,
1497 : : - none of the operands are subsequently modified in the block
1498 : :
1499 : : Currently src must be a pseudo-reg or a const_int.
1500 : :
1501 : : TABLE is the table computed. */
1502 : :
1503 : : static void
1504 : 484381 : compute_hash_table_work (struct gcse_hash_table_d *table)
1505 : : {
1506 : 484381 : int i;
1507 : :
1508 : : /* re-Cache any INSN_LIST nodes we have allocated. */
1509 : 484381 : clear_modify_mem_tables ();
1510 : : /* Some working arrays used to track first and last set in each block. */
1511 : 484381 : reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1512 : :
1513 : 77140356 : for (i = 0; i < max_reg_num (); ++i)
1514 : 76655975 : reg_avail_info[i].last_bb = NULL;
1515 : :
1516 : 9487271 : FOR_EACH_BB_FN (current_bb, cfun)
1517 : : {
1518 : 9002890 : rtx_insn *insn;
1519 : 9002890 : unsigned int regno;
1520 : :
1521 : : /* First pass over the instructions records information used to
1522 : : determine when registers and memory are first and last set. */
1523 : 106809320 : FOR_BB_INSNS (current_bb, insn)
1524 : : {
1525 : 97806430 : if (!NONDEBUG_INSN_P (insn))
1526 : 51496226 : continue;
1527 : :
1528 : 46310204 : if (CALL_P (insn))
1529 : : {
1530 : 3637521 : hard_reg_set_iterator hrsi;
1531 : :
1532 : : /* We don't track modes of hard registers, so we need
1533 : : to be conservative and assume that partial kills
1534 : : are full kills. */
1535 : 3637521 : HARD_REG_SET callee_clobbers
1536 : 3637521 : = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
1537 : 299575172 : EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
1538 : 295937651 : record_last_reg_set_info (insn, regno);
1539 : :
1540 : 7146921 : if (! RTL_CONST_OR_PURE_CALL_P (insn)
1541 : 369134 : || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1542 : 3983590 : || can_throw_external (insn))
1543 : 3292333 : record_last_mem_set_info (insn);
1544 : : }
1545 : :
1546 : 46310204 : note_stores (insn, record_last_set_info, insn);
1547 : : }
1548 : :
1549 : : /* The next pass builds the hash table. */
1550 : 106809320 : FOR_BB_INSNS (current_bb, insn)
1551 : 97806430 : if (NONDEBUG_INSN_P (insn))
1552 : 46310204 : hash_scan_insn (insn, table);
1553 : : }
1554 : :
1555 : 484381 : free (reg_avail_info);
1556 : 484381 : reg_avail_info = NULL;
1557 : 484381 : }
1558 : :
1559 : : /* Allocate space for the set/expr hash TABLE.
1560 : : It is used to determine the number of buckets to use. */
1561 : :
1562 : : static void
1563 : 484381 : alloc_hash_table (struct gcse_hash_table_d *table)
1564 : : {
1565 : 484381 : int n;
1566 : :
1567 : 484381 : n = get_max_insn_count ();
1568 : :
1569 : 484381 : table->size = n / 4;
1570 : 484381 : if (table->size < 11)
1571 : 187562 : table->size = 11;
1572 : :
1573 : : /* Attempt to maintain efficient use of hash table.
1574 : : Making it an odd number is simplest for now.
1575 : : ??? Later take some measurements. */
1576 : 484381 : table->size |= 1;
1577 : 484381 : n = table->size * sizeof (struct gcse_expr *);
1578 : 484381 : table->table = GNEWVAR (struct gcse_expr *, n);
1579 : 484381 : }
1580 : :
1581 : : /* Free things allocated by alloc_hash_table. */
1582 : :
1583 : : static void
1584 : 484381 : free_hash_table (struct gcse_hash_table_d *table)
1585 : : {
1586 : 484381 : free (table->table);
1587 : 0 : }
1588 : :
1589 : : /* Compute the expression hash table TABLE. */
1590 : :
1591 : : static void
1592 : 484381 : compute_hash_table (struct gcse_hash_table_d *table)
1593 : : {
1594 : : /* Initialize count of number of entries in hash table. */
1595 : 484381 : table->n_elems = 0;
1596 : 484381 : memset (table->table, 0, table->size * sizeof (struct gcse_expr *));
1597 : :
1598 : 484381 : compute_hash_table_work (table);
1599 : 484381 : }
1600 : :
1601 : : /* Expression tracking support. */
1602 : :
1603 : : /* Clear canon_modify_mem_list and modify_mem_list tables. */
1604 : : static void
1605 : 968762 : clear_modify_mem_tables (void)
1606 : : {
1607 : 968762 : unsigned i;
1608 : 968762 : bitmap_iterator bi;
1609 : :
1610 : 4672197 : EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1611 : : {
1612 : 3703435 : modify_mem_list[i].release ();
1613 : 3703435 : canon_modify_mem_list[i].release ();
1614 : : }
1615 : 968762 : bitmap_clear (modify_mem_list_set);
1616 : 968762 : bitmap_clear (blocks_with_calls);
1617 : 968762 : }
1618 : :
1619 : : /* Release memory used by modify_mem_list_set. */
1620 : :
1621 : : static void
1622 : 484381 : free_modify_mem_tables (void)
1623 : : {
1624 : 484381 : clear_modify_mem_tables ();
1625 : 484381 : free (modify_mem_list);
1626 : 484381 : free (canon_modify_mem_list);
1627 : 484381 : modify_mem_list = 0;
1628 : 484381 : canon_modify_mem_list = 0;
1629 : 484381 : }
1630 : :
1631 : : /* Compute PRE+LCM working variables. */
1632 : :
1633 : : /* Local properties of expressions. */
1634 : :
1635 : : /* Nonzero for expressions that are transparent in the block. */
1636 : : static sbitmap *transp;
1637 : :
1638 : : /* Nonzero for expressions that are computed (available) in the block. */
1639 : : static sbitmap *comp;
1640 : :
1641 : : /* Nonzero for expressions that are locally anticipatable in the block. */
1642 : : static sbitmap *antloc;
1643 : :
1644 : : /* Nonzero for expressions where this block is an optimal computation
1645 : : point. */
1646 : : static sbitmap *pre_optimal;
1647 : :
1648 : : /* Nonzero for expressions which are redundant in a particular block. */
1649 : : static sbitmap *pre_redundant;
1650 : :
1651 : : /* Nonzero for expressions which should be inserted on a specific edge. */
1652 : : static sbitmap *pre_insert_map;
1653 : :
1654 : : /* Nonzero for expressions which should be deleted in a specific block. */
1655 : : static sbitmap *pre_delete_map;
1656 : :
1657 : : /* Allocate vars used for PRE analysis. */
1658 : :
1659 : : static void
1660 : 396648 : alloc_pre_mem (int n_blocks, int n_exprs)
1661 : : {
1662 : 396648 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1663 : 396648 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1664 : 396648 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1665 : :
1666 : 396648 : pre_optimal = NULL;
1667 : 396648 : pre_redundant = NULL;
1668 : 396648 : pre_insert_map = NULL;
1669 : 396648 : pre_delete_map = NULL;
1670 : 396648 : ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1671 : :
1672 : : /* pre_insert and pre_delete are allocated later. */
1673 : 396648 : }
1674 : :
1675 : : /* Free vars used for PRE analysis. */
1676 : :
1677 : : static void
1678 : 396648 : free_pre_mem (void)
1679 : : {
1680 : 396648 : sbitmap_vector_free (transp);
1681 : 396648 : sbitmap_vector_free (comp);
1682 : :
1683 : : /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1684 : :
1685 : 396648 : if (pre_optimal)
1686 : 0 : sbitmap_vector_free (pre_optimal);
1687 : 396648 : if (pre_redundant)
1688 : 0 : sbitmap_vector_free (pre_redundant);
1689 : 396648 : if (pre_insert_map)
1690 : 396648 : sbitmap_vector_free (pre_insert_map);
1691 : 396648 : if (pre_delete_map)
1692 : 396648 : sbitmap_vector_free (pre_delete_map);
1693 : :
1694 : 396648 : transp = comp = NULL;
1695 : 396648 : pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1696 : 396648 : }
1697 : :
1698 : : /* Remove certain expressions from anticipatable and transparent
1699 : : sets of basic blocks that have incoming abnormal edge.
1700 : : For PRE remove potentially trapping expressions to avoid placing
1701 : : them on abnormal edges. For hoisting remove memory references that
1702 : : can be clobbered by calls. */
1703 : :
1704 : : static void
1705 : 421077 : prune_expressions (bool pre_p)
1706 : : {
1707 : 421077 : struct gcse_expr *expr;
1708 : 421077 : unsigned int ui;
1709 : 421077 : basic_block bb;
1710 : :
1711 : 421077 : auto_sbitmap prune_exprs (expr_hash_table.n_elems);
1712 : 421077 : bitmap_clear (prune_exprs);
1713 : 19960634 : for (ui = 0; ui < expr_hash_table.size; ui++)
1714 : : {
1715 : 28801734 : for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1716 : : {
1717 : : /* Note potentially trapping expressions. */
1718 : 9262177 : if (may_trap_p (expr->expr))
1719 : : {
1720 : 2516385 : bitmap_set_bit (prune_exprs, expr->bitmap_index);
1721 : 2516385 : continue;
1722 : : }
1723 : :
1724 : 6745792 : if (!pre_p && contains_mem_rtx_p (expr->expr))
1725 : : /* Note memory references that can be clobbered by a call.
1726 : : We do not split abnormal edges in hoisting, so would
1727 : : a memory reference get hoisted along an abnormal edge,
1728 : : it would be placed /before/ the call. Therefore, only
1729 : : constant memory references can be hoisted along abnormal
1730 : : edges. */
1731 : : {
1732 : 35563 : rtx x = expr->expr;
1733 : :
1734 : : /* Common cases where we might find the MEM which may allow us
1735 : : to avoid pruning the expression. */
1736 : 35863 : while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
1737 : 300 : x = XEXP (x, 0);
1738 : :
1739 : : /* If we found the MEM, go ahead and look at it to see if it has
1740 : : properties that allow us to avoid pruning its expression out
1741 : : of the tables. */
1742 : 35563 : if (MEM_P (x))
1743 : : {
1744 : 36248 : if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1745 : 35525 : && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
1746 : 723 : continue;
1747 : :
1748 : 36363 : if (MEM_READONLY_P (x)
1749 : 1561 : && !MEM_VOLATILE_P (x)
1750 : 36363 : && MEM_NOTRAP_P (x))
1751 : : /* Constant memory reference, e.g., a PIC address. */
1752 : 1561 : continue;
1753 : : }
1754 : :
1755 : : /* ??? Optimally, we would use interprocedural alias
1756 : : analysis to determine if this mem is actually killed
1757 : : by this call. */
1758 : :
1759 : 33279 : bitmap_set_bit (prune_exprs, expr->bitmap_index);
1760 : : }
1761 : : }
1762 : : }
1763 : :
1764 : 9118107 : FOR_EACH_BB_FN (bb, cfun)
1765 : : {
1766 : 8697030 : edge e;
1767 : 8697030 : edge_iterator ei;
1768 : :
1769 : : /* If the current block is the destination of an abnormal edge, we
1770 : : kill all trapping (for PRE) and memory (for hoist) expressions
1771 : : because we won't be able to properly place the instruction on
1772 : : the edge. So make them neither anticipatable nor transparent.
1773 : : This is fairly conservative.
1774 : :
1775 : : ??? For hoisting it may be necessary to check for set-and-jump
1776 : : instructions here, not just for abnormal edges. The general problem
1777 : : is that when an expression cannot not be placed right at the end of
1778 : : a basic block we should account for any side-effects of a subsequent
1779 : : jump instructions that could clobber the expression. It would
1780 : : be best to implement this check along the lines of
1781 : : should_hoist_expr_to_dom where the target block is already known
1782 : : and, hence, there's no need to conservatively prune expressions on
1783 : : "intermediate" set-and-jump instructions. */
1784 : 20966553 : FOR_EACH_EDGE (e, ei, bb->preds)
1785 : 12446311 : if ((e->flags & EDGE_ABNORMAL)
1786 : 176889 : && (pre_p || CALL_P (BB_END (e->src))))
1787 : : {
1788 : 176788 : bitmap_and_compl (antloc[bb->index],
1789 : 176788 : antloc[bb->index], prune_exprs);
1790 : 176788 : bitmap_and_compl (transp[bb->index],
1791 : 176788 : transp[bb->index], prune_exprs);
1792 : 176788 : break;
1793 : : }
1794 : : }
1795 : 421077 : }
1796 : :
1797 : : /* It may be necessary to insert a large number of insns on edges to
1798 : : make the existing occurrences of expressions fully redundant. This
1799 : : routine examines the set of insertions and deletions and if the ratio
1800 : : of insertions to deletions is too high for a particular expression, then
1801 : : the expression is removed from the insertion/deletion sets.
1802 : :
1803 : : N_ELEMS is the number of elements in the hash table. */
1804 : :
1805 : : static void
1806 : 396648 : prune_insertions_deletions (int n_elems)
1807 : : {
1808 : 396648 : sbitmap_iterator sbi;
1809 : :
1810 : : /* We always use I to iterate over blocks/edges and J to iterate over
1811 : : expressions. */
1812 : 396648 : unsigned int i, j;
1813 : :
1814 : : /* Counts for the number of times an expression needs to be inserted and
1815 : : number of times an expression can be removed as a result. */
1816 : 396648 : int *insertions = GCNEWVEC (int, n_elems);
1817 : 396648 : int *deletions = GCNEWVEC (int, n_elems);
1818 : :
1819 : : /* Set of expressions which require too many insertions relative to
1820 : : the number of deletions achieved. We will prune these out of the
1821 : : insertion/deletion sets. */
1822 : 396648 : auto_sbitmap prune_exprs (n_elems);
1823 : 396648 : bitmap_clear (prune_exprs);
1824 : :
1825 : : /* Iterate over the edges counting the number of times each expression
1826 : : needs to be inserted. */
1827 : 14065649 : for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1828 : : {
1829 : 26873614 : EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
1830 : 328908 : insertions[j]++;
1831 : : }
1832 : :
1833 : : /* Similarly for deletions, but those occur in blocks rather than on
1834 : : edges. */
1835 : 10345251 : for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1836 : : {
1837 : 20797742 : EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
1838 : 900536 : deletions[j]++;
1839 : : }
1840 : :
1841 : : /* Now that we have accurate counts, iterate over the elements in the
1842 : : hash table and see if any need too many insertions relative to the
1843 : : number of evaluations that can be removed. If so, mark them in
1844 : : PRUNE_EXPRS. */
1845 : 9384018 : for (j = 0; j < (unsigned) n_elems; j++)
1846 : 8987370 : if (deletions[j]
1847 : 343346 : && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio)
1848 : 557 : bitmap_set_bit (prune_exprs, j);
1849 : :
1850 : : /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1851 : 793853 : EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
1852 : : {
1853 : 343447 : for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1854 : 342890 : bitmap_clear_bit (pre_insert_map[i], j);
1855 : :
1856 : 231444 : for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1857 : 230887 : bitmap_clear_bit (pre_delete_map[i], j);
1858 : : }
1859 : :
1860 : 396648 : free (insertions);
1861 : 396648 : free (deletions);
1862 : 396648 : }
1863 : :
1864 : : /* Top level routine to do the dataflow analysis needed by PRE. */
1865 : :
1866 : : static struct edge_list *
1867 : 396648 : compute_pre_data (void)
1868 : : {
1869 : 396648 : struct edge_list *edge_list;
1870 : 396648 : basic_block bb;
1871 : :
1872 : 396648 : compute_local_properties (transp, comp, antloc, &expr_hash_table);
1873 : 396648 : prune_expressions (true);
1874 : 396648 : bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));
1875 : :
1876 : : /* Compute ae_kill for each basic block using:
1877 : :
1878 : : ~(TRANSP | COMP)
1879 : : */
1880 : :
1881 : 8794256 : FOR_EACH_BB_FN (bb, cfun)
1882 : : {
1883 : 8397608 : bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1884 : 8397608 : bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1885 : : }
1886 : :
1887 : 396648 : edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1888 : : ae_kill, &pre_insert_map, &pre_delete_map);
1889 : 396648 : sbitmap_vector_free (antloc);
1890 : 396648 : antloc = NULL;
1891 : 396648 : sbitmap_vector_free (ae_kill);
1892 : 396648 : ae_kill = NULL;
1893 : :
1894 : 396648 : prune_insertions_deletions (expr_hash_table.n_elems);
1895 : :
1896 : 396648 : return edge_list;
1897 : : }
1898 : :
1899 : : /* PRE utilities */
1900 : :
1901 : : /* Return true if an occurrence of expression EXPR in OCCR_BB would reach
1902 : : block BB.
1903 : :
1904 : : VISITED is a pointer to a working buffer for tracking which BB's have
1905 : : been visited. It is NULL for the top-level call.
1906 : :
1907 : : We treat reaching expressions that go through blocks containing the same
1908 : : reaching expression as "not reaching". E.g. if EXPR is generated in blocks
1909 : : 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
1910 : : 2 as not reaching. The intent is to improve the probability of finding
1911 : : only one reaching expression and to reduce register lifetimes by picking
1912 : : the closest such expression. */
1913 : :
1914 : : static bool
1915 : 18589100 : pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
1916 : : basic_block bb, char *visited)
1917 : : {
1918 : 18589100 : edge pred;
1919 : 18589100 : edge_iterator ei;
1920 : :
1921 : 39839008 : FOR_EACH_EDGE (pred, ei, bb->preds)
1922 : : {
1923 : 23685584 : basic_block pred_bb = pred->src;
1924 : :
1925 : 23685584 : if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
1926 : : /* Has predecessor has already been visited? */
1927 : 23404807 : || visited[pred_bb->index])
1928 : : ;/* Nothing to do. */
1929 : :
1930 : : /* Does this predecessor generate this expression? */
1931 : 19535444 : else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
1932 : : {
1933 : : /* Is this the occurrence we're looking for?
1934 : : Note that there's only one generating occurrence per block
1935 : : so we just need to check the block number. */
1936 : 2257224 : if (occr_bb == pred_bb)
1937 : : return true;
1938 : :
1939 : 1966244 : visited[pred_bb->index] = 1;
1940 : : }
1941 : : /* Ignore this predecessor if it kills the expression. */
1942 : 17278220 : else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
1943 : 178811 : visited[pred_bb->index] = 1;
1944 : :
1945 : : /* Neither gen nor kill. */
1946 : : else
1947 : : {
1948 : 17099409 : visited[pred_bb->index] = 1;
1949 : 17099409 : if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
1950 : : return true;
1951 : : }
1952 : : }
1953 : :
1954 : : /* All paths have been checked. */
1955 : : return false;
1956 : : }
1957 : :
1958 : : /* The wrapper for pre_expr_reaches_here_work that ensures that any
1959 : : memory allocated for that function is returned. */
1960 : :
1961 : : static bool
1962 : 1489691 : pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr,
1963 : : basic_block bb)
1964 : : {
1965 : 1489691 : bool rval;
1966 : 1489691 : char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
1967 : :
1968 : 1489691 : rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
1969 : :
1970 : 1489691 : free (visited);
1971 : 1489691 : return rval;
1972 : : }
1973 : :
1974 : : /* Generate RTL to copy an EXP to REG and return it. */
1975 : :
1976 : : rtx_insn *
1977 : 306774 : prepare_copy_insn (rtx reg, rtx exp)
1978 : : {
1979 : 306774 : rtx_insn *pat;
1980 : :
1981 : 306774 : start_sequence ();
1982 : :
1983 : : /* If the expression is something that's an operand, like a constant,
1984 : : just copy it to a register. */
1985 : 306774 : if (general_operand (exp, GET_MODE (reg)))
1986 : 98471 : emit_move_insn (reg, exp);
1987 : :
1988 : : /* Otherwise, make a new insn to compute this expression and make sure the
1989 : : insn will be recognized (this also adds any needed CLOBBERs). */
1990 : : else
1991 : : {
1992 : 208303 : rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));
1993 : :
1994 : 208303 : if (insn_invalid_p (insn, false))
1995 : 0 : gcc_unreachable ();
1996 : : }
1997 : :
1998 : 306774 : pat = get_insns ();
1999 : 306774 : end_sequence ();
2000 : :
2001 : 306774 : return pat;
2002 : : }
2003 : :
2004 : : /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2005 : :
2006 : : static rtx_insn *
2007 : 306753 : process_insert_insn (struct gcse_expr *expr)
2008 : : {
2009 : 306753 : rtx reg = expr->reaching_reg;
2010 : : /* Copy the expression to make sure we don't have any sharing issues. */
2011 : 306753 : rtx exp = copy_rtx (expr->expr);
2012 : :
2013 : 306753 : return prepare_copy_insn (reg, exp);
2014 : : }
2015 : :
2016 : : /* Return the INSN which is added at the end of the block BB with
2017 : : same instruction pattern with PAT. */
2018 : :
2019 : : rtx_insn *
2020 : 64346 : insert_insn_end_basic_block (rtx_insn *pat, basic_block bb)
2021 : : {
2022 : 64346 : rtx_insn *insn = BB_END (bb);
2023 : 64346 : rtx_insn *new_insn;
2024 : 64346 : rtx_insn *pat_end;
2025 : :
2026 : 64346 : gcc_assert (pat && INSN_P (pat));
2027 : :
2028 : : pat_end = pat;
2029 : 64885 : while (NEXT_INSN (pat_end) != NULL_RTX)
2030 : : pat_end = NEXT_INSN (pat_end);
2031 : :
2032 : : /* If the last insn is a jump, insert EXPR in front. Similarly we need to
2033 : : take care of trapping instructions in presence of non-call exceptions. */
2034 : :
2035 : 64346 : if (JUMP_P (insn)
2036 : 64346 : || (NONJUMP_INSN_P (insn)
2037 : 17903 : && (!single_succ_p (bb)
2038 : 3144 : || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2039 : : {
2040 : : /* FIXME: What if something in jump uses value set in new insn? */
2041 : 23738 : new_insn = emit_insn_before_noloc (pat, insn, bb);
2042 : : }
2043 : :
2044 : : /* Likewise if the last insn is a call, as will happen in the presence
2045 : : of exception handling. */
2046 : 40608 : else if (CALL_P (insn)
2047 : 78070 : && (!single_succ_p (bb)
2048 : 5146 : || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2049 : : {
2050 : : /* Keeping in mind targets with small register classes and parameters
2051 : : in registers, we search backward and place the instructions before
2052 : : the first parameter is loaded. Do this for everyone for consistency
2053 : : and a presumption that we'll get better code elsewhere as well. */
2054 : :
2055 : : /* Since different machines initialize their parameter registers
2056 : : in different orders, assume nothing. Collect the set of all
2057 : : parameter registers. */
2058 : 37371 : insn = find_first_parameter_load (insn, BB_HEAD (bb));
2059 : :
2060 : : /* If we found all the parameter loads, then we want to insert
2061 : : before the first parameter load.
2062 : :
2063 : : If we did not find all the parameter loads, then we might have
2064 : : stopped on the head of the block, which could be a CODE_LABEL.
2065 : : If we inserted before the CODE_LABEL, then we would be putting
2066 : : the insn in the wrong basic block. In that case, put the insn
2067 : : after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2068 : 37371 : while (LABEL_P (insn)
2069 : 37371 : || NOTE_INSN_BASIC_BLOCK_P (insn))
2070 : 0 : insn = NEXT_INSN (insn);
2071 : :
2072 : 37371 : new_insn = emit_insn_before_noloc (pat, insn, bb);
2073 : : }
2074 : : else
2075 : 3237 : new_insn = emit_insn_after_noloc (pat, insn, bb);
2076 : :
2077 : 539 : while (1)
2078 : : {
2079 : 64885 : if (INSN_P (pat))
2080 : 64885 : add_label_notes (PATTERN (pat), new_insn);
2081 : 64885 : if (pat == pat_end)
2082 : : break;
2083 : 539 : pat = NEXT_INSN (pat);
2084 : : }
2085 : 64346 : return new_insn;
2086 : : }
2087 : :
2088 : : /* Add EXPR to the end of basic block BB.
2089 : :
2090 : : This is used by both the PRE and code hoisting. */
2091 : :
2092 : : static void
2093 : 64346 : insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
2094 : : {
2095 : 64346 : rtx reg = expr->reaching_reg;
2096 : 64346 : int regno = REGNO (reg);
2097 : :
2098 : 64346 : rtx_insn *insn = process_insert_insn (expr);
2099 : 64346 : rtx_insn *new_insn = insert_insn_end_basic_block (insn, bb);
2100 : :
2101 : 64346 : gcse_create_count++;
2102 : :
2103 : 64346 : if (dump_file)
2104 : : {
2105 : 3 : fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2106 : 3 : bb->index, INSN_UID (new_insn));
2107 : 3 : fprintf (dump_file, "copying expression %d to reg %d\n",
2108 : : expr->bitmap_index, regno);
2109 : : }
2110 : 64346 : }
2111 : :
2112 : : /* Insert partially redundant expressions on edges in the CFG to make
2113 : : the expressions fully redundant. */
2114 : :
2115 : : static bool
2116 : 396648 : pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
2117 : : {
2118 : 396648 : int e, i, j, num_edges, set_size;
2119 : 396648 : bool did_insert = false;
2120 : 396648 : sbitmap *inserted;
2121 : :
2122 : : /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2123 : : if it reaches any of the deleted expressions. */
2124 : :
2125 : 396648 : set_size = pre_insert_map[0]->size;
2126 : 396648 : num_edges = NUM_EDGES (edge_list);
2127 : 396648 : inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2128 : 396648 : bitmap_vector_clear (inserted, num_edges);
2129 : :
2130 : 13669001 : for (e = 0; e < num_edges; e++)
2131 : : {
2132 : 13272353 : int indx;
2133 : 13272353 : basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2134 : :
2135 : 48706057 : for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2136 : : {
2137 : 35433704 : SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2138 : :
2139 : 35433704 : for (j = indx;
2140 : 40076418 : insert && j < (int) expr_hash_table.n_elems;
2141 : 4642714 : j++, insert >>= 1)
2142 : 4642714 : if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2143 : : {
2144 : 294222 : struct gcse_expr *expr = index_map[j];
2145 : 294222 : struct gcse_occr *occr;
2146 : :
2147 : : /* Now look at each deleted occurrence of this expression. */
2148 : 1908245 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2149 : : {
2150 : 1614023 : if (! occr->deleted_p)
2151 : 625249 : continue;
2152 : :
2153 : : /* Insert this expression on this edge if it would
2154 : : reach the deleted occurrence in BB. */
2155 : 988774 : if (!bitmap_bit_p (inserted[e], j))
2156 : : {
2157 : 294222 : rtx_insn *insn;
2158 : 294222 : edge eg = INDEX_EDGE (edge_list, e);
2159 : :
2160 : : /* We can't insert anything on an abnormal and
2161 : : critical edge, so we insert the insn at the end of
2162 : : the previous block. There are several alternatives
2163 : : detailed in Morgans book P277 (sec 10.5) for
2164 : : handling this situation. This one is easiest for
2165 : : now. */
2166 : :
2167 : 294222 : if (eg->flags & EDGE_ABNORMAL)
2168 : 51815 : insert_insn_end_basic_block (index_map[j], bb);
2169 : : else
2170 : : {
2171 : 242407 : insn = process_insert_insn (index_map[j]);
2172 : 242407 : insert_insn_on_edge (insn, eg);
2173 : : }
2174 : :
2175 : 294222 : if (dump_file)
2176 : : {
2177 : 8 : fprintf (dump_file, "PRE: edge (%d,%d), ",
2178 : : bb->index,
2179 : 8 : INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2180 : 8 : fprintf (dump_file, "copy expression %d\n",
2181 : : expr->bitmap_index);
2182 : : }
2183 : :
2184 : 294222 : update_ld_motion_stores (expr);
2185 : 294222 : bitmap_set_bit (inserted[e], j);
2186 : 294222 : did_insert = true;
2187 : 294222 : gcse_create_count++;
2188 : : }
2189 : : }
2190 : : }
2191 : : }
2192 : : }
2193 : :
2194 : 396648 : sbitmap_vector_free (inserted);
2195 : 396648 : return did_insert;
2196 : : }
2197 : :
2198 : : /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2199 : : Given "old_reg <- expr" (INSN), instead of adding after it
2200 : : reaching_reg <- old_reg
2201 : : it's better to do the following:
2202 : : reaching_reg <- expr
2203 : : old_reg <- reaching_reg
2204 : : because this way copy propagation can discover additional PRE
2205 : : opportunities. But if this fails, we try the old way.
2206 : : When "expr" is a store, i.e.
2207 : : given "MEM <- old_reg", instead of adding after it
2208 : : reaching_reg <- old_reg
2209 : : it's better to add it before as follows:
2210 : : reaching_reg <- old_reg
2211 : : MEM <- reaching_reg. */
2212 : :
2213 : : static void
2214 : 290980 : pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
2215 : : {
2216 : 290980 : rtx reg = expr->reaching_reg;
2217 : 290980 : int regno = REGNO (reg);
2218 : 290980 : int indx = expr->bitmap_index;
2219 : 290980 : rtx pat = PATTERN (insn);
2220 : 290980 : rtx set, first_set;
2221 : 290980 : rtx_insn *new_insn;
2222 : 290980 : rtx old_reg;
2223 : 290980 : int i;
2224 : :
2225 : : /* This block matches the logic in hash_scan_insn. */
2226 : 290980 : switch (GET_CODE (pat))
2227 : : {
2228 : : case SET:
2229 : : set = pat;
2230 : : break;
2231 : :
2232 : : case PARALLEL:
2233 : : /* Search through the parallel looking for the set whose
2234 : : source was the expression that we're interested in. */
2235 : : first_set = NULL_RTX;
2236 : 198175 : set = NULL_RTX;
2237 : 198175 : for (i = 0; i < XVECLEN (pat, 0); i++)
2238 : : {
2239 : 198091 : rtx x = XVECEXP (pat, 0, i);
2240 : 198091 : if (GET_CODE (x) == SET)
2241 : : {
2242 : : /* If the source was a REG_EQUAL or REG_EQUIV note, we
2243 : : may not find an equivalent expression, but in this
2244 : : case the PARALLEL will have a single set. */
2245 : 198007 : if (first_set == NULL_RTX)
2246 : 197855 : first_set = x;
2247 : 198007 : if (expr_equiv_p (SET_SRC (x), expr->expr))
2248 : : {
2249 : : set = x;
2250 : : break;
2251 : : }
2252 : : }
2253 : : }
2254 : :
2255 : 197855 : gcc_assert (first_set);
2256 : 197855 : if (set == NULL_RTX)
2257 : 84 : set = first_set;
2258 : : break;
2259 : :
2260 : 0 : default:
2261 : 0 : gcc_unreachable ();
2262 : : }
2263 : :
2264 : 290980 : if (REG_P (SET_DEST (set)))
2265 : : {
2266 : 290927 : old_reg = SET_DEST (set);
2267 : : /* Check if we can modify the set destination in the original insn. */
2268 : 290927 : if (validate_change (insn, &SET_DEST (set), reg, 0))
2269 : : {
2270 : 290927 : new_insn = gen_move_insn (old_reg, reg);
2271 : 290927 : new_insn = emit_insn_after (new_insn, insn);
2272 : : }
2273 : : else
2274 : : {
2275 : 0 : new_insn = gen_move_insn (reg, old_reg);
2276 : 0 : new_insn = emit_insn_after (new_insn, insn);
2277 : : }
2278 : : }
2279 : : else /* This is possible only in case of a store to memory. */
2280 : : {
2281 : 53 : old_reg = SET_SRC (set);
2282 : 53 : new_insn = gen_move_insn (reg, old_reg);
2283 : :
2284 : : /* Check if we can modify the set source in the original insn. */
2285 : 53 : if (validate_change (insn, &SET_SRC (set), reg, 0))
2286 : 53 : new_insn = emit_insn_before (new_insn, insn);
2287 : : else
2288 : 0 : new_insn = emit_insn_after (new_insn, insn);
2289 : : }
2290 : :
2291 : 290980 : gcse_create_count++;
2292 : :
2293 : 290980 : if (dump_file)
2294 : 24 : fprintf (dump_file,
2295 : : "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2296 : 8 : BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2297 : 8 : INSN_UID (insn), regno);
2298 : 290980 : }
2299 : :
2300 : : /* Copy available expressions that reach the redundant expression
2301 : : to `reaching_reg'. */
2302 : :
2303 : : static void
2304 : 396648 : pre_insert_copies (void)
2305 : : {
2306 : 396648 : unsigned int i;
2307 : 396648 : bool added_copy;
2308 : 396648 : struct gcse_expr *expr;
2309 : 396648 : struct gcse_occr *occr;
2310 : 396648 : struct gcse_occr *avail;
2311 : :
2312 : : /* For each available expression in the table, copy the result to
2313 : : `reaching_reg' if the expression reaches a deleted one.
2314 : :
2315 : : ??? The current algorithm is rather brute force.
2316 : : Need to do some profiling. */
2317 : :
2318 : 19175908 : for (i = 0; i < expr_hash_table.size; i++)
2319 : 27766630 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2320 : : {
2321 : : /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2322 : : we don't want to insert a copy here because the expression may not
2323 : : really be redundant. So only insert an insn if the expression was
2324 : : deleted. This test also avoids further processing if the
2325 : : expression wasn't deleted anywhere. */
2326 : 8987370 : if (expr->reaching_reg == NULL)
2327 : 8644582 : continue;
2328 : :
2329 : : /* Set when we add a copy for that expression. */
2330 : 342788 : added_copy = false;
2331 : :
2332 : 1599013 : for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2333 : : {
2334 : 1256225 : if (! occr->deleted_p)
2335 : 356735 : continue;
2336 : :
2337 : 15280021 : for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2338 : : {
2339 : 14380531 : rtx_insn *insn = avail->insn;
2340 : :
2341 : : /* No need to handle this one if handled already. */
2342 : 14380531 : if (avail->copied_p)
2343 : 444636 : continue;
2344 : :
2345 : : /* Don't handle this one if it's a redundant one. */
2346 : 13935895 : if (insn->deleted ())
2347 : 12446204 : continue;
2348 : :
2349 : : /* Or if the expression doesn't reach the deleted one. */
2350 : 1489691 : if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2351 : : expr,
2352 : 1489691 : BLOCK_FOR_INSN (occr->insn)))
2353 : 1198711 : continue;
2354 : :
2355 : 290980 : added_copy = true;
2356 : :
2357 : : /* Copy the result of avail to reaching_reg. */
2358 : 290980 : pre_insert_copy_insn (expr, insn);
2359 : 290980 : avail->copied_p = 1;
2360 : : }
2361 : : }
2362 : :
2363 : 342788 : if (added_copy)
2364 : 232138 : update_ld_motion_stores (expr);
2365 : : }
2366 : 396648 : }
2367 : :
2368 : : struct set_data
2369 : : {
2370 : : rtx_insn *insn;
2371 : : const_rtx set;
2372 : : int nsets;
2373 : : };
2374 : :
2375 : : /* Increment number of sets and record set in DATA. */
2376 : :
2377 : : static void
2378 : 1632796 : record_set_data (rtx dest, const_rtx set, void *data)
2379 : : {
2380 : 1632796 : struct set_data *s = (struct set_data *)data;
2381 : :
2382 : 1632796 : if (GET_CODE (set) == SET)
2383 : : {
2384 : : /* We allow insns having multiple sets, where all but one are
2385 : : dead as single set insns. In the common case only a single
2386 : : set is present, so we want to avoid checking for REG_UNUSED
2387 : : notes unless necessary. */
2388 : 816398 : if (s->nsets == 1
2389 : 0 : && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
2390 : 816398 : && !side_effects_p (s->set))
2391 : 0 : s->nsets = 0;
2392 : :
2393 : 816398 : if (!s->nsets)
2394 : : {
2395 : : /* Record this set. */
2396 : 816398 : s->nsets += 1;
2397 : 816398 : s->set = set;
2398 : : }
2399 : 0 : else if (!find_reg_note (s->insn, REG_UNUSED, dest)
2400 : 0 : || side_effects_p (set))
2401 : 0 : s->nsets += 1;
2402 : : }
2403 : 1632796 : }
2404 : :
2405 : : static const_rtx
2406 : 1615811 : single_set_gcse (rtx_insn *insn)
2407 : : {
2408 : 1615811 : struct set_data s;
2409 : 1615811 : rtx pattern;
2410 : :
2411 : 1615811 : gcc_assert (INSN_P (insn));
2412 : :
2413 : : /* Optimize common case. */
2414 : 1615811 : pattern = PATTERN (insn);
2415 : 1615811 : if (GET_CODE (pattern) == SET)
2416 : : return pattern;
2417 : :
2418 : 816398 : s.insn = insn;
2419 : 816398 : s.nsets = 0;
2420 : 816398 : note_pattern_stores (pattern, record_set_data, &s);
2421 : :
2422 : : /* Considered invariant insns have exactly one set. */
2423 : 816398 : gcc_assert (s.nsets == 1);
2424 : 816398 : return s.set;
2425 : : }
2426 : :
2427 : : /* Emit move from SRC to DEST noting the equivalence with expression computed
2428 : : in INSN. */
2429 : :
2430 : : static rtx_insn *
2431 : 932035 : gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
2432 : : {
2433 : 932035 : rtx_insn *new_rtx;
2434 : 932035 : const_rtx set = single_set_gcse (insn);
2435 : 932035 : rtx set2;
2436 : 932035 : rtx note;
2437 : 932035 : rtx eqv = NULL_RTX;
2438 : :
2439 : : /* This should never fail since we're creating a reg->reg copy
2440 : : we've verified to be valid. */
2441 : :
2442 : 932035 : new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2443 : :
2444 : : /* Note the equivalence for local CSE pass. Take the note from the old
2445 : : set if there was one. Otherwise record the SET_SRC from the old set
2446 : : unless DEST is also an operand of the SET_SRC. */
2447 : 932035 : set2 = single_set (new_rtx);
2448 : 932035 : if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2449 : 0 : return new_rtx;
2450 : 932035 : if ((note = find_reg_equal_equiv_note (insn)))
2451 : 168036 : eqv = XEXP (note, 0);
2452 : 763999 : else if (! REG_P (dest)
2453 : 763999 : || ! reg_mentioned_p (dest, SET_SRC (set)))
2454 : 762532 : eqv = SET_SRC (set);
2455 : :
2456 : 930568 : if (eqv != NULL_RTX)
2457 : 930568 : set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2458 : :
2459 : : return new_rtx;
2460 : : }
2461 : :
/* Delete redundant computations.
   Deletion is done by changing the insn to copy the `reaching_reg' of
   the expression into the result of the SET.  It is left to later passes
   to propagate the copy or eliminate it.

   Return true if a change is made.  */

static bool
pre_delete (void)
{
  unsigned int i;
  bool changed = false;
  struct gcse_expr *expr;
  struct gcse_occr *occr;

  /* Walk every expression in the hash table, and every anticipatable
     occurrence of it, looking for occurrences the PRE dataflow marked
     deletable (pre_delete_map).  */
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      {
	int indx = expr->bitmap_index;

	/* We only need to search antic_occr since we require ANTLOC != 0.  */
	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    rtx_insn *insn = occr->insn;
	    rtx set;
	    basic_block bb = BLOCK_FOR_INSN (insn);

	    /* We only delete insns that have a single_set.  The dbg_cnt
	       check allows bisecting a miscompile down to one deletion.  */
	    if (bitmap_bit_p (pre_delete_map[bb->index], indx)
		&& (set = single_set (insn)) != 0
		&& dbg_cnt (pre_insn))
	      {
	        /* Create a pseudo-reg to store the result of reaching
		   expressions into.  Get the mode for the new pseudo from
		   the mode of the original destination pseudo.  The same
		   reaching_reg is shared by all occurrences of EXPR.  */
		if (expr->reaching_reg == NULL)
		  expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));

		/* Replace the redundant computation by a copy from the
		   register that holds the reaching expression.  */
		gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
		delete_insn (insn);
		occr->deleted_p = 1;
		changed = true;
		gcse_subst_count++;

		if (dump_file)
		  {
		    fprintf (dump_file,
			     "PRE: redundant insn %d (expression %d) in ",
			       INSN_UID (insn), indx);
		    fprintf (dump_file, "bb %d, reaching reg is %d\n",
			     bb->index, REGNO (expr->reaching_reg));
		  }
	      }
	  }
      }

  return changed;
}
2520 : :
/* Perform GCSE optimizations using PRE.
   This is called by one_pre_gcse_pass after all the dataflow analysis
   has been done.

   This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
   lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
   Compiler Design and Implementation.

   ??? A new pseudo reg is created to hold the reaching expression.  The nice
   thing about the classical approach is that it would try to use an existing
   reg.  If the register can't be adequately optimized [i.e. we introduce
   reload problems], one could add a pass here to propagate the new register
   through the block.

   ??? We don't handle single sets in PARALLELs because we're [currently] not
   able to copy the rest of the parallel when we insert copies to create full
   redundancies from partial redundancies.  However, there's no reason why we
   can't handle PARALLELs in the cases where there are no partial
   redundancies.

   Return true if any transformation was performed.  */

static bool
pre_gcse (struct edge_list *edge_list)
{
  unsigned int i;
  bool did_insert, changed;
  struct gcse_expr **index_map;
  struct gcse_expr *expr;

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Delete the redundant insns first so that
     - we know what register to use for the new insns and for the other
       ones with reaching expressions
     - we know which insns are redundant when we go to create copies  */

  changed = pre_delete ();
  did_insert = pre_edge_insert (edge_list, index_map);

  /* In other places with reaching expressions, copy the expression to the
     specially allocated pseudo-reg that reaches the redundant expr.  */
  pre_insert_copies ();
  if (did_insert)
    {
      /* Materialize the insns queued on edges by pre_edge_insert.  */
      commit_edge_insertions ();
      changed = true;
    }

  free (index_map);
  return changed;
}
2577 : :
/* Top level routine to perform one PRE GCSE pass.

   Allocates all analysis memory, builds the expression hash table, runs
   the PRE dataflow and transformation (pre_gcse), then tears everything
   back down in reverse order.

   Return true if a change was made.  */

static bool
one_pre_gcse_pass (void)
{
  bool changed = false;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
      || gcse_or_cprop_is_too_expensive (_("PRE disabled")))
    return false;

  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  alloc_hash_table (&expr_hash_table);
  add_noreturn_fake_exit_edges ();
  /* Load motion (-fgcse-lm) collects candidate memory operations before
     the hash table is built, and trims them afterwards.  */
  if (flag_gcse_lm)
    compute_ld_motion_mems ();

  compute_hash_table (&expr_hash_table);
  if (flag_gcse_lm)
    trim_ld_motion_mems ();
  if (dump_file)
    dump_hash_table (dump_file, "Expression", &expr_hash_table);

  /* Only run the PRE dataflow and transformation if any candidate
     expressions were found.  */
  if (expr_hash_table.n_elems > 0)
    {
      struct edge_list *edge_list;
      alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
      edge_list = compute_pre_data ();
      if (pre_gcse (edge_list))
	changed = true;
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  if (flag_gcse_lm)
    free_ld_motion_mems ();
  remove_fake_exit_edges ();
  free_hash_table (&expr_hash_table);

  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks_for_fn (cfun),
	       bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }

  return changed;
}
2646 : :
2647 : : /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2648 : : to INSN. If such notes are added to an insn which references a
2649 : : CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2650 : : that note, because the following loop optimization pass requires
2651 : : them. */
2652 : :
2653 : : /* ??? If there was a jump optimization pass after gcse and before loop,
2654 : : then we would not need to do this here, because jump would add the
2655 : : necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2656 : :
2657 : : static void
2658 : 319421 : add_label_notes (rtx x, rtx_insn *insn)
2659 : : {
2660 : 319421 : enum rtx_code code = GET_CODE (x);
2661 : 319421 : int i, j;
2662 : 319421 : const char *fmt;
2663 : :
2664 : 319421 : if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2665 : : {
2666 : : /* This code used to ignore labels that referred to dispatch tables to
2667 : : avoid flow generating (slightly) worse code.
2668 : :
2669 : : We no longer ignore such label references (see LABEL_REF handling in
2670 : : mark_jump_label for additional information). */
2671 : :
2672 : : /* There's no reason for current users to emit jump-insns with
2673 : : such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2674 : : notes. */
2675 : 0 : gcc_assert (!JUMP_P (insn));
2676 : 0 : add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
2677 : :
2678 : 0 : if (LABEL_P (label_ref_label (x)))
2679 : 0 : LABEL_NUSES (label_ref_label (x))++;
2680 : :
2681 : 0 : return;
2682 : : }
2683 : :
2684 : 773548 : for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2685 : : {
2686 : 454127 : if (fmt[i] == 'e')
2687 : 254480 : add_label_notes (XEXP (x, i), insn);
2688 : 199647 : else if (fmt[i] == 'E')
2689 : 85 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2690 : 56 : add_label_notes (XVECEXP (x, i, j), insn);
2691 : : }
2692 : : }
2693 : :
/* Code Hoisting variables and subroutines.  */

/* Very busy expressions: an expression is very busy at a point if it is
   computed on all paths leaving that point.  These hold the per-block
   entry/exit solutions of that dataflow problem.  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;

/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging, doing so would probably be
   more effective than the tail merging code in jump.cc.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */
2706 : :
2707 : : /* Allocate vars used for code hoisting analysis. */
2708 : :
2709 : : static void
2710 : 24429 : alloc_code_hoist_mem (int n_blocks, int n_exprs)
2711 : : {
2712 : 24429 : antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2713 : 24429 : transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2714 : 24429 : comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2715 : :
2716 : 24429 : hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2717 : 24429 : hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2718 : 24429 : }
2719 : :
2720 : : /* Free vars used for code hoisting analysis. */
2721 : :
2722 : : static void
2723 : 24429 : free_code_hoist_mem (void)
2724 : : {
2725 : 24429 : sbitmap_vector_free (antloc);
2726 : 24429 : sbitmap_vector_free (transp);
2727 : 24429 : sbitmap_vector_free (comp);
2728 : :
2729 : 24429 : sbitmap_vector_free (hoist_vbein);
2730 : 24429 : sbitmap_vector_free (hoist_vbeout);
2731 : :
2732 : 24429 : free_dominance_info (CDI_DOMINATORS);
2733 : 24429 : }
2734 : :
/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.

   Solved as a backward dataflow fixpoint:
     VBEout(bb) = (intersection of VBEin over successors) | COMP(bb)
     VBEin(bb)  = ANTLOC(bb) | (VBEout(bb) & TRANSP(bb))  */

static void
compute_code_hoist_vbeinout (void)
{
  int changed, passes;
  basic_block bb;

  bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));

  passes = 0;
  changed = 1;

  /* Iterate until no VBEin set changes.  */
  while (changed)
    {
      changed = 0;

      /* We scan the blocks in the reverse order to speed up
	 the convergence.  */
      FOR_EACH_BB_REVERSE_FN (bb, cfun)
	{
	  if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      bitmap_intersection_of_succs (hoist_vbeout[bb->index],
					    hoist_vbein, bb);

	      /* Include expressions in VBEout that are calculated
		 in BB and available at its end.  */
	      bitmap_ior (hoist_vbeout[bb->index],
			  hoist_vbeout[bb->index], comp[bb->index]);
	    }

	  /* VBEin = ANTLOC | (VBEout & TRANSP); bitmap_or_and returns
	     whether the destination changed.  */
	  changed |= bitmap_or_and (hoist_vbein[bb->index],
				    antloc[bb->index],
				    hoist_vbeout[bb->index],
				    transp[bb->index]);
	}

      passes++;
    }

  if (dump_file)
    {
      fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  fprintf (dump_file, "vbein (%d): ", bb->index);
	  dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
	  fprintf (dump_file, "vbeout(%d): ", bb->index);
	  dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
	}
    }
}
2793 : :
/* Top level routine to do the dataflow analysis needed by code hoisting.
   Computes local properties (transp/comp/antloc), prunes untouchable
   expressions, solves the very-busy-expression problem, and builds the
   dominator tree that hoist_code walks.  */

static void
compute_code_hoist_data (void)
{
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
  prune_expressions (false);
  compute_code_hoist_vbeinout ();
  calculate_dominance_info (CDI_DOMINATORS);
  if (dump_file)
    fprintf (dump_file, "\n");
}
2806 : :
/* Update register pressure for BB when hoisting an expression from
   instruction FROM, if live ranges of inputs are shrunk.  Also
   maintain live_in information if live range of register referred
   in FROM is shrunk.

   Return 0 if register pressure doesn't change, otherwise return
   the number by which register pressure is decreased.

   NOTE: Register pressure won't be increased in this function.  */

static int
update_bb_reg_pressure (basic_block bb, rtx_insn *from)
{
  rtx dreg;
  rtx_insn *insn;
  basic_block succ_bb;
  df_ref use, op_ref;
  edge succ;
  edge_iterator ei;
  int decreased_pressure = 0;
  int nregs;
  enum reg_class pressure_class;

  /* Check each register FROM reads to see whether hoisting FROM out of
     BB ends that register's live range within BB.  */
  FOR_EACH_INSN_USE (use, from)
    {
      dreg = DF_REF_REAL_REG (use);
      /* The live range of register is shrunk only if it isn't:
	 1. referred on any path from the end of this block to EXIT, or
	 2. referred by insns other than FROM in this block.  */
      FOR_EACH_EDGE (succ, ei, bb->succs)
	{
	  succ_bb = succ->dest;
	  if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
	    break;
	}
      /* SUCC is non-NULL here only when the loop above hit the break,
	 i.e. DREG is live into some successor (condition 1 failed).  */
      if (succ != NULL)
	continue;

      /* Scan DREG's use chain for another use inside BB (condition 2);
	 OP_REF is NULL at loop exit when no such use exists.  */
      op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
      for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
	{
	  if (!DF_REF_INSN_INFO (op_ref))
	    continue;

	  insn = DF_REF_INSN (op_ref);
	  if (BLOCK_FOR_INSN (insn) == bb
	      && NONDEBUG_INSN_P (insn) && insn != from)
	    break;
	}

      pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
      /* Decrease register pressure and update live_in information for
	 this block.  */
      if (!op_ref && pressure_class != NO_REGS)
	{
	  decreased_pressure += nregs;
	  BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
	  bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
	}
    }
  return decreased_pressure;
}
2872 : :
/* Determine if the expression EXPR should be hoisted to EXPR_BB up in
   flow graph, if it can reach BB unimpared.  Stop the search if the
   expression would need to be moved more than DISTANCE instructions.

   DISTANCE is the number of instructions through which EXPR can be
   hoisted up in flow graph.

   BB_SIZE points to an array which contains the number of instructions
   for each basic block.

   PRESSURE_CLASS and NREGS are register class and number of hard registers
   for storing EXPR.

   HOISTED_BBS points to a bitmap indicating basic blocks through which
   EXPR is hoisted.

   FROM is the instruction from which EXPR is hoisted.

   It's unclear exactly what Muchnick meant by "unimpared".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.  */

static bool
should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
			  basic_block bb, sbitmap visited,
			  HOST_WIDE_INT distance,
			  int *bb_size, enum reg_class pressure_class,
			  int *nregs, bitmap hoisted_bbs, rtx_insn *from)
{
  unsigned int i;
  edge pred;
  edge_iterator ei;
  sbitmap_iterator sbi;
  bool visited_allocated_locally = false;
  int decreased_pressure = 0;

  if (flag_ira_hoist_pressure)
    {
      /* Record old information of basic block BB when it is visited
	 at the first time.  This is restored by the caller (hoist_code)
	 if the expression is ultimately not hoisted.  */
      if (!bitmap_bit_p (hoisted_bbs, bb->index))
	{
	  struct bb_data *data = BB_DATA (bb);
	  bitmap_copy (data->backup, data->live_in);
	  data->old_pressure = data->max_reg_pressure[pressure_class];
	}
      decreased_pressure = update_bb_reg_pressure (bb, from);
    }
  /* Terminate the search if distance, for which EXPR is allowed to move,
     is exhausted.  */
  if (distance > 0)
    {
      if (flag_ira_hoist_pressure)
	{
	  /* Prefer to hoist EXPR if register pressure is decreased.  */
	  if (decreased_pressure > *nregs)
	    distance += bb_size[bb->index];
	  /* Let EXPR be hoisted through basic block at no cost if one
	     of following conditions is satisfied:

	     1. The basic block has low register pressure.
	     2. Register pressure won't be increases after hoisting EXPR.

	     Constant expressions is handled conservatively, because
	     hoisting constant expression aggressively results in worse
	     code.  This decision is made by the observation of CSiBE
	     on ARM target, while it has no obvious effect on other
	     targets like x86, x86_64, mips and powerpc.  */
	  else if (CONST_INT_P (expr->expr)
		   || (BB_DATA (bb)->max_reg_pressure[pressure_class]
			 >= ira_class_hard_regs_num[pressure_class]
		       && decreased_pressure < *nregs))
	    distance -= bb_size[bb->index];
	}
      else
	distance -= bb_size[bb->index];

      if (distance <= 0)
	return 0;
    }
  else
    gcc_assert (distance == 0);

  /* Allocate the VISITED set lazily at the outermost recursion level.  */
  if (visited == NULL)
    {
      visited_allocated_locally = true;
      visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (visited);
    }

  /* Recurse into all predecessors.  PRED is NULL at loop exit iff every
     predecessor path was acceptable; any break leaves PRED non-NULL and
     means "do not hoist".  */
  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	break;
      else if (pred_bb == expr_bb)
	continue;
      else if (bitmap_bit_p (visited, pred_bb->index))
	continue;
      else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
	break;
      /* Not killed.  */
      else
	{
	  bitmap_set_bit (visited, pred_bb->index);
	  if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
					  visited, distance, bb_size,
					  pressure_class, nregs,
					  hoisted_bbs, from))
	    break;
	}
    }
  if (visited_allocated_locally)
    {
      /* If EXPR can be hoisted to expr_bb, record basic blocks through
	 which EXPR is hoisted in hoisted_bbs.  */
      if (flag_ira_hoist_pressure && !pred)
	{
	  /* Record the basic block from which EXPR is hoisted.  */
	  bitmap_set_bit (visited, bb->index);
	  EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
	      bitmap_set_bit (hoisted_bbs, i);
	}
      sbitmap_free (visited);
    }

  return (pred == NULL);
}
3008 : :
3009 : : /* Find occurrence in BB. */
3010 : :
3011 : : static struct gcse_occr *
3012 : 1166870 : find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
3013 : : {
3014 : : /* Find the right occurrence of this expression. */
3015 : 15591758 : while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3016 : 14424888 : occr = occr->next;
3017 : :
3018 : 1166870 : return occr;
3019 : : }
3020 : :
3021 : : /* Actually perform code hoisting.
3022 : :
3023 : : The code hoisting pass can hoist multiple computations of the same
3024 : : expression along dominated path to a dominating basic block, like
3025 : : from b2/b3 to b1 as depicted below:
3026 : :
3027 : : b1 ------
3028 : : /\ |
3029 : : / \ |
3030 : : bx by distance
3031 : : / \ |
3032 : : / \ |
3033 : : b2 b3 ------
3034 : :
3035 : : Unfortunately code hoisting generally extends the live range of an
3036 : : output pseudo register, which increases register pressure and hurts
3037 : : register allocation. To address this issue, an attribute MAX_DISTANCE
3038 : : is computed and attached to each expression. The attribute is computed
3039 : : from rtx cost of the corresponding expression and it's used to control
3040 : : how long the expression can be hoisted up in flow graph. As the
3041 : : expression is hoisted up in flow graph, GCC decreases its DISTANCE
3042 : : and stops the hoist if DISTANCE reaches 0. Code hoisting can decrease
3043 : : register pressure if live ranges of inputs are shrunk.
3044 : :
3045 : : Option "-fira-hoist-pressure" implements register pressure directed
3046 : : hoist based on upper method. The rationale is:
3047 : : 1. Calculate register pressure for each basic block by reusing IRA
3048 : : facility.
3049 : : 2. When expression is hoisted through one basic block, GCC checks
3050 : : the change of live ranges for inputs/output. The basic block's
3051 : : register pressure will be increased because of extended live
3052 : : range of output. However, register pressure will be decreased
3053 : : if the live ranges of inputs are shrunk.
3054 : : 3. After knowing how hoisting affects register pressure, GCC prefers
3055 : : to hoist the expression if it can decrease register pressure, by
3056 : : increasing DISTANCE of the corresponding expression.
3057 : : 4. If hoisting the expression increases register pressure, GCC checks
3058 : : register pressure of the basic block and decrease DISTANCE only if
3059 : : the register pressure is high. In other words, expression will be
3060 : : hoisted through at no cost if the basic block has low register
3061 : : pressure.
3062 : : 5. Update register pressure information for basic blocks through
3063 : : which expression is hoisted. */
3064 : :
3065 : : static bool
3066 : 24429 : hoist_code (void)
3067 : : {
3068 : 24429 : basic_block bb, dominated;
3069 : 24429 : unsigned int dom_tree_walk_index;
3070 : 24429 : unsigned int i, j, k;
3071 : 24429 : struct gcse_expr **index_map;
3072 : 24429 : struct gcse_expr *expr;
3073 : 24429 : int *to_bb_head;
3074 : 24429 : int *bb_size;
3075 : 24429 : bool changed = false;
3076 : 24429 : struct bb_data *data;
3077 : : /* Basic blocks that have occurrences reachable from BB. */
3078 : 24429 : bitmap from_bbs;
3079 : : /* Basic blocks through which expr is hoisted. */
3080 : 24429 : bitmap hoisted_bbs = NULL;
3081 : 24429 : bitmap_iterator bi;
3082 : :
3083 : : /* Compute a mapping from expression number (`bitmap_index') to
3084 : : hash table entry. */
3085 : :
3086 : 24429 : index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
3087 : 809155 : for (i = 0; i < expr_hash_table.size; i++)
3088 : 1035104 : for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3089 : 274807 : index_map[expr->bitmap_index] = expr;
3090 : :
3091 : : /* Calculate sizes of basic blocks and note how far
3092 : : each instruction is from the start of its block. We then use this
3093 : : data to restrict distance an expression can travel. */
3094 : :
3095 : 24429 : to_bb_head = XCNEWVEC (int, get_max_uid ());
3096 : 24429 : bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));
3097 : :
3098 : 323851 : FOR_EACH_BB_FN (bb, cfun)
3099 : : {
3100 : 299422 : rtx_insn *insn;
3101 : 299422 : int to_head;
3102 : :
3103 : 299422 : to_head = 0;
3104 : 2553053 : FOR_BB_INSNS (bb, insn)
3105 : : {
3106 : : /* Don't count debug instructions to avoid them affecting
3107 : : decision choices. */
3108 : 2253631 : if (NONDEBUG_INSN_P (insn))
3109 : 1706987 : to_bb_head[INSN_UID (insn)] = to_head++;
3110 : : }
3111 : :
3112 : 299422 : bb_size[bb->index] = to_head;
3113 : : }
3114 : :
3115 : 24429 : gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
3116 : : && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
3117 : : == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));
3118 : :
3119 : 24429 : from_bbs = BITMAP_ALLOC (NULL);
3120 : 24429 : if (flag_ira_hoist_pressure)
3121 : 24429 : hoisted_bbs = BITMAP_ALLOC (NULL);
3122 : :
3123 : 24429 : auto_vec<basic_block> dom_tree_walk
3124 : : = get_all_dominated_blocks (CDI_DOMINATORS,
3125 : 24429 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
3126 : :
3127 : : /* Walk over each basic block looking for potentially hoistable
3128 : : expressions, nothing gets hoisted from the entry block. */
3129 : 323851 : FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3130 : : {
3131 : 299422 : auto_vec<basic_block> domby
3132 : 299422 : = get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth);
3133 : :
3134 : 299422 : if (domby.length () == 0)
3135 : 0 : continue;
3136 : :
3137 : : /* Examine each expression that is very busy at the exit of this
3138 : : block. These are the potentially hoistable expressions. */
3139 : 44266513 : for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3140 : : {
3141 : 43967091 : if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3142 : : {
3143 : 3466099 : int nregs = 0;
3144 : 3466099 : enum reg_class pressure_class = NO_REGS;
3145 : : /* Current expression. */
3146 : 3466099 : struct gcse_expr *expr = index_map[i];
3147 : : /* Number of occurrences of EXPR that can be hoisted to BB. */
3148 : 3466099 : int hoistable = 0;
3149 : : /* Occurrences reachable from BB. */
3150 : 3466099 : vec<occr_t> occrs_to_hoist = vNULL;
3151 : : /* We want to insert the expression into BB only once, so
3152 : : note when we've inserted it. */
3153 : 3466099 : int insn_inserted_p;
3154 : 3466099 : occr_t occr;
3155 : :
3156 : : /* If an expression is computed in BB and is available at end of
3157 : : BB, hoist all occurrences dominated by BB to BB. */
3158 : 3466099 : if (bitmap_bit_p (comp[bb->index], i))
3159 : : {
3160 : 246024 : occr = find_occr_in_bb (expr->antic_occr, bb);
3161 : :
3162 : 246024 : if (occr)
3163 : : {
3164 : : /* An occurrence might've been already deleted
3165 : : while processing a dominator of BB. */
3166 : 145273 : if (!occr->deleted_p)
3167 : : {
3168 : 113488 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3169 : : hoistable++;
3170 : : }
3171 : : }
3172 : : else
3173 : : hoistable++;
3174 : : }
3175 : :
3176 : : /* We've found a potentially hoistable expression, now
3177 : : we look at every block BB dominates to see if it
3178 : : computes the expression. */
3179 : 44004811 : FOR_EACH_VEC_ELT (domby, j, dominated)
3180 : : {
3181 : 40538712 : HOST_WIDE_INT max_distance;
3182 : :
3183 : : /* Ignore self dominance. */
3184 : 40538712 : if (bb == dominated)
3185 : 3466099 : continue;
3186 : : /* We've found a dominated block, now see if it computes
3187 : : the busy expression and whether or not moving that
3188 : : expression to the "beginning" of that block is safe. */
3189 : 37072613 : if (!bitmap_bit_p (antloc[dominated->index], i))
3190 : 36151767 : continue;
3191 : :
3192 : 920846 : occr = find_occr_in_bb (expr->antic_occr, dominated);
3193 : 920846 : gcc_assert (occr);
3194 : :
3195 : : /* An occurrence might've been already deleted
3196 : : while processing a dominator of BB. */
3197 : 920846 : if (occr->deleted_p)
3198 : 269615 : continue;
3199 : 651231 : gcc_assert (NONDEBUG_INSN_P (occr->insn));
3200 : :
3201 : 651231 : max_distance = expr->max_distance;
3202 : 651231 : if (max_distance > 0)
3203 : : /* Adjust MAX_DISTANCE to account for the fact that
3204 : : OCCR won't have to travel all of DOMINATED, but
3205 : : only part of it. */
3206 : 1300180 : max_distance += (bb_size[dominated->index]
3207 : 650090 : - to_bb_head[INSN_UID (occr->insn)]);
3208 : :
3209 : 651231 : pressure_class = get_pressure_class_and_nregs (occr->insn,
3210 : : &nregs);
3211 : :
3212 : : /* Note if the expression should be hoisted from the dominated
3213 : : block to BB if it can reach DOMINATED unimpared.
3214 : :
3215 : : Keep track of how many times this expression is hoistable
3216 : : from a dominated block into BB. */
3217 : 651231 : if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3218 : : max_distance, bb_size,
3219 : : pressure_class, &nregs,
3220 : : hoisted_bbs, occr->insn))
3221 : : {
3222 : 454713 : hoistable++;
3223 : 454713 : occrs_to_hoist.safe_push (occr);
3224 : 454713 : bitmap_set_bit (from_bbs, dominated->index);
3225 : : }
3226 : : }
3227 : :
3228 : : /* If we found more than one hoistable occurrence of this
3229 : : expression, then note it in the vector of expressions to
3230 : : hoist. It makes no sense to hoist things which are computed
3231 : : in only one BB, and doing so tends to pessimize register
3232 : : allocation. One could increase this value to try harder
3233 : : to avoid any possible code expansion due to register
3234 : : allocation issues; however experiments have shown that
3235 : : the vast majority of hoistable expressions are only movable
3236 : : from two successors, so raising this threshold is likely
3237 : : to nullify any benefit we get from code hoisting. */
3238 : 3466099 : if (hoistable > 1 && dbg_cnt (hoist_insn))
3239 : : {
3240 : : /* If (hoistable != vec::length), then there is
3241 : : an occurrence of EXPR in BB itself. Don't waste
3242 : : time looking for LCA in this case. */
3243 : 189442 : if ((unsigned) hoistable == occrs_to_hoist.length ())
3244 : : {
3245 : 83157 : basic_block lca;
3246 : :
3247 : 83157 : lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3248 : : from_bbs);
3249 : 83157 : if (lca != bb)
3250 : : /* Punt, it's better to hoist these occurrences to
3251 : : LCA. */
3252 : 82190 : occrs_to_hoist.release ();
3253 : : }
3254 : : }
3255 : : else
3256 : : /* Punt, no point hoisting a single occurrence. */
3257 : 3371378 : occrs_to_hoist.release ();
3258 : :
3259 : 3466099 : if (flag_ira_hoist_pressure
3260 : 3466099 : && !occrs_to_hoist.is_empty ())
3261 : : {
3262 : : /* Increase register pressure of basic blocks to which
3263 : : expr is hoisted because of extended live range of
3264 : : output. */
3265 : 12531 : data = BB_DATA (bb);
3266 : 12531 : data->max_reg_pressure[pressure_class] += nregs;
3267 : 281634 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3268 : : {
3269 : 269103 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3270 : 269103 : data->max_reg_pressure[pressure_class] += nregs;
3271 : : }
3272 : : }
3273 : 3453568 : else if (flag_ira_hoist_pressure)
3274 : : {
3275 : : /* Restore register pressure and live_in info for basic
3276 : : blocks recorded in hoisted_bbs when expr will not be
3277 : : hoisted. */
3278 : 8784957 : EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3279 : : {
3280 : 5331389 : data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3281 : 5331389 : bitmap_copy (data->live_in, data->backup);
3282 : 5331389 : data->max_reg_pressure[pressure_class]
3283 : 5331389 : = data->old_pressure;
3284 : : }
3285 : : }
3286 : :
3287 : 3466099 : if (flag_ira_hoist_pressure)
3288 : 3466099 : bitmap_clear (hoisted_bbs);
3289 : :
3290 : : insn_inserted_p = 0;
3291 : :
3292 : : /* Walk through occurrences of I'th expressions we want
3293 : : to hoist to BB and make the transformations. */
3294 : 3498644 : FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3295 : : {
3296 : 32545 : rtx_insn *insn;
3297 : 32545 : const_rtx set;
3298 : :
3299 : 32545 : gcc_assert (!occr->deleted_p);
3300 : :
3301 : 32545 : insn = occr->insn;
3302 : 32545 : set = single_set_gcse (insn);
3303 : :
3304 : : /* Create a pseudo-reg to store the result of reaching
3305 : : expressions into. Get the mode for the new pseudo
3306 : : from the mode of the original destination pseudo.
3307 : :
3308 : : It is important to use new pseudos whenever we
3309 : : emit a set. This will allow reload to use
3310 : : rematerialization for such registers. */
3311 : 32545 : if (!insn_inserted_p)
3312 : 12531 : expr->reaching_reg
3313 : 12531 : = gen_reg_rtx_and_attrs (SET_DEST (set));
3314 : :
3315 : 32545 : gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3316 : : insn);
3317 : 32545 : delete_insn (insn);
3318 : 32545 : occr->deleted_p = 1;
3319 : 32545 : changed = true;
3320 : 32545 : gcse_subst_count++;
3321 : :
3322 : 32545 : if (!insn_inserted_p)
3323 : : {
3324 : 12531 : insert_insn_end_basic_block (expr, bb);
3325 : 12531 : insn_inserted_p = 1;
3326 : : }
3327 : : }
3328 : :
3329 : 3466099 : occrs_to_hoist.release ();
3330 : 3466099 : bitmap_clear (from_bbs);
3331 : : }
3332 : : }
3333 : 299422 : }
3334 : :
3335 : 24429 : BITMAP_FREE (from_bbs);
3336 : 24429 : if (flag_ira_hoist_pressure)
3337 : 24429 : BITMAP_FREE (hoisted_bbs);
3338 : :
3339 : 24429 : free (bb_size);
3340 : 24429 : free (to_bb_head);
3341 : 24429 : free (index_map);
3342 : :
3343 : 24429 : return changed;
3344 : 24429 : }
3345 : :
3346 : : /* Return pressure class and number of needed hard registers (through
3347 : : *NREGS) of register REGNO. */
3348 : : static enum reg_class
3349 : 5459060 : get_regno_pressure_class (int regno, int *nregs)
3350 : : {
3351 : 5459060 : if (regno >= FIRST_PSEUDO_REGISTER)
3352 : : {
3353 : 3057556 : enum reg_class pressure_class;
3354 : :
3355 : 3057556 : pressure_class = reg_allocno_class (regno);
3356 : 3057556 : pressure_class = ira_pressure_class_translate[pressure_class];
3357 : 3057556 : *nregs
3358 : 3057556 : = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3359 : 3057556 : return pressure_class;
3360 : : }
3361 : 2401504 : else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3362 : 2401504 : && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3363 : : {
3364 : 727043 : *nregs = 1;
3365 : 727043 : return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3366 : : }
3367 : : else
3368 : : {
3369 : 1674461 : *nregs = 0;
3370 : 1674461 : return NO_REGS;
3371 : : }
3372 : : }
3373 : :
3374 : : /* Return pressure class and number of hard registers (through *NREGS)
3375 : : for destination of INSN. */
3376 : : static enum reg_class
3377 : 651231 : get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
3378 : : {
3379 : 651231 : rtx reg;
3380 : 651231 : enum reg_class pressure_class;
3381 : 651231 : const_rtx set = single_set_gcse (insn);
3382 : :
3383 : 651231 : reg = SET_DEST (set);
3384 : 651231 : if (GET_CODE (reg) == SUBREG)
3385 : 0 : reg = SUBREG_REG (reg);
3386 : 651231 : if (MEM_P (reg))
3387 : : {
3388 : 0 : *nregs = 0;
3389 : 0 : pressure_class = NO_REGS;
3390 : : }
3391 : : else
3392 : : {
3393 : 651231 : gcc_assert (REG_P (reg));
3394 : 651231 : pressure_class = reg_allocno_class (REGNO (reg));
3395 : 651231 : pressure_class = ira_pressure_class_translate[pressure_class];
3396 : 651231 : *nregs
3397 : 651231 : = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3398 : : }
3399 : 651231 : return pressure_class;
3400 : : }
3401 : :
3402 : : /* Increase (if INCR_P) or decrease current register pressure for
3403 : : register REGNO. */
3404 : : static void
3405 : 4902529 : change_pressure (int regno, bool incr_p)
3406 : : {
3407 : 4902529 : int nregs;
3408 : 4902529 : enum reg_class pressure_class;
3409 : :
3410 : 4902529 : pressure_class = get_regno_pressure_class (regno, &nregs);
3411 : 4902529 : if (! incr_p)
3412 : 1186793 : curr_reg_pressure[pressure_class] -= nregs;
3413 : : else
3414 : : {
3415 : 3715736 : curr_reg_pressure[pressure_class] += nregs;
3416 : 3715736 : if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3417 : : < curr_reg_pressure[pressure_class])
3418 : 1700964 : BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3419 : 1700964 : = curr_reg_pressure[pressure_class];
3420 : : }
3421 : 4902529 : }
3422 : :
3423 : : /* Calculate register pressure for each basic block by walking insns
3424 : : from last to first. */
3425 : : static void
3426 : 27800 : calculate_bb_reg_pressure (void)
3427 : : {
3428 : 27800 : int i;
3429 : 27800 : unsigned int j;
3430 : 27800 : rtx_insn *insn;
3431 : 27800 : basic_block bb;
3432 : 27800 : bitmap curr_regs_live;
3433 : 27800 : bitmap_iterator bi;
3434 : :
3435 : :
3436 : 27800 : ira_setup_eliminable_regset ();
3437 : 27800 : curr_regs_live = BITMAP_ALLOC (®_obstack);
3438 : 342120 : FOR_EACH_BB_FN (bb, cfun)
3439 : : {
3440 : 314320 : curr_bb = bb;
3441 : 314320 : BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3442 : 314320 : BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3443 : 314320 : bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3444 : 314320 : bitmap_copy (curr_regs_live, df_get_live_out (bb));
3445 : 1888053 : for (i = 0; i < ira_pressure_classes_num; i++)
3446 : 1259413 : curr_reg_pressure[ira_pressure_classes[i]] = 0;
3447 : 2883687 : EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3448 : 2569367 : change_pressure (j, true);
3449 : :
3450 : 2643998 : FOR_BB_INSNS_REVERSE (bb, insn)
3451 : : {
3452 : 2329678 : rtx dreg;
3453 : 2329678 : int regno;
3454 : 2329678 : df_ref def, use;
3455 : :
3456 : 2329678 : if (! NONDEBUG_INSN_P (insn))
3457 : 575752 : continue;
3458 : :
3459 : 15212037 : FOR_EACH_INSN_DEF (def, insn)
3460 : : {
3461 : 13458111 : dreg = DF_REF_REAL_REG (def);
3462 : 13458111 : gcc_assert (REG_P (dreg));
3463 : 13458111 : regno = REGNO (dreg);
3464 : 13458111 : if (!(DF_REF_FLAGS (def)
3465 : : & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3466 : : {
3467 : 13456115 : if (bitmap_clear_bit (curr_regs_live, regno))
3468 : 1186793 : change_pressure (regno, false);
3469 : : }
3470 : : }
3471 : :
3472 : 3789572 : FOR_EACH_INSN_USE (use, insn)
3473 : : {
3474 : 2035646 : dreg = DF_REF_REAL_REG (use);
3475 : 2035646 : gcc_assert (REG_P (dreg));
3476 : 2035646 : regno = REGNO (dreg);
3477 : 2035646 : if (bitmap_set_bit (curr_regs_live, regno))
3478 : 1146369 : change_pressure (regno, true);
3479 : : }
3480 : : }
3481 : : }
3482 : 27800 : BITMAP_FREE (curr_regs_live);
3483 : :
3484 : 27800 : if (dump_file == NULL)
3485 : 27793 : return;
3486 : :
3487 : 7 : fprintf (dump_file, "\nRegister Pressure: \n");
3488 : 35 : FOR_EACH_BB_FN (bb, cfun)
3489 : : {
3490 : 28 : fprintf (dump_file, " Basic block %d: \n", bb->index);
3491 : 140 : for (i = 0; (int) i < ira_pressure_classes_num; i++)
3492 : : {
3493 : 112 : enum reg_class pressure_class;
3494 : :
3495 : 112 : pressure_class = ira_pressure_classes[i];
3496 : 112 : if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3497 : 84 : continue;
3498 : :
3499 : 28 : fprintf (dump_file, " %s=%d\n", reg_class_names[pressure_class],
3500 : : BB_DATA (bb)->max_reg_pressure[pressure_class]);
3501 : : }
3502 : : }
3503 : 7 : fprintf (dump_file, "\n");
3504 : : }
3505 : :
3506 : : /* Top level routine to perform one code hoisting (aka unification) pass
3507 : :
3508 : : Return true if a change was made. */
3509 : :
3510 : : static bool
3511 : 60409 : one_code_hoisting_pass (void)
3512 : : {
3513 : 60409 : bool changed = false;
3514 : :
3515 : 60409 : gcse_subst_count = 0;
3516 : 60409 : gcse_create_count = 0;
3517 : :
3518 : : /* Return if there's nothing to do, or it is too expensive. */
3519 : 60409 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
3520 : 60409 : || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
3521 : 32609 : return false;
3522 : :
3523 : 27800 : doing_code_hoisting_p = true;
3524 : :
3525 : : /* Calculate register pressure for each basic block. */
3526 : 27800 : if (flag_ira_hoist_pressure)
3527 : : {
3528 : 27800 : regstat_init_n_sets_and_refs ();
3529 : 27800 : ira_set_pseudo_classes (false, dump_file);
3530 : 27800 : alloc_aux_for_blocks (sizeof (struct bb_data));
3531 : 27800 : calculate_bb_reg_pressure ();
3532 : 27800 : regstat_free_n_sets_and_refs ();
3533 : : }
3534 : :
3535 : : /* We need alias. */
3536 : 27800 : init_alias_analysis ();
3537 : :
3538 : 27800 : bytes_used = 0;
3539 : 27800 : gcc_obstack_init (&gcse_obstack);
3540 : 27800 : alloc_gcse_mem ();
3541 : :
3542 : 27800 : alloc_hash_table (&expr_hash_table);
3543 : 27800 : compute_hash_table (&expr_hash_table);
3544 : 27800 : if (dump_file)
3545 : 7 : dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3546 : :
3547 : 27800 : if (expr_hash_table.n_elems > 0)
3548 : : {
3549 : 24429 : alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
3550 : : expr_hash_table.n_elems);
3551 : 24429 : compute_code_hoist_data ();
3552 : 24429 : changed = hoist_code ();
3553 : 24429 : free_code_hoist_mem ();
3554 : : }
3555 : :
3556 : 27800 : if (flag_ira_hoist_pressure)
3557 : : {
3558 : 27800 : free_aux_for_blocks ();
3559 : 27800 : free_reg_info ();
3560 : : }
3561 : 27800 : free_hash_table (&expr_hash_table);
3562 : 27800 : free_gcse_mem ();
3563 : 27800 : obstack_free (&gcse_obstack, NULL);
3564 : :
3565 : : /* We are finished with alias. */
3566 : 27800 : end_alias_analysis ();
3567 : :
3568 : 27800 : if (dump_file)
3569 : : {
3570 : 7 : fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3571 : 7 : current_function_name (), n_basic_blocks_for_fn (cfun),
3572 : : bytes_used);
3573 : 7 : fprintf (dump_file, "%d substs, %d insns created\n",
3574 : : gcse_subst_count, gcse_create_count);
3575 : : }
3576 : :
3577 : 27800 : doing_code_hoisting_p = false;
3578 : :
3579 : 27800 : return changed;
3580 : : }
3581 : :
3582 : : /* Here we provide the things required to do store motion towards the exit.
3583 : : In order for this to be effective, gcse also needed to be taught how to
3584 : : move a load when it is killed only by a store to itself.
3585 : :
3586 : : int i;
3587 : : float a[10];
3588 : :
3589 : : void foo(float scale)
3590 : : {
3591 : : for (i=0; i<10; i++)
3592 : : a[i] *= scale;
3593 : : }
3594 : :
3595 : : 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3596 : : the load out since its live around the loop, and stored at the bottom
3597 : : of the loop.
3598 : :
3599 : : The 'Load Motion' referred to and implemented in this file is
3600 : : an enhancement to gcse which when using edge based LCM, recognizes
3601 : : this situation and allows gcse to move the load out of the loop.
3602 : :
3603 : : Once gcse has hoisted the load, store motion can then push this
3604 : : load towards the exit, and we end up with no loads or stores of 'i'
3605 : : in the loop. */
3606 : :
3607 : : /* This will search the ldst list for a matching expression. If it
3608 : : doesn't find one, we create one and initialize it. */
3609 : :
3610 : : static struct ls_expr *
3611 : 13226466 : ldst_entry (rtx x)
3612 : : {
3613 : 13226466 : int do_not_record_p = 0;
3614 : 13226466 : struct ls_expr * ptr;
3615 : 13226466 : unsigned int hash;
3616 : 13226466 : ls_expr **slot;
3617 : 13226466 : struct ls_expr e;
3618 : :
3619 : 13226466 : hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3620 : : NULL, /*have_reg_qty=*/false);
3621 : :
3622 : 13226466 : e.pattern = x;
3623 : 13226466 : slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
3624 : 13226466 : if (*slot)
3625 : : return *slot;
3626 : :
3627 : 8774172 : ptr = XNEW (struct ls_expr);
3628 : :
3629 : 8774172 : ptr->next = pre_ldst_mems;
3630 : 8774172 : ptr->expr = NULL;
3631 : 8774172 : ptr->pattern = x;
3632 : 8774172 : ptr->pattern_regs = NULL_RTX;
3633 : 8774172 : ptr->stores.create (0);
3634 : 8774172 : ptr->reaching_reg = NULL_RTX;
3635 : 8774172 : ptr->invalid = 0;
3636 : 8774172 : ptr->index = 0;
3637 : 8774172 : ptr->hash_index = hash;
3638 : 8774172 : pre_ldst_mems = ptr;
3639 : 8774172 : *slot = ptr;
3640 : :
3641 : 8774172 : return ptr;
3642 : : }
3643 : :
3644 : : /* Free up an individual ldst entry. */
3645 : :
3646 : : static void
3647 : 8774172 : free_ldst_entry (struct ls_expr * ptr)
3648 : : {
3649 : 0 : ptr->stores.release ();
3650 : :
3651 : 8774172 : free (ptr);
3652 : 2949701 : }
3653 : :
3654 : : /* Free up all memory associated with the ldst list. */
3655 : :
3656 : : static void
3657 : 456579 : free_ld_motion_mems (void)
3658 : : {
3659 : 456579 : delete pre_ldst_table;
3660 : 456579 : pre_ldst_table = NULL;
3661 : :
3662 : 3406280 : while (pre_ldst_mems)
3663 : : {
3664 : 2949701 : struct ls_expr * tmp = pre_ldst_mems;
3665 : :
3666 : 2949701 : pre_ldst_mems = pre_ldst_mems->next;
3667 : :
3668 : 2949701 : free_ldst_entry (tmp);
3669 : : }
3670 : :
3671 : 456579 : pre_ldst_mems = NULL;
3672 : 456579 : }
3673 : :
3674 : : /* Dump debugging info about the ldst list. */
3675 : :
3676 : : static void
3677 : 16 : print_ldst_list (FILE * file)
3678 : : {
3679 : 16 : struct ls_expr * ptr;
3680 : :
3681 : 16 : fprintf (file, "LDST list: \n");
3682 : :
3683 : 56 : for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3684 : : {
3685 : 40 : fprintf (file, " Pattern (%3d): ", ptr->index);
3686 : :
3687 : 40 : print_rtl (file, ptr->pattern);
3688 : :
3689 : 40 : fprintf (file, "\n Stores : ");
3690 : 40 : print_rtx_insn_vec (file, ptr->stores);
3691 : :
3692 : 40 : fprintf (file, "\n\n");
3693 : : }
3694 : :
3695 : 16 : fprintf (file, "\n");
3696 : 16 : }
3697 : :
3698 : : /* Returns 1 if X is in the list of ldst only expressions. */
3699 : :
3700 : : static struct ls_expr *
3701 : 625545 : find_rtx_in_ldst (rtx x)
3702 : : {
3703 : 625545 : struct ls_expr e;
3704 : 625545 : ls_expr **slot;
3705 : 625545 : if (!pre_ldst_table)
3706 : : return NULL;
3707 : 625545 : e.pattern = x;
3708 : 625545 : slot = pre_ldst_table->find_slot (&e, NO_INSERT);
3709 : 625545 : if (!slot || (*slot)->invalid)
3710 : : return NULL;
3711 : : return *slot;
3712 : : }
3713 : :
3714 : : /* Load Motion for loads which only kill themselves. */
3715 : :
3716 : : /* Return true if x, a MEM, is a simple access with no side effects.
3717 : : These are the types of loads we consider for the ld_motion list,
3718 : : otherwise we let the usual aliasing take care of it. */
3719 : :
3720 : : static bool
3721 : 17780283 : simple_mem (const_rtx x)
3722 : : {
3723 : 17780283 : if (MEM_VOLATILE_P (x))
3724 : : return false;
3725 : :
3726 : 16922840 : if (GET_MODE (x) == BLKmode)
3727 : : return false;
3728 : :
3729 : : /* If we are handling exceptions, we must be careful with memory references
3730 : : that may trap. If we are not, the behavior is undefined, so we may just
3731 : : continue. */
3732 : 16864569 : if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3733 : : return false;
3734 : :
3735 : 14880699 : if (side_effects_p (x))
3736 : : return false;
3737 : :
3738 : : /* Do not consider function arguments passed on stack. */
3739 : 13464147 : if (reg_mentioned_p (stack_pointer_rtx, x))
3740 : : return false;
3741 : :
3742 : 13228346 : if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3743 : : return false;
3744 : :
3745 : : return true;
3746 : : }
3747 : :
3748 : : /* Make sure there isn't a buried reference in this pattern anywhere.
3749 : : If there is, invalidate the entry for it since we're not capable
3750 : : of fixing it up just yet.. We have to be sure we know about ALL
3751 : : loads since the aliasing code will allow all entries in the
3752 : : ld_motion list to not-alias itself. If we miss a load, we will get
3753 : : the wrong value since gcse might common it and we won't know to
3754 : : fix it up. */
3755 : :
3756 : : static void
3757 : 153047662 : invalidate_any_buried_refs (rtx x)
3758 : : {
3759 : 153047662 : const char * fmt;
3760 : 153047662 : int i, j;
3761 : 153047662 : struct ls_expr * ptr;
3762 : :
3763 : : /* Invalidate it in the list. */
3764 : 153047662 : if (MEM_P (x) && simple_mem (x))
3765 : : {
3766 : 4678207 : ptr = ldst_entry (x);
3767 : 4678207 : ptr->invalid = 1;
3768 : : }
3769 : :
3770 : : /* Recursively process the insn. */
3771 : 153047662 : fmt = GET_RTX_FORMAT (GET_CODE (x));
3772 : :
3773 : 358574618 : for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3774 : : {
3775 : 205526956 : if (fmt[i] == 'e')
3776 : 92044195 : invalidate_any_buried_refs (XEXP (x, i));
3777 : 113482761 : else if (fmt[i] == 'E')
3778 : 27219508 : for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3779 : 18485778 : invalidate_any_buried_refs (XVECEXP (x, i, j));
3780 : : }
3781 : 153047662 : }
3782 : :
3783 : : /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3784 : : being defined as MEM loads and stores to symbols, with no side effects
3785 : : and no registers in the expression. For a MEM destination, we also
3786 : : check that the insn is still valid if we replace the destination with a
3787 : : REG, as is done in update_ld_motion_stores. If there are any uses/defs
3788 : : which don't match this criteria, they are invalidated and trimmed out
3789 : : later. */
3790 : :
3791 : : static void
3792 : 456579 : compute_ld_motion_mems (void)
3793 : : {
3794 : 456579 : struct ls_expr * ptr;
3795 : 456579 : basic_block bb;
3796 : 456579 : rtx_insn *insn;
3797 : :
3798 : 456579 : pre_ldst_mems = NULL;
3799 : 456579 : pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);
3800 : :
3801 : 9145121 : FOR_EACH_BB_FN (bb, cfun)
3802 : : {
3803 : 104165132 : FOR_BB_INSNS (bb, insn)
3804 : : {
3805 : 95476590 : if (NONDEBUG_INSN_P (insn))
3806 : : {
3807 : 44556182 : if (GET_CODE (PATTERN (insn)) == SET)
3808 : : {
3809 : 34938337 : rtx src = SET_SRC (PATTERN (insn));
3810 : 34938337 : rtx dest = SET_DEST (PATTERN (insn));
3811 : :
3812 : : /* Check for a simple load. */
3813 : 34938337 : if (MEM_P (src) && simple_mem (src))
3814 : : {
3815 : 4466065 : ptr = ldst_entry (src);
3816 : 4466065 : if (!REG_P (dest))
3817 : 34432 : ptr->invalid = 1;
3818 : : }
3819 : : else
3820 : : {
3821 : : /* Make sure there isn't a buried load somewhere. */
3822 : 30472272 : invalidate_any_buried_refs (src);
3823 : : }
3824 : :
3825 : : /* Check for a simple load through a REG_EQUAL note. */
3826 : 34938337 : rtx note = find_reg_equal_equiv_note (insn), src_eq;
3827 : 34938337 : if (note
3828 : 2106869 : && REG_NOTE_KIND (note) == REG_EQUAL
3829 : 1969062 : && (src_eq = XEXP (note, 0))
3830 : 36907399 : && !(MEM_P (src_eq) && simple_mem (src_eq)))
3831 : 1969062 : invalidate_any_buried_refs (src_eq);
3832 : :
3833 : : /* Check for stores. Don't worry about aliased ones, they
3834 : : will block any movement we might do later. We only care
3835 : : about this exact pattern since those are the only
3836 : : circumstance that we will ignore the aliasing info. */
3837 : 34938337 : if (MEM_P (dest) && simple_mem (dest))
3838 : : {
3839 : 4082194 : ptr = ldst_entry (dest);
3840 : 4082194 : machine_mode src_mode = GET_MODE (src);
3841 : 4082194 : if (! MEM_P (src)
3842 : 4082194 : && GET_CODE (src) != ASM_OPERANDS
3843 : : /* Check for REG manually since want_to_gcse_p
3844 : : returns 0 for all REGs. */
3845 : 8164388 : && can_assign_to_reg_without_clobbers_p (src,
3846 : : src_mode))
3847 : 4073908 : ptr->stores.safe_push (insn);
3848 : : else
3849 : 8286 : ptr->invalid = 1;
3850 : : }
3851 : : }
3852 : : else
3853 : : {
3854 : : /* Invalidate all MEMs in the pattern and... */
3855 : 9617845 : invalidate_any_buried_refs (PATTERN (insn));
3856 : :
3857 : : /* ...in REG_EQUAL notes for PARALLELs with single SET. */
3858 : 9617845 : rtx note = find_reg_equal_equiv_note (insn), src_eq;
3859 : 9617845 : if (note
3860 : 458510 : && REG_NOTE_KIND (note) == REG_EQUAL
3861 : 10076355 : && (src_eq = XEXP (note, 0)))
3862 : 458510 : invalidate_any_buried_refs (src_eq);
3863 : : }
3864 : : }
3865 : : }
3866 : : }
3867 : 456579 : }
3868 : :
3869 : : /* Remove any references that have been either invalidated or are not in the
3870 : : expression list for pre gcse. */
3871 : :
3872 : : static void
3873 : 456579 : trim_ld_motion_mems (void)
3874 : : {
3875 : 456579 : struct ls_expr * * last = & pre_ldst_mems;
3876 : 456579 : struct ls_expr * ptr = pre_ldst_mems;
3877 : :
3878 : 9230751 : while (ptr != NULL)
3879 : : {
3880 : 8774172 : struct gcse_expr * expr;
3881 : :
3882 : : /* Delete if entry has been made invalid. */
3883 : 8774172 : if (! ptr->invalid)
3884 : : {
3885 : : /* Delete if we cannot find this mem in the expression list. */
3886 : 6224767 : unsigned int hash = ptr->hash_index % expr_hash_table.size;
3887 : :
3888 : 6224767 : for (expr = expr_hash_table.table[hash];
3889 : 9441915 : expr != NULL;
3890 : 3217148 : expr = expr->next_same_hash)
3891 : 6166849 : if (expr_equiv_p (expr->expr, ptr->pattern))
3892 : : break;
3893 : : }
3894 : : else
3895 : : expr = (struct gcse_expr *) 0;
3896 : :
3897 : 6224767 : if (expr)
3898 : : {
3899 : : /* Set the expression field if we are keeping it. */
3900 : 2949701 : ptr->expr = expr;
3901 : 2949701 : last = & ptr->next;
3902 : 2949701 : ptr = ptr->next;
3903 : : }
3904 : : else
3905 : : {
3906 : 5824471 : *last = ptr->next;
3907 : 5824471 : pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
3908 : 5824471 : free_ldst_entry (ptr);
3909 : 5824471 : ptr = * last;
3910 : : }
3911 : : }
3912 : :
3913 : : /* Show the world what we've found. */
3914 : 456579 : if (dump_file && pre_ldst_mems != NULL)
3915 : 16 : print_ldst_list (dump_file);
3916 : 456579 : }
3917 : :
3918 : : /* This routine will take an expression which we are replacing with
3919 : : a reaching register, and update any stores that are needed if
3920 : : that expression is in the ld_motion list. Stores are updated by
3921 : : copying their SRC to the reaching register, and then storing
3922 : : the reaching register into the store location. These keeps the
3923 : : correct value in the reaching register for the loads. */
3924 : :
3925 : : static void
3926 : 526360 : update_ld_motion_stores (struct gcse_expr * expr)
3927 : : {
3928 : 526360 : struct ls_expr * mem_ptr;
3929 : :
3930 : 526360 : if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3931 : : {
3932 : : /* We can try to find just the REACHED stores, but is shouldn't
3933 : : matter to set the reaching reg everywhere... some might be
3934 : : dead and should be eliminated later. */
3935 : :
3936 : : /* We replace (set mem expr) with (set reg expr) (set mem reg)
3937 : : where reg is the reaching reg used in the load. We checked in
3938 : : compute_ld_motion_mems that we can replace (set mem expr) with
3939 : : (set reg expr) in that insn. */
3940 : 102285 : rtx_insn *insn;
3941 : 102285 : unsigned int i;
3942 : 173909 : FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
3943 : : {
3944 : 54939 : rtx pat = PATTERN (insn);
3945 : 54939 : rtx src = SET_SRC (pat);
3946 : 54939 : rtx reg = expr->reaching_reg;
3947 : :
3948 : : /* If we've already copied it, continue. */
3949 : 54939 : if (expr->reaching_reg == src)
3950 : 46920 : continue;
3951 : :
3952 : 8019 : if (dump_file)
3953 : : {
3954 : 0 : fprintf (dump_file, "PRE: store updated with reaching reg ");
3955 : 0 : print_rtl (dump_file, reg);
3956 : 0 : fprintf (dump_file, ":\n ");
3957 : 0 : print_inline_rtx (dump_file, insn, 8);
3958 : 0 : fprintf (dump_file, "\n");
3959 : : }
3960 : :
3961 : 8019 : rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3962 : 8019 : emit_insn_before (copy, insn);
3963 : 8019 : SET_SRC (pat) = reg;
3964 : 8019 : df_insn_rescan (insn);
3965 : :
3966 : : /* un-recognize this pattern since it's probably different now. */
3967 : 8019 : INSN_CODE (insn) = -1;
3968 : 8019 : gcse_create_count++;
3969 : : }
3970 : : }
3971 : 526360 : }
3972 : :
3973 : : /* Return true if the graph is too expensive to optimize. PASS is the
3974 : : optimization about to be performed. */
3975 : :
3976 : : bool
3977 : 2012699 : gcse_or_cprop_is_too_expensive (const char *pass)
3978 : : {
3979 : 2012699 : unsigned HOST_WIDE_INT memory_request
3980 : 2012699 : = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun)
3981 : 2012699 : * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE));
3982 : :
3983 : : /* Trying to perform global optimizations on flow graphs which have
3984 : : a high connectivity will take a long time and is unlikely to be
3985 : : particularly useful.
3986 : :
3987 : : In normal circumstances a cfg should have about twice as many
3988 : : edges as blocks. But we do not want to punish small functions
3989 : : which have a couple switch statements. Rather than simply
3990 : : threshold the number of blocks, uses something with a more
3991 : : graceful degradation. */
3992 : 2012699 : if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
3993 : : {
3994 : 0 : warning (OPT_Wdisabled_optimization,
3995 : : "%s: %d basic blocks and %d edges/basic block",
3996 : : pass, n_basic_blocks_for_fn (cfun),
3997 : : n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
3998 : :
3999 : 0 : return true;
4000 : : }
4001 : :
4002 : : /* If allocating memory for the dataflow bitmaps would take up too much
4003 : : storage it's better just to disable the optimization. */
4004 : 2012699 : if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory)
4005 : : {
4006 : 0 : warning (OPT_Wdisabled_optimization,
4007 : : "%s: %d basic blocks and %d registers; "
4008 : : "increase %<--param max-gcse-memory%> above %wu",
4009 : 0 : pass, n_basic_blocks_for_fn (cfun), max_reg_num (),
4010 : : memory_request / 1024);
4011 : :
4012 : 0 : return true;
4013 : : }
4014 : :
4015 : : return false;
4016 : : }
4017 : :
4018 : : static unsigned int
4019 : 861249 : execute_rtl_pre (void)
4020 : : {
4021 : 861249 : int changed;
4022 : 861249 : delete_unreachable_blocks ();
4023 : 861249 : df_analyze ();
4024 : 861249 : changed = one_pre_gcse_pass ();
4025 : 861249 : flag_rerun_cse_after_global_opts |= changed;
4026 : 861249 : if (changed)
4027 : 111106 : cleanup_cfg (0);
4028 : 861249 : return 0;
4029 : : }
4030 : :
4031 : : static unsigned int
4032 : 60409 : execute_rtl_hoist (void)
4033 : : {
4034 : 60409 : int changed;
4035 : 60409 : delete_unreachable_blocks ();
4036 : 60409 : df_analyze ();
4037 : 60409 : changed = one_code_hoisting_pass ();
4038 : 60409 : flag_rerun_cse_after_global_opts |= changed;
4039 : 60409 : if (changed)
4040 : 2863 : cleanup_cfg (0);
4041 : 60409 : return 0;
4042 : : }
4043 : :
4044 : : namespace {
4045 : :
4046 : : const pass_data pass_data_rtl_pre =
4047 : : {
4048 : : RTL_PASS, /* type */
4049 : : "rtl pre", /* name */
4050 : : OPTGROUP_NONE, /* optinfo_flags */
4051 : : TV_PRE, /* tv_id */
4052 : : PROP_cfglayout, /* properties_required */
4053 : : 0, /* properties_provided */
4054 : : 0, /* properties_destroyed */
4055 : : 0, /* todo_flags_start */
4056 : : TODO_df_finish, /* todo_flags_finish */
4057 : : };
4058 : :
4059 : : class pass_rtl_pre : public rtl_opt_pass
4060 : : {
4061 : : public:
4062 : 277256 : pass_rtl_pre (gcc::context *ctxt)
4063 : 554512 : : rtl_opt_pass (pass_data_rtl_pre, ctxt)
4064 : : {}
4065 : :
4066 : : /* opt_pass methods: */
4067 : : bool gate (function *) final override;
4068 : 861249 : unsigned int execute (function *) final override
4069 : : {
4070 : 861249 : return execute_rtl_pre ();
4071 : : }
4072 : :
4073 : : }; // class pass_rtl_pre
4074 : :
4075 : : /* We do not construct an accurate cfg in functions which call
4076 : : setjmp, so none of these passes runs if the function calls
4077 : : setjmp.
4078 : : FIXME: Should just handle setjmp via REG_SETJMP notes. */
4079 : :
4080 : : bool
4081 : 1420533 : pass_rtl_pre::gate (function *fun)
4082 : : {
4083 : 996617 : return optimize > 0 && flag_gcse
4084 : 922304 : && !fun->calls_setjmp
4085 : 921659 : && optimize_function_for_speed_p (fun)
4086 : 2281783 : && dbg_cnt (pre);
4087 : : }
4088 : :
4089 : : } // anon namespace
4090 : :
4091 : : rtl_opt_pass *
4092 : 277256 : make_pass_rtl_pre (gcc::context *ctxt)
4093 : : {
4094 : 277256 : return new pass_rtl_pre (ctxt);
4095 : : }
4096 : :
4097 : : namespace {
4098 : :
4099 : : const pass_data pass_data_rtl_hoist =
4100 : : {
4101 : : RTL_PASS, /* type */
4102 : : "hoist", /* name */
4103 : : OPTGROUP_NONE, /* optinfo_flags */
4104 : : TV_HOIST, /* tv_id */
4105 : : PROP_cfglayout, /* properties_required */
4106 : : 0, /* properties_provided */
4107 : : 0, /* properties_destroyed */
4108 : : 0, /* todo_flags_start */
4109 : : TODO_df_finish, /* todo_flags_finish */
4110 : : };
4111 : :
4112 : : class pass_rtl_hoist : public rtl_opt_pass
4113 : : {
4114 : : public:
4115 : 277256 : pass_rtl_hoist (gcc::context *ctxt)
4116 : 554512 : : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
4117 : : {}
4118 : :
4119 : : /* opt_pass methods: */
4120 : : bool gate (function *) final override;
4121 : 60409 : unsigned int execute (function *) final override
4122 : : {
4123 : 60409 : return execute_rtl_hoist ();
4124 : : }
4125 : :
4126 : : }; // class pass_rtl_hoist
4127 : :
4128 : : bool
4129 : 1420533 : pass_rtl_hoist::gate (function *)
4130 : : {
4131 : 996617 : return optimize > 0 && flag_gcse
4132 : 922304 : && !cfun->calls_setjmp
4133 : : /* It does not make sense to run code hoisting unless we are optimizing
4134 : : for code size -- it rarely makes programs faster, and can make then
4135 : : bigger if we did PRE (when optimizing for space, we don't run PRE). */
4136 : 921659 : && optimize_function_for_size_p (cfun)
4137 : 1480942 : && dbg_cnt (hoist);
4138 : : }
4139 : :
4140 : : } // anon namespace
4141 : :
4142 : : rtl_opt_pass *
4143 : 277256 : make_pass_rtl_hoist (gcc::context *ctxt)
4144 : : {
4145 : 277256 : return new pass_rtl_hoist (ctxt);
4146 : : }
4147 : :
4148 : : /* Reset all state within gcse.cc so that we can rerun the compiler
4149 : : within the same process. For use by toplev::finalize. */
4150 : :
4151 : : void
4152 : 249970 : gcse_cc_finalize (void)
4153 : : {
4154 : 249970 : test_insn = NULL;
4155 : 249970 : }
4156 : :
4157 : : #include "gt-gcse.h"
|