Line data Source code
1 : /* Reassociation for trees.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 : Contributed by Daniel Berlin <dan@dberlin.org>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "target.h"
26 : #include "rtl.h"
27 : #include "tree.h"
28 : #include "gimple.h"
29 : #include "cfghooks.h"
30 : #include "alloc-pool.h"
31 : #include "tree-pass.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "ssa.h"
35 : #include "optabs-tree.h"
36 : #include "gimple-pretty-print.h"
37 : #include "diagnostic-core.h"
38 : #include "fold-const.h"
39 : #include "stor-layout.h"
40 : #include "cfganal.h"
41 : #include "gimple-iterator.h"
42 : #include "gimple-fold.h"
43 : #include "tree-eh.h"
44 : #include "gimplify-me.h"
45 : #include "tree-cfg.h"
46 : #include "tree-ssa-loop.h"
47 : #include "flags.h"
48 : #include "tree-ssa.h"
49 : #include "langhooks.h"
50 : #include "cfgloop.h"
51 : #include "builtins.h"
52 : #include "gimplify.h"
53 : #include "case-cfn-macros.h"
54 : #include "tree-ssa-reassoc.h"
55 : #include "tree-ssa-math-opts.h"
56 : #include "gimple-range.h"
57 : #include "internal-fn.h"
58 :
59 : /* This is a simple global reassociation pass. It is, in part, based
60 : on the LLVM pass of the same name (They do some things more/less
61 : than we do, in different orders, etc).
62 :
63 : It consists of five steps:
64 :
65 : 1. Breaking up subtract operations into addition + negate, where
66 : it would promote the reassociation of adds.
67 :
68 : 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 : becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 : During linearization, we place the operands of the binary
71 : expressions into a vector of operand_entry_*
72 :
73 : 3. Optimization of the operand lists, eliminating things like a +
74 : -a, a & a, etc.
75 :
76 : 3a. Combine repeated factors with the same occurrence counts
77 : into a __builtin_powi call that will later be optimized into
78 : an optimal number of multiplies.
79 :
80 : 4. Rewrite the expression trees we linearized and optimized so
81 : they are in proper rank order.
82 :
83 : 5. Repropagate negates, as nothing else will clean it up ATM.
84 :
85 : A bit of theory on #4, since nobody seems to write anything down
86 : about why it makes sense to do it the way they do it:
87 :
88 : We could do this much nicer theoretically, but don't (for reasons
89 : explained after how to do it theoretically nice :P).
90 :
91 : In order to promote the most redundancy elimination, you want
92 : binary expressions whose operands are the same rank (or
93 : preferably, the same value) exposed to the redundancy eliminator,
94 : for possible elimination.
95 :
96 : So the way to do this if we really cared, is to build the new op
97 : tree from the leaves to the roots, merging as you go, and putting the
98 : new op on the end of the worklist, until you are left with one
99 : thing on the worklist.
100 :
101 : IE if you have to rewrite the following set of operands (listed with
102 : rank in parentheses), with opcode PLUS_EXPR:
103 :
104 : a (1), b (1), c (1), d (2), e (2)
105 :
106 :
107 : We start with our merge worklist empty, and the ops list with all of
108 : those on it.
109 :
110 : You want to first merge all leaves of the same rank, as much as
111 : possible.
112 :
113 : So first build a binary op of
114 :
115 : mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 :
117 : Because there is no three operand form of PLUS_EXPR, c is not going to
118 : be exposed to redundancy elimination as a rank 1 operand.
119 :
120 : So you might as well throw it on the merge worklist (you could also
121 : consider it to now be a rank two operand, and merge it with d and e,
122 : but in this case, you then have evicted e from a binary op. So at
123 : least in this situation, you can't win.)
124 :
125 : Then build a binary op of d + e
126 : mergetmp2 = d + e
127 :
128 : and put mergetmp2 on the merge worklist.
129 :
130 : so merge worklist = {mergetmp, c, mergetmp2}
131 :
132 : Continue building binary ops of these operations until you have only
133 : one operation left on the worklist.
134 :
135 : So we have
136 :
137 : build binary op
138 : mergetmp3 = mergetmp + c
139 :
140 : worklist = {mergetmp2, mergetmp3}
141 :
142 : mergetmp4 = mergetmp2 + mergetmp3
143 :
144 : worklist = {mergetmp4}
145 :
146 : because we have one operation left, we can now just set the original
147 : statement equal to the result of that operation.
148 :
149 : This will at least expose a + b and d + e to redundancy elimination
150 : as binary operations.
151 :
152 : For extra points, you can reuse the old statements to build the
153 : mergetmps, since you shouldn't run out.
154 :
155 : So why don't we do this?
156 :
157 : Because it's expensive, and rarely will help. Most trees we are
158 : reassociating have 3 or fewer ops. If they have 2 ops, they already
159 : will be written into a nice single binary op. If you have 3 ops, a
160 : single simple check suffices to tell you whether the first two are of the
161 : same rank. If so, you know to order it
162 :
163 : mergetmp = op1 + op2
164 : newstmt = mergetmp + op3
165 :
166 : instead of
167 : mergetmp = op2 + op3
168 : newstmt = mergetmp + op1
169 :
170 : If all three are of the same rank, you can't expose them all in a
171 : single binary operator anyway, so the above is *still* the best you
172 : can do.
173 :
174 : Thus, this is what we do. When we have three ops left, we check to see
175 : what order to put them in, and call it a day. As a nod to vector sum
176 : reduction, we check if any of the ops are really a phi node that is a
177 : destructive update for the associating op, and keep the destructive
178 : update together for vector sum reduction recognition. */
179 :
180 : /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 : point 3a in the pass header comment. */
182 : static bool reassoc_insert_powi_p;
183 :
184 : /* Enable biasing ranks of loop accumulators. We don't want this before
185 : vectorization, since it interferes with reduction chains. */
186 : static bool reassoc_bias_loop_carried_phi_ranks_p;
187 :
188 : /* Pass statistics counters (e.g. ops_eliminated is bumped by the eliminate_* helpers and pows_encountered by add_repeat_to_ops_vec). */
189 : static struct
190 : {
191 : int linearized;
192 : int constants_eliminated;
193 : int ops_eliminated;
194 : int rewritten;
195 : int pows_encountered;
196 : int pows_created;
197 : } reassociate_stats;
198 :
199 : /* Pool from which add_to_ops_vec and add_repeat_to_ops_vec allocate operand entries. */
200 : static object_allocator<operand_entry> operand_entry_pool
201 : ("operand entry pool");
202 :
203 : /* This is used to assign a unique ID to each struct operand_entry
204 : so that qsort results are identical on different hosts. */
205 : static unsigned int next_operand_entry_id;
206 :
207 : /* Starting rank number for a given basic block, so that we can rank
208 : operations using unmovable instructions in that BB based on the bb
209 : depth. Indexed by basic block index. */
210 : static int64_t *bb_rank;
211 :
212 : /* Operand->rank hashtable; acts as the cache consulted by get_rank. */
213 : static hash_map<tree, int64_t> *operand_rank;
214 :
215 : /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 : biased. Stored as a bitmap keyed by SSA_NAME_VERSION. */
217 : static auto_bitmap biased_names;
218 :
219 : /* SSA_NAMEs for which the CFG must be adjusted once reassociate_bb has
220 : processed all basic blocks: the block is split right after the
221 : SSA_NAME's definition statement and before its only use, which must
222 : be a bit ior. */
223 : static vec<tree> reassoc_branch_fixups;
224 :
225 : /* Forward decls. */
226 : static int64_t get_rank (tree);
227 : static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 :
229 : /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 : possibly added by gsi_remove. */
231 :
232 : static bool
233 168164 : reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 : {
235 168164 : gimple *stmt = gsi_stmt (*gsi);
236 :
237 168164 : if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 72358 : return gsi_remove (gsi, true);
239 :
240 95806 : gimple_stmt_iterator prev = *gsi;
241 95806 : gsi_prev (&prev);
242 95806 : unsigned uid = gimple_uid (stmt);
243 95806 : basic_block bb = gimple_bb (stmt);
244 95806 : bool ret = gsi_remove (gsi, true);
245 95806 : if (!gsi_end_p (prev))
246 95594 : gsi_next (&prev);
247 : else
248 424 : prev = gsi_start_bb (bb);
249 95806 : gimple *end_stmt = gsi_stmt (*gsi);
250 99736 : while ((stmt = gsi_stmt (prev)) != end_stmt)
251 : {
252 3930 : gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 3930 : gimple_set_uid (stmt, uid);
254 3930 : gsi_next (&prev);
255 : }
256 : return ret;
257 : }
258 :
/* Amount added to the rank of a loop-carried phi (see phi_rank).  It
   should exceed the depth of any reassociation tree we can see, yet
   stay below the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
263 :
264 : /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 : operands to the STMT's left-hand side. The goal is to preserve bias in code
266 : like this:
267 :
268 : x_1 = phi(x_0, x_2)
269 : a = x_1 | 1
270 : b = a ^ 2
271 : .MEM = b
272 : c = b + d
273 : x_2 = c + e
274 :
275 : That is, we need to preserve bias along single-use chains originating from
276 : loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 : uses, because only they participate in rank propagation. */
278 : static bool
279 6796192 : propagate_bias_p (gimple *stmt)
280 : {
281 6796192 : use_operand_p use;
282 6796192 : imm_use_iterator use_iter;
283 6796192 : gimple *single_use_stmt = NULL;
284 :
285 6796192 : if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 : return false;
287 :
288 17334379 : FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 : {
290 7732139 : gimple *current_use_stmt = USE_STMT (use);
291 :
292 7732139 : if (is_gimple_assign (current_use_stmt)
293 7732139 : && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 : {
295 5887811 : if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 714254 : return false;
297 : single_use_stmt = current_use_stmt;
298 : }
299 714254 : }
300 :
301 4443993 : if (single_use_stmt == NULL)
302 : return false;
303 :
304 4443623 : if (gimple_bb (stmt)->loop_father
305 4443623 : != gimple_bb (single_use_stmt)->loop_father)
306 : return false;
307 :
308 : return true;
309 : }
310 :
311 : /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 : an innermost loop, and the phi has only a single use which is inside
313 : the loop, then the rank is the block rank of the loop latch plus an
314 : extra bias for the loop-carried dependence. This causes expressions
315 : calculated into an accumulator variable to be independent for each
316 : iteration of the loop. If STMT is some other phi, the rank is the
317 : block rank of its containing block. */
318 : static int64_t
319 1425572 : phi_rank (gimple *stmt)
320 : {
321 1425572 : basic_block bb = gimple_bb (stmt);
322 1425572 : class loop *father = bb->loop_father;
323 1425572 : tree res;
324 1425572 : unsigned i;
325 1425572 : use_operand_p use;
326 1425572 : gimple *use_stmt;
327 :
328 1425572 : if (!reassoc_bias_loop_carried_phi_ranks_p)
329 547894 : return bb_rank[bb->index];
330 :
331 : /* We only care about real loops (those with a latch). */
332 877678 : if (!father->latch)
333 1 : return bb_rank[bb->index];
334 :
335 : /* Interesting phis must be in headers of innermost loops. */
336 877677 : if (bb != father->header
337 688071 : || father->inner)
338 342881 : return bb_rank[bb->index];
339 :
340 : /* Ignore virtual SSA_NAMEs. */
341 534796 : res = gimple_phi_result (stmt);
342 1069592 : if (virtual_operand_p (res))
343 0 : return bb_rank[bb->index];
344 :
345 : /* The phi definition must have a single use, and that use must be
346 : within the loop. Otherwise this isn't an accumulator pattern. */
347 534796 : if (!single_imm_use (res, &use, &use_stmt)
348 534796 : || gimple_bb (use_stmt)->loop_father != father)
349 465262 : return bb_rank[bb->index];
350 :
351 : /* Look for phi arguments from within the loop. If found, bias this phi. */
352 79200 : for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 : {
354 78992 : tree arg = gimple_phi_arg_def (stmt, i);
355 78992 : if (TREE_CODE (arg) == SSA_NAME
356 78992 : && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 : {
358 74106 : gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 74106 : if (gimple_bb (def_stmt)->loop_father == father)
360 69326 : return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
361 : }
362 : }
363 :
364 : /* Must be an uninteresting phi. */
365 208 : return bb_rank[bb->index];
366 : }
367 :
368 : /* Return the maximum of RANK and the rank that should be propagated
369 : from expression OP. For most operands, this is just the rank of OP.
370 : For loop-carried phis, the value is zero to avoid undoing the bias
371 : in favor of the phi. */
372 : static int64_t
373 7424470 : propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 : {
375 7424470 : int64_t op_rank;
376 :
377 7424470 : op_rank = get_rank (op);
378 :
379 : /* Check whether op is biased after the get_rank () call, since it might have
380 : updated biased_names. */
381 7424470 : if (TREE_CODE (op) == SSA_NAME
382 7424470 : && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 : {
384 45870 : if (maybe_biased_p == NULL)
385 : return rank;
386 32094 : *maybe_biased_p = true;
387 : }
388 :
389 7410694 : return MAX (rank, op_rank);
390 : }
391 :
392 : /* Look up the operand rank structure for expression E. */
393 :
394 : static inline int64_t
395 13641707 : find_operand_rank (tree e)
396 : {
397 13641707 : int64_t *slot = operand_rank->get (e);
398 13641707 : return slot ? *slot : -1;
399 : }
400 :
401 : /* Insert {E,RANK} into the operand rank hashtable. */
402 :
403 : static inline void
404 14654766 : insert_operand_rank (tree e, int64_t rank)
405 : {
406 14654766 : gcc_assert (rank > 0);
407 14654766 : bool existed = operand_rank->put (e, rank);
408 14654766 : gcc_assert (!existed);
409 14654766 : }
410 :
411 : /* Given an expression E, return the rank of the expression. */
412 :
413 : static int64_t
414 16992754 : get_rank (tree e)
415 : {
416 : /* SSA_NAME's have the rank of the expression they are the result
417 : of.
418 : For globals and uninitialized values, the rank is 0.
419 : For function arguments, use the pre-setup rank.
420 : For PHI nodes, stores, asm statements, etc, we use the rank of
421 : the BB.
422 : For simple operations, the rank is the maximum rank of any of
423 : its operands, or the bb_rank, whichever is less.
424 : I make no claims that this is optimal, however, it gives good
425 : results. */
426 :
427 : /* We make an exception to the normal ranking system to break
428 : dependences of accumulator variables in loops. Suppose we
429 : have a simple one-block loop containing:
430 :
431 : x_1 = phi(x_0, x_2)
432 : b = a + x_1
433 : c = b + d
434 : x_2 = c + e
435 :
436 : As shown, each iteration of the calculation into x is fully
437 : dependent upon the iteration before it. We would prefer to
438 : see this in the form:
439 :
440 : x_1 = phi(x_0, x_2)
441 : b = a + d
442 : c = b + e
443 : x_2 = c + x_1
444 :
445 : If the loop is unrolled, the calculations of b and c from
446 : different iterations can be interleaved.
447 :
448 : To obtain this result during reassociation, we bias the rank
449 : of the phi definition x_1 upward, when it is recognized as an
450 : accumulator pattern. The artificial rank causes it to be
451 : added last, providing the desired independence. */
452 :
453 16992754 : if (TREE_CODE (e) == SSA_NAME)
454 : {
455 13641707 : ssa_op_iter iter;
456 13641707 : gimple *stmt;
457 13641707 : int64_t rank;
458 13641707 : tree op;
459 :
460 : /* If we already have a rank for this expression, use that. */
461 13641707 : rank = find_operand_rank (e);
462 13641707 : if (rank != -1)
463 : return rank;
464 :
465 8484452 : stmt = SSA_NAME_DEF_STMT (e);
466 8484452 : if (gimple_code (stmt) == GIMPLE_PHI)
467 : {
468 1425572 : rank = phi_rank (stmt);
469 1425572 : if (rank != bb_rank[gimple_bb (stmt)->index])
470 69326 : bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 : }
472 :
473 7058880 : else if (!is_gimple_assign (stmt))
474 262688 : rank = bb_rank[gimple_bb (stmt)->index];
475 :
476 : else
477 : {
478 6796192 : bool biased_p = false;
479 6796192 : bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
480 :
481 : /* Otherwise, find the maximum rank for the operands. As an
482 : exception, remove the bias from loop-carried phis when propagating
483 : the rank so that dependent operations are not also biased. */
484 : /* Simply walk over all SSA uses - this takes advatage of the
485 : fact that non-SSA operands are is_gimple_min_invariant and
486 : thus have rank 0. */
487 6796192 : rank = 0;
488 14220662 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
489 7424470 : rank = propagate_rank (rank, op, maybe_biased_p);
490 :
491 6796192 : rank += 1;
492 6796192 : if (biased_p)
493 30794 : bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 : }
495 :
496 8484452 : if (dump_file && (dump_flags & TDF_DETAILS))
497 : {
498 205 : fprintf (dump_file, "Rank for ");
499 205 : print_generic_expr (dump_file, e);
500 205 : fprintf (dump_file, " is %" PRId64 "\n", rank);
501 : }
502 :
503 : /* Note the rank in the hashtable so we don't recompute it. */
504 8484452 : insert_operand_rank (e, rank);
505 8484452 : return rank;
506 : }
507 :
508 : /* Constants, globals, etc., are rank 0 */
509 : return 0;
510 : }
511 :
512 :
/* Classification values returned by constant_type.  A larger value
   sorts later among rank-0 operands (see sort_by_operand_rank).  We
   want integer ones to end up last no matter what, since they are the
   ones we can do the most with.

   The expansions are parenthesized so that the macros remain a single
   operand wherever they are used: shifts bind more loosely than the
   arithmetic operators, so an unparenthesized `1 << 4' would be
   re-associated by a neighbouring `*', `+' or `-'.  */
#define INTEGER_CONST_TYPE (1 << 4)
#define FLOAT_ONE_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
519 :
520 : /* Classify an invariant tree into integer, float, or other, so that
521 : we can sort them to be near other constants of the same type. */
522 : static inline int
523 322120 : constant_type (tree t)
524 : {
525 322120 : if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
526 : return INTEGER_CONST_TYPE;
527 8962 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528 : {
529 : /* Sort -1.0 and 1.0 constants last, while in some cases
530 : const_binop can't optimize some inexact operations, multiplication
531 : by -1.0 or 1.0 can be always merged with others. */
532 6588 : if (real_onep (t) || real_minus_onep (t))
533 816 : return FLOAT_ONE_CONST_TYPE;
534 : return FLOAT_CONST_TYPE;
535 : }
536 : else
537 : return OTHER_CONST_TYPE;
538 : }
539 :
540 : /* qsort comparison function to sort operand entries PA and PB by rank
541 : so that the sorted array is ordered by rank in decreasing order. */
542 : static int
543 23383504 : sort_by_operand_rank (const void *pa, const void *pb)
544 : {
545 23383504 : const operand_entry *oea = *(const operand_entry *const *)pa;
546 23383504 : const operand_entry *oeb = *(const operand_entry *const *)pb;
547 :
548 23383504 : if (oeb->rank != oea->rank)
549 34680421 : return oeb->rank > oea->rank ? 1 : -1;
550 :
551 : /* It's nicer for optimize_expression if constants that are likely
552 : to fold when added/multiplied/whatever are put next to each
553 : other. Since all constants have rank 0, order them by type. */
554 2800083 : if (oea->rank == 0)
555 : {
556 161002 : if (constant_type (oeb->op) != constant_type (oea->op))
557 58 : return constant_type (oea->op) - constant_type (oeb->op);
558 : else
559 : /* To make sorting result stable, we use unique IDs to determine
560 : order. */
561 258583 : return oeb->id > oea->id ? 1 : -1;
562 : }
563 :
564 2639081 : if (TREE_CODE (oea->op) != SSA_NAME)
565 : {
566 0 : if (TREE_CODE (oeb->op) != SSA_NAME)
567 0 : return oeb->id > oea->id ? 1 : -1;
568 : else
569 : return 1;
570 : }
571 2639081 : else if (TREE_CODE (oeb->op) != SSA_NAME)
572 : return -1;
573 :
574 : /* Lastly, make sure the versions that are the same go next to each
575 : other. */
576 2639081 : if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 : {
578 : /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
579 : versions of removed SSA_NAMEs, so if possible, prefer to sort
580 : based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
581 : See PR60418. */
582 2581824 : gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
583 2581824 : gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
584 2581824 : basic_block bba = gimple_bb (stmta);
585 2581824 : basic_block bbb = gimple_bb (stmtb);
586 2581824 : if (bbb != bba)
587 : {
588 : /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
589 : but the other might not. */
590 180788 : if (!bba)
591 : return 1;
592 176509 : if (!bbb)
593 : return -1;
594 : /* If neither is, compare bb_rank. */
595 170583 : if (bb_rank[bbb->index] != bb_rank[bba->index])
596 170583 : return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 : }
598 :
599 2401036 : bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
600 2401036 : bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
601 2401036 : if (da != db)
602 3682323 : return da ? 1 : -1;
603 :
604 54205 : return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 : }
606 :
607 57257 : return oeb->id > oea->id ? 1 : -1;
608 : }
609 :
610 : /* Add an operand entry to *OPS for the tree operand OP. */
611 :
612 : static void
613 9567833 : add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 : {
615 9567833 : operand_entry *oe = operand_entry_pool.allocate ();
616 :
617 9567833 : oe->op = op;
618 9567833 : oe->rank = get_rank (op);
619 9567833 : oe->id = next_operand_entry_id++;
620 9567833 : oe->count = 1;
621 9567833 : oe->stmt_to_insert = stmt_to_insert;
622 9567833 : ops->safe_push (oe);
623 9567833 : }
624 :
625 : /* Add an operand entry to *OPS for the tree operand OP with repeat
626 : count REPEAT. */
627 :
628 : static void
629 18 : add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
630 : HOST_WIDE_INT repeat)
631 : {
632 18 : operand_entry *oe = operand_entry_pool.allocate ();
633 :
634 18 : oe->op = op;
635 18 : oe->rank = get_rank (op);
636 18 : oe->id = next_operand_entry_id++;
637 18 : oe->count = repeat;
638 18 : oe->stmt_to_insert = NULL;
639 18 : ops->safe_push (oe);
640 :
641 18 : reassociate_stats.pows_encountered++;
642 18 : }
643 :
644 : /* Returns true if we can associate the SSA def OP. */
645 :
646 : static bool
647 31463073 : can_reassociate_op_p (tree op)
648 : {
649 31463073 : if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
650 : return false;
651 : /* Uninitialized variables can't participate in reassociation. */
652 31462247 : if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
653 : return false;
654 : /* Make sure asm goto outputs do not participate in reassociation since
655 : we have no way to find an insertion place after asm goto. */
656 31457478 : if (TREE_CODE (op) == SSA_NAME
657 23223502 : && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
658 31479351 : && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
659 70 : return false;
660 : return true;
661 : }
662 :
663 : /* Returns true if we can reassociate operations of TYPE.
664 : That is for integral or non-saturating fixed-point types, and for
665 : floating point type when associative-math is enabled. */
666 :
667 : static bool
668 57258514 : can_reassociate_type_p (tree type)
669 : {
670 57258514 : if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
671 35377879 : || NON_SAT_FIXED_POINT_TYPE_P (type)
672 92636393 : || (flag_associative_math && FLOAT_TYPE_P (type)))
673 22262344 : return true;
674 : return false;
675 : }
676 :
677 : /* Return true if STMT is reassociable operation containing a binary
678 : operation with tree code CODE, and is inside LOOP. */
679 :
680 : static bool
681 7678680 : is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
682 : {
683 7678680 : basic_block bb = gimple_bb (stmt);
684 :
685 7678680 : if (gimple_bb (stmt) == NULL)
686 : return false;
687 :
688 7496869 : if (!flow_bb_inside_loop_p (loop, bb))
689 : return false;
690 :
691 7290846 : if (is_gimple_assign (stmt)
692 5808162 : && gimple_assign_rhs_code (stmt) == code
693 8114473 : && has_single_use (gimple_assign_lhs (stmt)))
694 : {
695 610420 : tree rhs1 = gimple_assign_rhs1 (stmt);
696 610420 : tree rhs2 = gimple_assign_rhs2 (stmt);
697 610420 : if (!can_reassociate_op_p (rhs1)
698 610420 : || (rhs2 && !can_reassociate_op_p (rhs2)))
699 : return false;
700 : return true;
701 : }
702 :
703 : return false;
704 : }
705 :
706 :
707 : /* Return true if STMT is a nop-conversion. */
708 :
709 : static bool
710 7621151 : gimple_nop_conversion_p (gimple *stmt)
711 : {
712 7621151 : if (gassign *ass = dyn_cast <gassign *> (stmt))
713 : {
714 9030958 : if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
715 6735005 : && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
716 1479684 : TREE_TYPE (gimple_assign_rhs1 (ass))))
717 : return true;
718 : }
719 : return false;
720 : }
721 :
722 : /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
723 : operand of the negate operation. Otherwise, return NULL. */
724 :
725 : static tree
726 7529577 : get_unary_op (tree name, enum tree_code opcode)
727 : {
728 7529577 : gimple *stmt = SSA_NAME_DEF_STMT (name);
729 :
730 : /* Look through nop conversions (sign changes). */
731 7529577 : if (gimple_nop_conversion_p (stmt)
732 7529577 : && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
733 895587 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
734 :
735 7529577 : if (!is_gimple_assign (stmt))
736 : return NULL_TREE;
737 :
738 4798677 : if (gimple_assign_rhs_code (stmt) == opcode)
739 124861 : return gimple_assign_rhs1 (stmt);
740 : return NULL_TREE;
741 : }
742 :
743 : /* Return true if OP1 and OP2 have the same value if casted to either type. */
744 :
745 : static bool
746 46903 : ops_equal_values_p (tree op1, tree op2)
747 : {
748 46903 : if (op1 == op2)
749 : return true;
750 :
751 46719 : tree orig_op1 = op1;
752 46719 : if (TREE_CODE (op1) == SSA_NAME)
753 : {
754 46719 : gimple *stmt = SSA_NAME_DEF_STMT (op1);
755 46719 : if (gimple_nop_conversion_p (stmt))
756 : {
757 18217 : op1 = gimple_assign_rhs1 (stmt);
758 18217 : if (op1 == op2)
759 : return true;
760 : }
761 : }
762 :
763 44855 : if (TREE_CODE (op2) == SSA_NAME)
764 : {
765 44855 : gimple *stmt = SSA_NAME_DEF_STMT (op2);
766 44855 : if (gimple_nop_conversion_p (stmt))
767 : {
768 17016 : op2 = gimple_assign_rhs1 (stmt);
769 17016 : if (op1 == op2
770 17016 : || orig_op1 == op2)
771 : return true;
772 : }
773 : }
774 :
775 : return false;
776 : }
777 :
778 :
779 : /* If CURR and LAST are a pair of ops that OPCODE allows us to
780 : eliminate through equivalences, do so, remove them from OPS, and
781 : return true. Otherwise, return false. */
782 :
783 : static bool
784 9456109 : eliminate_duplicate_pair (enum tree_code opcode,
785 : vec<operand_entry *> *ops,
786 : bool *all_done,
787 : unsigned int i,
788 : operand_entry *curr,
789 : operand_entry *last)
790 : {
791 :
792 : /* If we have two of the same op, and the opcode is & |, min, or max,
793 : we can eliminate one of them.
794 : If we have two of the same op, and the opcode is ^, we can
795 : eliminate both of them. */
796 :
797 9456109 : if (last && last->op == curr->op)
798 : {
799 5521 : switch (opcode)
800 : {
801 30 : case MAX_EXPR:
802 30 : case MIN_EXPR:
803 30 : case BIT_IOR_EXPR:
804 30 : case BIT_AND_EXPR:
805 30 : if (dump_file && (dump_flags & TDF_DETAILS))
806 : {
807 1 : fprintf (dump_file, "Equivalence: ");
808 1 : print_generic_expr (dump_file, curr->op);
809 1 : fprintf (dump_file, " [&|minmax] ");
810 1 : print_generic_expr (dump_file, last->op);
811 1 : fprintf (dump_file, " -> ");
812 1 : print_generic_stmt (dump_file, last->op);
813 : }
814 :
815 30 : ops->ordered_remove (i);
816 30 : reassociate_stats.ops_eliminated ++;
817 :
818 30 : return true;
819 :
820 121 : case BIT_XOR_EXPR:
821 121 : if (dump_file && (dump_flags & TDF_DETAILS))
822 : {
823 0 : fprintf (dump_file, "Equivalence: ");
824 0 : print_generic_expr (dump_file, curr->op);
825 0 : fprintf (dump_file, " ^ ");
826 0 : print_generic_expr (dump_file, last->op);
827 0 : fprintf (dump_file, " -> nothing\n");
828 : }
829 :
830 121 : reassociate_stats.ops_eliminated += 2;
831 :
832 121 : if (ops->length () == 2)
833 : {
834 1 : ops->truncate (0);
835 1 : add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
836 1 : *all_done = true;
837 : }
838 : else
839 : {
840 120 : ops->ordered_remove (i-1);
841 120 : ops->ordered_remove (i-1);
842 : }
843 :
844 121 : return true;
845 :
846 : default:
847 : break;
848 : }
849 : }
850 : return false;
851 : }
852 :
853 : static vec<tree> plus_negates;
854 :
855 : /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
856 : expression, look in OPS for a corresponding positive operation to cancel
857 : it out. If we find one, remove the other from OPS, replace
858 : OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
859 : return false. */
860 :
861 : static bool
862 9455958 : eliminate_plus_minus_pair (enum tree_code opcode,
863 : vec<operand_entry *> *ops,
864 : unsigned int currindex,
865 : operand_entry *curr)
866 : {
867 9455958 : tree negateop;
868 9455958 : tree notop;
869 9455958 : unsigned int i;
870 9455958 : operand_entry *oe;
871 :
872 9455958 : if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
873 : return false;
874 :
875 3024292 : negateop = get_unary_op (curr->op, NEGATE_EXPR);
876 3024292 : notop = get_unary_op (curr->op, BIT_NOT_EXPR);
877 3024292 : if (negateop == NULL_TREE && notop == NULL_TREE)
878 : return false;
879 :
880 : /* Any non-negated version will have a rank that is one less than
881 : the current rank. So once we hit those ranks, if we don't find
882 : one, we can stop. */
883 :
884 133602 : for (i = currindex + 1;
885 195869 : ops->iterate (i, &oe)
886 242772 : && oe->rank >= curr->rank - 1 ;
887 : i++)
888 : {
889 46903 : if (negateop
890 46903 : && ops_equal_values_p (oe->op, negateop))
891 : {
892 1612 : if (dump_file && (dump_flags & TDF_DETAILS))
893 : {
894 0 : fprintf (dump_file, "Equivalence: ");
895 0 : print_generic_expr (dump_file, negateop);
896 0 : fprintf (dump_file, " + -");
897 0 : print_generic_expr (dump_file, oe->op);
898 0 : fprintf (dump_file, " -> 0\n");
899 : }
900 :
901 1612 : ops->ordered_remove (i);
902 1612 : add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
903 1612 : ops->ordered_remove (currindex);
904 1612 : reassociate_stats.ops_eliminated ++;
905 :
906 1612 : return true;
907 : }
908 45291 : else if (notop
909 45291 : && ops_equal_values_p (oe->op, notop))
910 : {
911 1860 : tree op_type = TREE_TYPE (oe->op);
912 :
913 1860 : if (dump_file && (dump_flags & TDF_DETAILS))
914 : {
915 0 : fprintf (dump_file, "Equivalence: ");
916 0 : print_generic_expr (dump_file, notop);
917 0 : fprintf (dump_file, " + ~");
918 0 : print_generic_expr (dump_file, oe->op);
919 0 : fprintf (dump_file, " -> -1\n");
920 : }
921 :
922 1860 : ops->ordered_remove (i);
923 1860 : add_to_ops_vec (ops, build_all_ones_cst (op_type));
924 1860 : ops->ordered_remove (currindex);
925 1860 : reassociate_stats.ops_eliminated ++;
926 :
927 1860 : return true;
928 : }
929 : }
930 :
931 : /* If CURR->OP is a negate expr without nop conversion in a plus expr:
932 : save it for later inspection in repropagate_negates(). */
933 86699 : if (negateop != NULL_TREE
934 86699 : && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
935 86178 : plus_negates.safe_push (curr->op);
936 :
937 : return false;
938 : }
939 :
940 : /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
941 : bitwise not expression, look in OPS for a corresponding operand to
942 : cancel it out. If we find one, remove the other from OPS, replace
943 : OPS[CURRINDEX] with 0, and return true. Otherwise, return
944 : false. */
945 :
946 : static bool
947 9456110 : eliminate_not_pairs (enum tree_code opcode,
948 : vec<operand_entry *> *ops,
949 : unsigned int currindex,
950 : operand_entry *curr)
951 : {
952 9456110 : tree notop;
953 9456110 : unsigned int i;
954 9456110 : operand_entry *oe;
955 :
956 9456110 : if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
957 1962944 : || TREE_CODE (curr->op) != SSA_NAME)
958 : return false;
959 :
960 1480993 : notop = get_unary_op (curr->op, BIT_NOT_EXPR);
961 1480993 : if (notop == NULL_TREE)
962 : return false;
963 :
964 : /* Any non-not version will have a rank that is one less than
965 : the current rank. So once we hit those ranks, if we don't find
966 : one, we can stop. */
967 :
968 41497 : for (i = currindex + 1;
969 9483424 : ops->iterate (i, &oe)
970 68811 : && oe->rank >= curr->rank - 1;
971 : i++)
972 : {
973 6808 : if (oe->op == notop)
974 : {
975 1 : if (dump_file && (dump_flags & TDF_DETAILS))
976 : {
977 0 : fprintf (dump_file, "Equivalence: ");
978 0 : print_generic_expr (dump_file, notop);
979 0 : if (opcode == BIT_AND_EXPR)
980 0 : fprintf (dump_file, " & ~");
981 0 : else if (opcode == BIT_IOR_EXPR)
982 0 : fprintf (dump_file, " | ~");
983 0 : print_generic_expr (dump_file, oe->op);
984 0 : if (opcode == BIT_AND_EXPR)
985 0 : fprintf (dump_file, " -> 0\n");
986 0 : else if (opcode == BIT_IOR_EXPR)
987 0 : fprintf (dump_file, " -> -1\n");
988 : }
989 :
990 1 : if (opcode == BIT_AND_EXPR)
991 1 : oe->op = build_zero_cst (TREE_TYPE (oe->op));
992 0 : else if (opcode == BIT_IOR_EXPR)
993 0 : oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
994 :
995 1 : reassociate_stats.ops_eliminated += ops->length () - 1;
996 1 : ops->truncate (0);
997 1 : ops->quick_push (oe);
998 1 : return true;
999 : }
1000 : }
1001 :
1002 : return false;
1003 : }
1004 :
1005 : /* Use constant value that may be present in OPS to try to eliminate
1006 : operands. Note that this function is only really used when we've
1007 : eliminated ops for other reasons, or merged constants. Across
1008 : single statements, fold already does all of this, plus more. There
1009 : is little point in duplicating logic, so I've only included the
1010 : identities that I could ever construct testcases to trigger. */
1011 :
1012 : static void
1013 4595796 : eliminate_using_constants (enum tree_code opcode,
1014 : vec<operand_entry *> *ops)
1015 : {
1016 4595796 : operand_entry *oelast = ops->last ();
1017 4595796 : tree type = TREE_TYPE (oelast->op);
1018 :
1019 4595796 : if (oelast->rank == 0
1020 4595796 : && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1021 : {
1022 3308444 : switch (opcode)
1023 : {
1024 412265 : case BIT_AND_EXPR:
1025 412265 : if (integer_zerop (oelast->op))
1026 : {
1027 0 : if (ops->length () != 1)
1028 : {
1029 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1030 0 : fprintf (dump_file, "Found & 0, removing all other ops\n");
1031 :
1032 0 : reassociate_stats.ops_eliminated += ops->length () - 1;
1033 :
1034 0 : ops->truncate (0);
1035 0 : ops->quick_push (oelast);
1036 1829 : return;
1037 : }
1038 : }
1039 412265 : else if (integer_all_onesp (oelast->op))
1040 : {
1041 4 : if (ops->length () != 1)
1042 : {
1043 4 : if (dump_file && (dump_flags & TDF_DETAILS))
1044 0 : fprintf (dump_file, "Found & -1, removing\n");
1045 4 : ops->pop ();
1046 4 : reassociate_stats.ops_eliminated++;
1047 : }
1048 : }
1049 : break;
1050 69177 : case BIT_IOR_EXPR:
1051 69177 : if (integer_all_onesp (oelast->op))
1052 : {
1053 0 : if (ops->length () != 1)
1054 : {
1055 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1056 0 : fprintf (dump_file, "Found | -1, removing all other ops\n");
1057 :
1058 0 : reassociate_stats.ops_eliminated += ops->length () - 1;
1059 :
1060 0 : ops->truncate (0);
1061 0 : ops->quick_push (oelast);
1062 0 : return;
1063 : }
1064 : }
1065 69177 : else if (integer_zerop (oelast->op))
1066 : {
1067 6 : if (ops->length () != 1)
1068 : {
1069 6 : if (dump_file && (dump_flags & TDF_DETAILS))
1070 0 : fprintf (dump_file, "Found | 0, removing\n");
1071 6 : ops->pop ();
1072 6 : reassociate_stats.ops_eliminated++;
1073 : }
1074 : }
1075 : break;
1076 933539 : case MULT_EXPR:
1077 933539 : if (integer_zerop (oelast->op)
1078 933539 : || (FLOAT_TYPE_P (type)
1079 1394 : && !HONOR_NANS (type)
1080 1304 : && !HONOR_SIGNED_ZEROS (type)
1081 1304 : && real_zerop (oelast->op)))
1082 : {
1083 0 : if (ops->length () != 1)
1084 : {
1085 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1086 0 : fprintf (dump_file, "Found * 0, removing all other ops\n");
1087 :
1088 0 : reassociate_stats.ops_eliminated += ops->length () - 1;
1089 0 : ops->truncate (0);
1090 0 : ops->quick_push (oelast);
1091 0 : return;
1092 : }
1093 : }
1094 933539 : else if (integer_onep (oelast->op)
1095 933539 : || (FLOAT_TYPE_P (type)
1096 1394 : && !HONOR_SNANS (type)
1097 1394 : && real_onep (oelast->op)))
1098 : {
1099 4 : if (ops->length () != 1)
1100 : {
1101 4 : if (dump_file && (dump_flags & TDF_DETAILS))
1102 0 : fprintf (dump_file, "Found * 1, removing\n");
1103 4 : ops->pop ();
1104 4 : reassociate_stats.ops_eliminated++;
1105 4 : return;
1106 : }
1107 : }
1108 : break;
1109 1756475 : case BIT_XOR_EXPR:
1110 1756475 : case PLUS_EXPR:
1111 1756475 : case MINUS_EXPR:
1112 1756475 : if (integer_zerop (oelast->op)
1113 1756475 : || (FLOAT_TYPE_P (type)
1114 741 : && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1115 741 : && fold_real_zero_addition_p (type, 0, oelast->op,
1116 : opcode == MINUS_EXPR)))
1117 : {
1118 1825 : if (ops->length () != 1)
1119 : {
1120 1825 : if (dump_file && (dump_flags & TDF_DETAILS))
1121 0 : fprintf (dump_file, "Found [|^+] 0, removing\n");
1122 1825 : ops->pop ();
1123 1825 : reassociate_stats.ops_eliminated++;
1124 1825 : return;
1125 : }
1126 : }
1127 : break;
1128 : default:
1129 : break;
1130 : }
1131 : }
1132 : }
1133 :
1134 :
1135 : static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1136 : bool, bool);
1137 :
1138 : /* Structure for tracking and counting operands. One record exists per
   distinct (OP, OECODE) pair seen while undistribute_ops_list scans the
   candidate sub-chains. */
1139 : struct oecount {
1140 : unsigned int cnt; /* Number of times this (op, oecode) pair was seen. */
1141 : unsigned int id; /* Creation sequence number; tie-breaker in oecount_cmp. */
1142 : enum tree_code oecode; /* RHS code of the statement the operand came from. */
1143 : tree op; /* The operand itself. */
1144 : };
1145 :
1146 :
1147 : /* The heap for the oecount hashtable and the sorted list of operands.
   The hash table stores biased indices into this vector (see
   oecount_hasher); undistribute_ops_list creates it, sorts it with
   oecount_cmp and releases it again. */
1148 : static vec<oecount> cvec;
1149 :
1150 :
1151 : /* Oecount hashtable helpers. The table does not store oecount records
   but ints: the index of the record in CVEC plus 42 (hash and equal
   subtract 42 again). NOTE(review): the bias presumably keeps real
   entries away from the 0 and 1 given to int_hash, which look like its
   empty/deleted marker values -- confirm against hash-traits.h. */
1152 :
1153 : struct oecount_hasher : int_hash <int, 0, 1>
1154 : {
1155 : static inline hashval_t hash (int);
1156 : static inline bool equal (int, int);
1157 : };
1158 :
1159 : /* Hash function for oecount. */
1160 :
1161 : inline hashval_t
1162 160103 : oecount_hasher::hash (int p)
1163 : {
1164 160103 : const oecount *c = &cvec[p - 42];
1165 160103 : return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1166 : }
1167 :
1168 : /* Comparison function for oecount. */
1169 :
1170 : inline bool
1171 84962 : oecount_hasher::equal (int p1, int p2)
1172 : {
1173 84962 : const oecount *c1 = &cvec[p1 - 42];
1174 84962 : const oecount *c2 = &cvec[p2 - 42];
1175 84962 : return c1->oecode == c2->oecode && c1->op == c2->op;
1176 : }
1177 :
1178 : /* Comparison function for qsort sorting oecount elements by count. */
1179 :
1180 : static int
1181 621441 : oecount_cmp (const void *p1, const void *p2)
1182 : {
1183 621441 : const oecount *c1 = (const oecount *)p1;
1184 621441 : const oecount *c2 = (const oecount *)p2;
1185 621441 : if (c1->cnt != c2->cnt)
1186 13528 : return c1->cnt > c2->cnt ? 1 : -1;
1187 : else
1188 : /* If counts are identical, use unique IDs to stabilize qsort. */
1189 897144 : return c1->id > c2->id ? 1 : -1;
1190 : }
1191 :
1192 : /* Return TRUE iff STMT represents a builtin call that raises OP
1193 : to some exponent. */
1194 :
1195 : static bool
1196 1149 : stmt_is_power_of_op (gimple *stmt, tree op)
1197 : {
1198 1149 : if (!is_gimple_call (stmt))
1199 : return false;
1200 :
1201 11 : switch (gimple_call_combined_fn (stmt))
1202 : {
1203 6 : CASE_CFN_POW:
1204 6 : CASE_CFN_POWI:
1205 6 : return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1206 :
1207 : default:
1208 : return false;
1209 : }
1210 : }
1211 :
1212 : /* Given STMT which is a __builtin_pow* call, decrement its exponent
1213 : in place and return the result. Assumes that stmt_is_power_of_op
1214 : was previously called for STMT and returned TRUE. */
1215 :
1216 : static HOST_WIDE_INT
1217 6 : decrement_power (gimple *stmt)
1218 : {
1219 6 : REAL_VALUE_TYPE c, cint;
1220 6 : HOST_WIDE_INT power;
1221 6 : tree arg1;
1222 :
1223 6 : switch (gimple_call_combined_fn (stmt))
1224 : {
1225 0 : CASE_CFN_POW:
1226 0 : arg1 = gimple_call_arg (stmt, 1);
1227 0 : c = TREE_REAL_CST (arg1);
1228 0 : power = real_to_integer (&c) - 1;
1229 0 : real_from_integer (&cint, VOIDmode, power, SIGNED);
1230 0 : gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1231 0 : return power;
1232 :
1233 6 : CASE_CFN_POWI:
1234 6 : arg1 = gimple_call_arg (stmt, 1);
1235 6 : power = TREE_INT_CST_LOW (arg1) - 1;
1236 6 : gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1237 6 : return power;
1238 :
1239 0 : default:
1240 0 : gcc_unreachable ();
1241 : }
1242 : }
1243 :
1244 : /* Replace SSA defined by STMT and replace all its uses with new
1245 : SSA. Also return the new SSA. */
1246 :
1247 : static tree
1248 326 : make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1249 : {
1250 326 : gimple *use_stmt;
1251 326 : use_operand_p use;
1252 326 : imm_use_iterator iter;
1253 326 : tree new_lhs, new_debug_lhs = NULL_TREE;
1254 326 : tree lhs = gimple_get_lhs (stmt);
1255 :
1256 326 : new_lhs = make_ssa_name (TREE_TYPE (lhs));
1257 326 : gimple_set_lhs (stmt, new_lhs);
1258 :
1259 : /* Also need to update GIMPLE_DEBUGs. */
1260 1035 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1261 : {
1262 383 : tree repl = new_lhs;
1263 383 : if (is_gimple_debug (use_stmt))
1264 : {
1265 57 : if (new_debug_lhs == NULL_TREE)
1266 : {
1267 21 : new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1268 21 : gdebug *def_temp
1269 21 : = gimple_build_debug_bind (new_debug_lhs,
1270 21 : build2 (opcode, TREE_TYPE (lhs),
1271 : new_lhs, op),
1272 : stmt);
1273 21 : gimple_set_uid (def_temp, gimple_uid (stmt));
1274 21 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1275 21 : gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1276 : }
1277 : repl = new_debug_lhs;
1278 : }
1279 1149 : FOR_EACH_IMM_USE_ON_STMT (use, iter)
1280 383 : SET_USE (use, repl);
1281 383 : update_stmt (use_stmt);
1282 326 : }
1283 326 : return new_lhs;
1284 : }
1285 :
1286 : /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1287 : uses with new SSAs. Also do this for the stmt that defines DEF
1288 : if *DEF is not OP. */
1289 :
1290 : static void
1291 259 : make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1292 : vec<gimple *> &stmts_to_fix)
1293 : {
1294 259 : unsigned i;
1295 259 : gimple *stmt;
1296 :
1297 259 : if (*def != op
1298 259 : && TREE_CODE (*def) == SSA_NAME
1299 259 : && (stmt = SSA_NAME_DEF_STMT (*def))
1300 518 : && gimple_code (stmt) != GIMPLE_NOP)
1301 259 : *def = make_new_ssa_for_def (stmt, opcode, op);
1302 :
1303 326 : FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1304 67 : make_new_ssa_for_def (stmt, opcode, op);
1305 259 : }
1306 :
1307 : /* Find the single immediate use of STMT's LHS, and replace it
1308 : with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1309 : replace *DEF with OP as well. */
1310 :
1311 : static void
1312 781 : propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1313 : {
1314 781 : tree lhs;
1315 781 : gimple *use_stmt;
1316 781 : use_operand_p use;
1317 781 : gimple_stmt_iterator gsi;
1318 :
1319 781 : if (is_gimple_call (stmt))
1320 1 : lhs = gimple_call_lhs (stmt);
1321 : else
1322 780 : lhs = gimple_assign_lhs (stmt);
1323 :
1324 781 : gcc_assert (has_single_use (lhs));
1325 781 : single_imm_use (lhs, &use, &use_stmt);
1326 781 : if (lhs == *def)
1327 531 : *def = op;
1328 781 : SET_USE (use, op);
1329 781 : if (TREE_CODE (op) != SSA_NAME)
1330 39 : update_stmt (use_stmt);
1331 781 : gsi = gsi_for_stmt (stmt);
1332 781 : unlink_stmt_vdef (stmt);
1333 781 : reassoc_remove_stmt (&gsi);
1334 781 : release_defs (stmt);
1335 781 : }
1336 :
1337 : /* Walks the linear chain with result *DEF searching for an operation
1338 : with operand OP and code OPCODE removing that from the chain. *DEF
1339 : is updated if there is only one operand but no operation left. */
        /* The walk follows rhs1 of each statement.  Statements that were
           walked past (and some rewritten in place) are collected and given
           fresh SSA names at the end, because the values they compute change
           once OP has been taken out of the chain.  */
1340 :
1341 : static void
1342 790 : zero_one_operation (tree *def, enum tree_code opcode, tree op)
1343 : {
1344 790 : tree orig_def = *def;
1345 790 : gimple *stmt = SSA_NAME_DEF_STMT (*def);
1346 : /* PR72835 - Record the stmt chain that has to be updated such that
1347 : we don't use the same LHS when the values computed are different. */
1348 790 : auto_vec<gimple *, 64> stmts_to_fix;
1349 :
1350 1424 : do
1351 : {
1352 1107 : tree name;
1353 :
1354 1107 : if (opcode == MULT_EXPR)
1355 : {
        /* A factor OP may hide in a pow/powi call with base OP: lower
           the exponent by one instead.  */
1356 1105 : if (stmt_is_power_of_op (stmt, op))
1357 : {
1358 6 : if (decrement_power (stmt) == 1)
1359 : {
        /* The call now computes OP itself and is removed, so it no
           longer needs a new LHS.  */
1360 1 : if (stmts_to_fix.length () > 0)
1361 1 : stmts_to_fix.pop ();
1362 1 : propagate_op_to_single_use (op, stmt, def);
1363 : }
1364 : break;
1365 : }
1366 1099 : else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1367 : {
1368 15 : if (gimple_assign_rhs1 (stmt) == op)
1369 : {
        /* -OP with the factor OP removed leaves the constant -1.  */
1370 11 : tree cst = build_minus_one_cst (TREE_TYPE (op));
1371 11 : if (stmts_to_fix.length () > 0)
1372 11 : stmts_to_fix.pop ();
1373 11 : propagate_op_to_single_use (cst, stmt, def);
1374 11 : break;
1375 : }
1376 4 : else if (integer_minus_onep (op)
1377 4 : || real_minus_onep (op))
1378 : {
        /* Removing the factor -1 from -X: change the negate into a
           plain copy of its operand.  */
1379 4 : gimple_assign_set_rhs_code
1380 4 : (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1381 4 : break;
1382 : }
1383 : }
1384 : }
1385 :
1386 1086 : name = gimple_assign_rhs1 (stmt);
1387 :
1388 : /* If this is the operation we look for and one of the operands
1389 : is ours simply propagate the other operand into the stmts
1390 : single use. */
1391 1086 : if (gimple_assign_rhs_code (stmt) == opcode
1392 1086 : && (name == op
1393 884 : || gimple_assign_rhs2 (stmt) == op))
1394 : {
1395 769 : if (name == op)
1396 202 : name = gimple_assign_rhs2 (stmt);
1397 769 : if (stmts_to_fix.length () > 0)
1398 238 : stmts_to_fix.pop ();
1399 769 : propagate_op_to_single_use (name, stmt, def);
1400 769 : break;
1401 : }
1402 :
1403 : /* We might have a multiply of two __builtin_pow* calls, and
1404 : the operand might be hiding in the rightmost one. Likewise
1405 : this can happen for a negate. */
1406 317 : if (opcode == MULT_EXPR
1407 317 : && gimple_assign_rhs_code (stmt) == opcode
1408 317 : && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1409 569 : && has_single_use (gimple_assign_rhs2 (stmt)))
1410 : {
1411 44 : gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1412 44 : if (stmt_is_power_of_op (stmt2, op))
1413 : {
1414 0 : if (decrement_power (stmt2) == 1)
1415 0 : propagate_op_to_single_use (op, stmt2, def);
1416 : else
        /* The call stays but computes a different value now.  */
1417 0 : stmts_to_fix.safe_push (stmt2);
1418 0 : break;
1419 : }
1420 44 : else if (is_gimple_assign (stmt2)
1421 44 : && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1422 : {
1423 0 : if (gimple_assign_rhs1 (stmt2) == op)
1424 : {
1425 0 : tree cst = build_minus_one_cst (TREE_TYPE (op));
1426 0 : propagate_op_to_single_use (cst, stmt2, def);
1427 0 : break;
1428 : }
1429 0 : else if (integer_minus_onep (op)
1430 0 : || real_minus_onep (op))
1431 : {
1432 0 : stmts_to_fix.safe_push (stmt2);
1433 0 : gimple_assign_set_rhs_code
1434 0 : (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1435 0 : break;
1436 : }
1437 : }
1438 : }
1439 :
1440 : /* Continue walking the chain. */
1441 317 : gcc_assert (name != op
1442 : && TREE_CODE (name) == SSA_NAME);
1443 317 : stmt = SSA_NAME_DEF_STMT (name);
1444 317 : stmts_to_fix.safe_push (stmt);
1445 317 : }
1446 : while (1);
1447 :
        /* Rename when statements were recorded, or when *DEF was not
           replaced by a propagation above.  */
1448 790 : if (stmts_to_fix.length () > 0 || *def == orig_def)
1449 259 : make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1450 790 : }
1451 :
1452 : /* Returns true if statement S1 dominates statement S2. Like
1453 : stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1454 :
1455 : static bool
1456 5480493 : reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1457 : {
1458 5480493 : basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1459 :
1460 : /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1461 : SSA_NAME. Assume it lives at the beginning of function and
1462 : thus dominates everything. */
1463 5480493 : if (!bb1 || s1 == s2)
1464 : return true;
1465 :
1466 : /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1467 5477324 : if (!bb2)
1468 : return false;
1469 :
1470 5470006 : if (bb1 == bb2)
1471 : {
1472 : /* PHIs in the same basic block are assumed to be
1473 : executed all in parallel, if only one stmt is a PHI,
1474 : it dominates the other stmt in the same basic block. */
1475 5340592 : if (gimple_code (s1) == GIMPLE_PHI)
1476 : return true;
1477 :
1478 5228761 : if (gimple_code (s2) == GIMPLE_PHI)
1479 : return false;
1480 :
1481 5179625 : gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1482 :
1483 5179625 : if (gimple_uid (s1) < gimple_uid (s2))
1484 : return true;
1485 :
1486 2803308 : if (gimple_uid (s1) > gimple_uid (s2))
1487 : return false;
1488 :
1489 34604 : gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1490 34604 : unsigned int uid = gimple_uid (s1);
1491 75617 : for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1492 : {
1493 73800 : gimple *s = gsi_stmt (gsi);
1494 73800 : if (gimple_uid (s) != uid)
1495 : break;
1496 44282 : if (s == s2)
1497 : return true;
1498 : }
1499 :
1500 : return false;
1501 : }
1502 :
1503 129414 : return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1504 : }
1505 :
1506 : /* Insert STMT after INSERT_POINT. */
1507 :
1508 : static void
1509 41854 : insert_stmt_after (gimple *stmt, gimple *insert_point)
1510 : {
1511 41854 : gimple_stmt_iterator gsi;
1512 41854 : basic_block bb;
1513 :
1514 41854 : if (gimple_code (insert_point) == GIMPLE_PHI)
1515 45 : bb = gimple_bb (insert_point);
1516 41809 : else if (!stmt_ends_bb_p (insert_point))
1517 : {
1518 41806 : gsi = gsi_for_stmt (insert_point);
1519 41806 : gimple_set_uid (stmt, gimple_uid (insert_point));
1520 41806 : gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1521 41806 : return;
1522 : }
1523 3 : else if (gimple_code (insert_point) == GIMPLE_ASM
1524 3 : && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1525 : /* We have no idea where to insert - it depends on where the
1526 : uses will be placed. */
1527 0 : gcc_unreachable ();
1528 : else
1529 : /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1530 : thus if it must end a basic block, it should be a call that can
1531 : throw, or some assignment that can throw. If it throws, the LHS
1532 : of it will not be initialized though, so only valid places using
1533 : the SSA_NAME should be dominated by the fallthru edge. */
1534 3 : bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1535 48 : gsi = gsi_after_labels (bb);
1536 48 : if (gsi_end_p (gsi))
1537 : {
1538 0 : gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1539 0 : gimple_set_uid (stmt,
1540 0 : gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1541 : }
1542 : else
1543 48 : gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1544 48 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1545 : }
1546 :
1547 : /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1548 : the result. Places the statement after the definition of either
1549 : OP1 or OP2. Returns the new statement. */
1550 :
1551 : static gimple *
1552 8410 : build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1553 : {
1554 8410 : gimple *op1def = NULL, *op2def = NULL;
1555 8410 : gimple_stmt_iterator gsi;
1556 8410 : tree op;
1557 8410 : gassign *sum;
1558 :
1559 : /* Create the addition statement. */
1560 8410 : op = make_ssa_name (type);
1561 8410 : sum = gimple_build_assign (op, opcode, op1, op2);
1562 :
1563 : /* Find an insertion place and insert. */
1564 8410 : if (TREE_CODE (op1) == SSA_NAME)
1565 8409 : op1def = SSA_NAME_DEF_STMT (op1);
1566 8410 : if (TREE_CODE (op2) == SSA_NAME)
1567 8138 : op2def = SSA_NAME_DEF_STMT (op2);
1568 8409 : if ((!op1def || gimple_nop_p (op1def))
1569 8503 : && (!op2def || gimple_nop_p (op2def)))
1570 : {
1571 93 : gsi = gsi_start_nondebug_after_labels_bb
1572 93 : (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1573 93 : if (!gsi_end_p (gsi)
1574 93 : && is_gimple_call (gsi_stmt (gsi))
1575 105 : && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1576 : {
1577 : /* Don't add statements before a returns_twice call at the start
1578 : of a function. */
1579 2 : split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1580 2 : gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1581 : }
1582 93 : if (gsi_end_p (gsi))
1583 : {
1584 2 : gimple_stmt_iterator gsi2
1585 2 : = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1586 2 : gimple_set_uid (sum,
1587 2 : gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1588 : }
1589 : else
1590 91 : gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1591 93 : gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1592 : }
1593 : else
1594 : {
1595 8317 : gimple *insert_point;
1596 8316 : if ((!op1def || gimple_nop_p (op1def))
1597 16633 : || (op2def && !gimple_nop_p (op2def)
1598 8025 : && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1599 : insert_point = op2def;
1600 : else
1601 : insert_point = op1def;
1602 8317 : insert_stmt_after (sum, insert_point);
1603 : }
1604 8410 : update_stmt (sum);
1605 :
1606 8410 : return sum;
1607 : }
1608 :
1609 : /* Perform un-distribution of divisions and multiplications.
1610 : A * X + B * X is transformed into (A + B) * X and A / X + B / X
1611 : to (A + B) / X for real X.
1612 :
1613 : The algorithm is organized as follows.
1614 :
1615 : - First we walk the addition chain *OPS looking for summands that
1616 : are defined by a multiplication or a real division. This results
1617 : in the candidates bitmap with relevant indices into *OPS.
1618 :
1619 : - Second we build the chains of multiplications or divisions for
1620 : these candidates, counting the number of occurrences of (operand, code)
1621 : pairs in all of the candidates chains.
1622 :
1623 : - Third we sort the (operand, code) pairs by number of occurrence and
1624 : process them starting with the pair with the most uses.
1625 :
1626 : * For each such pair we walk the candidates again to build a
1627 : second candidate bitmap noting all multiplication/division chains
1628 : that have at least one occurrence of (operand, code).
1629 :
1630 : * We build an alternate addition chain only covering these
1631 : candidates with one (operand, code) operation removed from their
1632 : multiplication/division chain.
1633 :
1634 : * The first candidate gets replaced by the alternate addition chain
1635 : multiplied/divided by the operand.
1636 :
1637 : * All candidate chains get disabled for further processing and
1638 : processing of (operand, code) pairs continues.
1639 :
1640 : The alternate addition chains built are re-processed by the main
1641 : reassociation algorithm which allows optimizing a * x * y + b * y * x
1642 : to (a + b ) * x * y in one invocation of the reassociation pass. */
1643 :
1644 : static bool
1645 4593748 : undistribute_ops_list (enum tree_code opcode,
1646 : vec<operand_entry *> *ops, class loop *loop)
1647 : {
        /* OPS is the operand list of an OPCODE chain; only PLUS_EXPR chains
           with more than one operand are handled.  LOOP is passed on to
           is_reassociable_op.  Returns true if OPS was changed.  */
1648 4593748 : unsigned int length = ops->length ();
1649 4593748 : operand_entry *oe1;
1650 4593748 : unsigned i, j;
1651 4593748 : unsigned nr_candidates, nr_candidates2;
1652 4593748 : sbitmap_iterator sbi0;
1653 4593748 : vec<operand_entry *> *subops;
1654 4593748 : bool changed = false;
1655 4593748 : unsigned int next_oecount_id = 0;
1656 :
1657 4593748 : if (length <= 1
1658 4593748 : || opcode != PLUS_EXPR)
1659 : return false;
1660 :
1661 : /* Build a list of candidates to process. */
1662 2286887 : auto_sbitmap candidates (length);
1663 2286887 : bitmap_clear (candidates);
1664 2286887 : nr_candidates = 0;
1665 7047200 : FOR_EACH_VEC_ELT (*ops, i, oe1)
1666 : {
1667 4760313 : enum tree_code dcode;
1668 4760313 : gimple *oe1def;
1669 :
1670 4760313 : if (TREE_CODE (oe1->op) != SSA_NAME)
1671 1743587 : continue;
1672 3016726 : oe1def = SSA_NAME_DEF_STMT (oe1->op);
1673 3016726 : if (!is_gimple_assign (oe1def))
1674 1076791 : continue;
1675 1939935 : dcode = gimple_assign_rhs_code (oe1def);
1676 3648774 : if ((dcode != MULT_EXPR
1677 1939935 : && dcode != RDIV_EXPR)
1678 1939935 : || !is_reassociable_op (oe1def, dcode, loop))
1679 1708839 : continue;
1680 :
1681 231096 : bitmap_set_bit (candidates, i);
1682 231096 : nr_candidates++;
1683 : }
1684 :
        /* A common factor needs at least two summands to be shared by.  */
1685 2286887 : if (nr_candidates < 2)
1686 : return false;
1687 :
1688 17307 : if (dump_file && (dump_flags & TDF_DETAILS))
1689 : {
1690 1 : fprintf (dump_file, "searching for un-distribute opportunities ");
1691 2 : print_generic_expr (dump_file,
1692 1 : (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
        /* NOTE(review): nr_candidates is unsigned but printed with %d;
           %u would match the type.  */
1693 1 : fprintf (dump_file, " %d\n", nr_candidates);
1694 : }
1695 :
1696 : /* Build linearized sub-operand lists and the counting table. */
1697 17307 : cvec.create (0);
1698 :
1699 17307 : hash_table<oecount_hasher> ctable (15);
1700 :
1701 : /* ??? Macro arguments cannot have multi-argument template types in
1702 : them. This typedef is needed to workaround that limitation. */
1703 17307 : typedef vec<operand_entry *> vec_operand_entry_t_heap;
        /* One (initially empty) sub-operand vector per entry of OPS; only
           the candidate slots get filled.  */
1704 34614 : subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1705 76485 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1706 : {
1707 41871 : gimple *oedef;
1708 41871 : enum tree_code oecode;
1709 41871 : unsigned j;
1710 :
1711 41871 : oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1712 41871 : oecode = gimple_assign_rhs_code (oedef);
1713 83742 : linearize_expr_tree (&subops[i], oedef,
1714 41871 : associative_tree_code (oecode), false);
1715 :
1716 168276 : FOR_EACH_VEC_ELT (subops[i], j, oe1)
1717 : {
1718 84534 : oecount c;
1719 84534 : int *slot;
1720 84534 : int idx;
1721 84534 : c.oecode = oecode;
1722 84534 : c.cnt = 1;
1723 84534 : c.id = next_oecount_id++;
1724 84534 : c.op = oe1->op;
        /* Push the record tentatively: the hasher looks records up in
           CVEC by index, so it has to be there before the lookup.  */
1725 84534 : cvec.safe_push (c);
        /* Index of the record just pushed, plus 42 (see oecount_hasher).  */
1726 84534 : idx = cvec.length () + 41;
1727 84534 : slot = ctable.find_slot (idx, INSERT);
1728 84534 : if (!*slot)
1729 : {
1730 83561 : *slot = idx;
1731 : }
1732 : else
1733 : {
        /* Pair already known: drop the tentative record and count
           the occurrence on the existing one.  */
1734 973 : cvec.pop ();
1735 973 : cvec[*slot - 42].cnt++;
1736 : }
1737 : }
1738 : }
1739 :
1740 : /* Sort the counting table. */
        /* oecount_cmp sorts ascending, so the most frequent pair ends up
           last.  CTABLE is not used after this point.  */
1741 17307 : cvec.qsort (oecount_cmp);
1742 :
1743 17307 : if (dump_file && (dump_flags & TDF_DETAILS))
1744 : {
1745 1 : oecount *c;
1746 1 : fprintf (dump_file, "Candidates:\n");
1747 5 : FOR_EACH_VEC_ELT (cvec, j, c)
1748 : {
1749 3 : fprintf (dump_file, " %u %s: ", c->cnt,
1750 3 : c->oecode == MULT_EXPR
1751 : ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1752 3 : print_generic_expr (dump_file, c->op);
1753 3 : fprintf (dump_file, "\n");
1754 : }
1755 : }
1756 :
1757 : /* Process the (operand, code) pairs in order of most occurrence. */
1758 17307 : auto_sbitmap candidates2 (length);
1759 17972 : while (!cvec.is_empty ())
1760 : {
1761 17899 : oecount *c = &cvec.last ();
        /* All remaining pairs occur at most as often as this one.  */
1762 17899 : if (c->cnt < 2)
1763 : break;
1764 :
1765 : /* Now collect the operands in the outer chain that contain
1766 : the common operand in their inner chain. */
1767 665 : bitmap_clear (candidates2);
1768 665 : nr_candidates2 = 0;
1769 4550 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1770 : {
1771 3220 : gimple *oedef;
1772 3220 : enum tree_code oecode;
1773 3220 : unsigned j;
1774 3220 : tree op = (*ops)[i]->op;
1775 :
1776 : /* If we undistributed in this chain already this may be
1777 : a constant. */
1778 3220 : if (TREE_CODE (op) != SSA_NAME)
1779 762 : continue;
1780 :
1781 2458 : oedef = SSA_NAME_DEF_STMT (op);
1782 2458 : oecode = gimple_assign_rhs_code (oedef);
1783 2458 : if (oecode != c->oecode)
1784 0 : continue;
1785 :
1786 9025 : FOR_EACH_VEC_ELT (subops[i], j, oe1)
1787 : {
1788 4471 : if (oe1->op == c->op)
1789 : {
1790 1124 : bitmap_set_bit (candidates2, i);
1791 1124 : ++nr_candidates2;
1792 1124 : break;
1793 : }
1794 : }
1795 : }
1796 :
1797 665 : if (nr_candidates2 >= 2)
1798 : {
1799 285 : operand_entry *oe1, *oe2;
1800 285 : gimple *prod;
1801 285 : int first = bitmap_first_set_bit (candidates2);
1802 :
1803 : /* Build the new addition chain. */
1804 285 : oe1 = (*ops)[first];
1805 285 : if (dump_file && (dump_flags & TDF_DETAILS))
1806 : {
1807 0 : fprintf (dump_file, "Building (");
1808 0 : print_generic_expr (dump_file, oe1->op);
1809 : }
        /* Strip one occurrence of the common operand from the first
           candidate; OE1 then accumulates the sum of all stripped
           candidates.  */
1810 285 : zero_one_operation (&oe1->op, c->oecode, c->op);
1811 790 : EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1812 : {
1813 505 : gimple *sum;
1814 505 : oe2 = (*ops)[i];
1815 505 : if (dump_file && (dump_flags & TDF_DETAILS))
1816 : {
1817 0 : fprintf (dump_file, " + ");
1818 0 : print_generic_expr (dump_file, oe2->op);
1819 : }
1820 505 : zero_one_operation (&oe2->op, c->oecode, c->op);
1821 505 : sum = build_and_add_sum (TREE_TYPE (oe1->op),
1822 : oe1->op, oe2->op, opcode);
        /* OE2 has been folded into the sum; leave a zero of rank 0 in
           its slot of the outer chain.  */
1823 505 : oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1824 505 : oe2->rank = 0;
1825 505 : oe1->op = gimple_get_lhs (sum);
1826 : }
1827 :
1828 : /* Apply the multiplication/division. */
1829 285 : prod = build_and_add_sum (TREE_TYPE (oe1->op),
1830 : oe1->op, c->op, c->oecode);
1831 285 : if (dump_file && (dump_flags & TDF_DETAILS))
1832 : {
1833 0 : fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1834 0 : print_generic_expr (dump_file, c->op);
1835 0 : fprintf (dump_file, "\n");
1836 : }
1837 :
1838 : /* Record it in the addition chain and disable further
1839 : undistribution with this op. */
1840 285 : oe1->op = gimple_assign_lhs (prod);
1841 285 : oe1->rank = get_rank (oe1->op);
        /* With its sub-operand list emptied, slot FIRST can no longer
           match any later pair.  */
1842 285 : subops[first].release ();
1843 :
1844 285 : changed = true;
1845 : }
1846 :
1847 665 : cvec.pop ();
1848 : }
1849 :
        /* Release the per-operand vectors, the array holding them, and the
           counting table.  */
1850 74621 : for (i = 0; i < ops->length (); ++i)
1851 57314 : subops[i].release ();
1852 17307 : free (subops);
1853 17307 : cvec.release ();
1854 :
1855 17307 : return changed;
1856 2286887 : }
1857 :
1858 : /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1859 : first: element index for each relevant BIT_FIELD_REF.
1860 : second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1861 : typedef std::pair<unsigned, unsigned> v_info_elem;
        /* Per-vector record.  Instances are heap-allocated: cleanup_vinfo_map
           deletes them through v_info_ptr.  */
1862 7043 : struct v_info {
1863 : tree vec_type; /* NOTE(review): presumably the type of the vector SSA_NAME -- confirm at the use sites. */
1864 : auto_vec<v_info_elem, 32> vec; /* The (element index, ops index) pairs collected for this vector. */
1865 : };
1866 : typedef v_info *v_info_ptr;
1867 :
1868 : /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1869 : static int
1870 10577 : sort_by_mach_mode (const void *p_i, const void *p_j)
1871 : {
1872 10577 : const tree tr1 = *((const tree *) p_i);
1873 10577 : const tree tr2 = *((const tree *) p_j);
1874 10577 : unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1875 10577 : unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1876 10577 : if (mode1 > mode2)
1877 : return 1;
1878 10540 : else if (mode1 < mode2)
1879 : return -1;
1880 10487 : if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1881 : return -1;
1882 5063 : else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1883 5063 : return 1;
1884 : return 0;
1885 : }
1886 :
1887 : /* Cleanup hash map for VECTOR information. */
1888 : static void
1889 4395933 : cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1890 : {
1891 4402976 : for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1892 4410019 : it != info_map.end (); ++it)
1893 : {
1894 7043 : v_info_ptr info = (*it).second;
1895 7043 : delete info;
1896 7043 : (*it).second = NULL;
1897 : }
1898 4395933 : }
1899 :
1900 : /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1901 : V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1902 : is transformed to
1903 : Vs = (V1 + V2 + ... + Vn)
1904 : Vs[0] + Vs[1] + ... + Vs[k]
1905 :
1906 : The basic steps are listed below:
1907 :
1908 : 1) Check the addition chain *OPS by looking for those summands that come
1909 : from a BIT_FIELD_REF on a VECTOR type. Put the information into
1910 : v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1911 :
1912 : 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1913 : contiguous, i.e. they cover the whole VECTOR perfectly without any holes.
1914 : Obtain one VECTOR list which contains the candidates to be transformed.
1915 :
1916 : 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1917 : candidates with same mode, build the addition statements for them and
1918 : generate BIT_FIELD_REFs accordingly.
1919 :
         OPCODE is the operation joining the operands in *OPS; only PLUS_EXPR,
         MULT_EXPR, BIT_XOR_EXPR, BIT_IOR_EXPR and BIT_AND_EXPR are handled.
         *OPS is updated in place.  LOOP is only forwarded to
         is_reassociable_op.

         Returns false if *OPS has at most one entry, OPCODE is not handled, or
         fewer than two VECTORs pass the checks of steps 1 and 2.  Returns true
         once step 3 has been entered.
         NOTE(review): true is returned even when every candidate in step 3 was
         skipped because no other candidate shares its mode -- confirm that
         callers tolerate this.

1920 : TODO:
1921 : The current implementation requires that whole VECTORs are fully
1922 : covered, but it could be extended to support partial coverage (adjacent
1923 : elements that do not fill the whole VECTOR); that may need some cost
1924 : model to decide when the transformation is worthwhile.
1925 : */
1926 : static bool
1927 4593748 : undistribute_bitref_for_vector (enum tree_code opcode,
1928 : vec<operand_entry *> *ops, struct loop *loop)
1929 : {
1930 4593748 : if (ops->length () <= 1)
1931 : return false;
1932 :
1933 4590320 : if (opcode != PLUS_EXPR
1934 4590320 : && opcode != MULT_EXPR
1935 : && opcode != BIT_XOR_EXPR
1936 1201203 : && opcode != BIT_IOR_EXPR
1937 818975 : && opcode != BIT_AND_EXPR)
1938 : return false;
1939 :
            /* Maps each source VECTOR SSA_NAME to a heap-allocated v_info; every
               return path below releases them through cleanup_vinfo_map.  */
1940 4395933 : hash_map<tree, v_info_ptr> v_info_map;
1941 4395933 : operand_entry *oe1;
1942 4395933 : unsigned i;
1943 :
1944 : /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1945 : information into map. */
1946 13449027 : FOR_EACH_VEC_ELT (*ops, i, oe1)
1947 : {
1948 9053094 : enum tree_code dcode;
1949 9053094 : gimple *oe1def;
1950 :
1951 9053094 : if (TREE_CODE (oe1->op) != SSA_NAME)
1952 3169948 : continue;
1953 5883146 : oe1def = SSA_NAME_DEF_STMT (oe1->op);
1954 5883146 : if (!is_gimple_assign (oe1def))
1955 1488514 : continue;
1956 4394632 : dcode = gimple_assign_rhs_code (oe1def);
1957 4394632 : if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1958 4348037 : continue;
1959 :
            /* RHS is the BIT_FIELD_REF itself, VEC the object it reads from.  */
1960 46595 : tree rhs = gimple_assign_rhs1 (oe1def);
1961 46595 : tree vec = TREE_OPERAND (rhs, 0);
1962 46595 : tree vec_type = TREE_TYPE (vec);
1963 :
1964 46595 : if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1965 26593 : continue;
1966 :
1967 : /* Ignore it if target machine can't support this VECTOR type. */
1968 20002 : if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1969 5429 : continue;
1970 :
1971 : /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1972 14573 : if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1973 : continue;
1974 :
            /* The BIT_FIELD_REF must produce a scalar, not a sub-vector.  */
1975 14573 : if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1976 14573 : || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1977 5232 : continue;
1978 :
1979 : /* The type of BIT_FIELD_REF might not be equal to the element type of
1980 : the vector. We want to use a vector type with element type the
1981 : same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1982 9341 : if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1983 : {
1984 1383 : machine_mode simd_mode;
1985 1383 : unsigned HOST_WIDE_INT size, nunits;
1986 1383 : unsigned HOST_WIDE_INT elem_size
1987 1383 : = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1988 2766 : if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1989 9044215 : continue;
            /* The vector must hold a whole number (more than one) of elements
               of the BIT_FIELD_REF's size.  */
1990 1383 : if (size <= elem_size || (size % elem_size) != 0)
1991 0 : continue;
1992 1383 : nunits = size / elem_size;
1993 1383 : if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1994 1383 : nunits).exists (&simd_mode))
1995 0 : continue;
1996 1383 : vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1997 :
1998 : /* Ignore it if target machine can't support this VECTOR type. */
1999 1383 : if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
2000 0 : continue;
2001 :
2002 : /* Check const vector type, constrain BIT_FIELD_REF offset and
2003 : size. */
2004 1383 : if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2005 : continue;
2006 :
            /* The replacement vector type must have the same mode size as the
               type of VEC.  */
2007 2766 : if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2008 2766 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2009 0 : continue;
2010 : }
2011 :
            /* The BIT_FIELD_REF has to read exactly one element ...  */
2012 9341 : tree elem_type = TREE_TYPE (vec_type);
2013 9341 : unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2014 9341 : if (maybe_ne (bit_field_size (rhs), elem_size))
2015 0 : continue;
2016 :
            /* ... at an offset that is a constant multiple of the element
               size; IDX receives that multiple, i.e. the element index.  */
2017 9341 : unsigned idx;
2018 9341 : if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2019 0 : continue;
2020 :
2021 : /* Ignore it if target machine can't support this type of VECTOR
2022 : operation. */
2023 9341 : optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2024 9341 : if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2025 462 : continue;
2026 :
2027 8879 : bool existed;
2028 8879 : v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2029 8879 : if (!existed)
2030 : {
2031 7043 : info = new v_info;
2032 7043 : info->vec_type = vec_type;
2033 : }
            /* All BIT_FIELD_REFs recorded for one VECTOR must agree on the
               vector type; a mismatching one is simply not recorded.  */
2034 1836 : else if (!types_compatible_p (vec_type, info->vec_type))
2035 0 : continue;
            /* Record (element index, index of this operand in *OPS).  */
2036 8879 : info->vec.safe_push (std::make_pair (idx, i));
2037 : }
2038 :
2039 : /* At least two VECTOR to combine. */
2040 4395933 : if (v_info_map.elements () <= 1)
2041 : {
2042 4395673 : cleanup_vinfo_map (v_info_map);
2043 4395673 : return false;
2044 : }
2045 :
2046 : /* Verify all VECTOR candidates by checking two conditions:
2047 : 1) sorted offsets are adjacent, no holes.
2048 : 2) can fill the whole VECTOR perfectly.
2049 : And add the valid candidates to a vector for further handling. */
2050 260 : auto_vec<tree> valid_vecs (v_info_map.elements ());
2051 1206 : for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2052 2152 : it != v_info_map.end (); ++it)
2053 : {
2054 946 : tree cand_vec = (*it).first;
2055 946 : v_info_ptr cand_info = (*it).second;
2056 946 : unsigned int num_elems
2057 946 : = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2058 1892 : if (cand_info->vec.length () != num_elems)
2059 572 : continue;
            /* HOLES starts with every element marked as missing; each recorded
               BIT_FIELD_REF clears its element.  Finding an element already
               cleared means it is referenced twice, which invalidates the
               candidate.  */
2060 374 : sbitmap holes = sbitmap_alloc (num_elems);
2061 374 : bitmap_ones (holes);
2062 374 : bool valid = true;
2063 374 : v_info_elem *curr;
2064 2204 : FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2065 : {
2066 1456 : if (!bitmap_bit_p (holes, curr->first))
2067 : {
2068 : valid = false;
2069 : break;
2070 : }
2071 : else
2072 1456 : bitmap_clear_bit (holes, curr->first);
2073 : }
2074 374 : if (valid && bitmap_empty_p (holes))
2075 374 : valid_vecs.quick_push (cand_vec);
2076 374 : sbitmap_free (holes);
2077 : }
2078 :
2079 : /* At least two VECTOR to combine. */
2080 260 : if (valid_vecs.length () <= 1)
2081 : {
2082 220 : cleanup_vinfo_map (v_info_map);
2083 220 : return false;
2084 : }
2085 :
2086 40 : valid_vecs.qsort (sort_by_mach_mode);
2087 : /* Go through all candidates in machine mode order. A candidate whose
2088 : successor has a different mode has nothing to be combined with and is skipped. */
2089 86 : for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2090 : {
2091 46 : tree tvec = valid_vecs[i];
2092 46 : enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2093 :
2094 : /* Skip modes with only a single candidate. */
2095 46 : if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2096 4 : continue;
2097 :
2098 42 : unsigned int idx, j;
2099 42 : gimple *sum = NULL;
2100 42 : tree sum_vec = tvec;
2101 42 : v_info_ptr info_ptr = *(v_info_map.get (tvec));
2102 42 : v_info_elem *elem;
2103 42 : tree vec_type = info_ptr->vec_type;
2104 :
2105 : /* Build the sum for all candidates with same mode. */
            /* Each iteration folds valid_vecs[i + 1] into SUM_VEC and then
               advances the outer index I, so the outer loop resumes after the
               last member of this mode group.  */
2106 325 : do
2107 : {
2108 975 : sum = build_and_add_sum (vec_type, sum_vec,
2109 325 : valid_vecs[i + 1], opcode);
2110 : /* Update the operands only after build_and_add_sum,
2111 : so that we don't have to repeat the placement algorithm
2112 : of build_and_add_sum. */
            /* Only the very first operand (TVEC itself) can need a
               VIEW_CONVERT_EXPR here; later SUM_VECs already have VEC_TYPE.  */
2113 325 : if (sum_vec == tvec
2114 325 : && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2115 : {
2116 18 : gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2117 18 : tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2118 18 : tree lhs = make_ssa_name (vec_type);
2119 18 : gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2120 18 : gimple_set_uid (g, gimple_uid (sum));
2121 18 : gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2122 18 : gimple_assign_set_rhs1 (sum, lhs);
2123 18 : update_stmt (sum);
2124 : }
            /* Likewise view-convert the second operand if its type differs
               from VEC_TYPE.  */
2125 325 : if (!useless_type_conversion_p (vec_type,
2126 325 : TREE_TYPE (valid_vecs[i + 1])))
2127 : {
2128 270 : gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2129 810 : tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2130 270 : valid_vecs[i + 1]);
2131 270 : tree lhs = make_ssa_name (vec_type);
2132 270 : gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2133 270 : gimple_set_uid (g, gimple_uid (sum));
2134 270 : gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2135 270 : gimple_assign_set_rhs2 (sum, lhs);
2136 270 : update_stmt (sum);
2137 : }
2138 325 : sum_vec = gimple_get_lhs (sum);
2139 325 : info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2140 325 : gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2141 : /* Update those related ops of current candidate VECTOR. */
            /* The scalar operands that came from the VECTOR just folded in are
               replaced by the neutral element of OPCODE (0 for PLUS/XOR/IOR,
               1 for MULT, all-ones for AND) with rank 0.  */
2142 1575 : FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2143 : {
2144 1250 : idx = elem->second;
2145 1250 : gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2146 : /* Set this then op definition will get DCEd later. */
2147 1250 : gimple_set_visited (def, true);
2148 1250 : if (opcode == PLUS_EXPR
2149 1250 : || opcode == BIT_XOR_EXPR
2150 100 : || opcode == BIT_IOR_EXPR)
2151 1190 : (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2152 60 : else if (opcode == MULT_EXPR)
2153 24 : (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2154 : else
2155 : {
2156 36 : gcc_assert (opcode == BIT_AND_EXPR);
2157 36 : (*ops)[idx]->op
2158 36 : = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2159 : }
2160 1250 : (*ops)[idx]->rank = 0;
2161 : }
2162 325 : if (dump_file && (dump_flags & TDF_DETAILS))
2163 : {
2164 0 : fprintf (dump_file, "Generating addition -> ");
2165 0 : print_gimple_stmt (dump_file, sum, 0);
2166 : }
2167 325 : i++;
2168 : }
2169 325 : while ((i < valid_vecs.length () - 1)
2170 367 : && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2171 :
2172 : /* Referring to first valid VECTOR with this mode, generate the
2173 : BIT_FIELD_REF statements accordingly. */
            /* The operands that came from TVEC are redirected to the matching
               elements of the final SUM_VEC; the new BIT_FIELD_REFs are placed
               after SUM.  */
2174 42 : info_ptr = *(v_info_map.get (tvec));
2175 42 : gcc_assert (sum);
2176 42 : tree elem_type = TREE_TYPE (vec_type);
2177 232 : FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2178 : {
2179 148 : idx = elem->second;
2180 148 : tree dst = make_ssa_name (elem_type);
2181 148 : tree pos = bitsize_int (elem->first
2182 : * tree_to_uhwi (TYPE_SIZE (elem_type)));
2183 148 : tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2184 148 : TYPE_SIZE (elem_type), pos);
2185 148 : gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2186 148 : insert_stmt_after (gs, sum);
2187 148 : gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2188 : /* Set this then op definition will get DCEd later. */
2189 148 : gimple_set_visited (def, true);
2190 148 : (*ops)[idx]->op = gimple_assign_lhs (gs);
2191 148 : (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2192 148 : if (dump_file && (dump_flags & TDF_DETAILS))
2193 : {
2194 0 : fprintf (dump_file, "Generating bit_field_ref -> ");
2195 0 : print_gimple_stmt (dump_file, gs, 0);
2196 : }
2197 : }
2198 : }
2199 :
2200 40 : if (dump_file && (dump_flags & TDF_DETAILS))
2201 0 : fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n");
2202 :
2203 40 : cleanup_vinfo_map (v_info_map);
2204 :
2205 40 : return true;
2206 4396193 : }
2207 :
2208 : /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2209 : expression, examine the other OPS to see if any of them are comparisons
2210 : of the same values, which we may be able to combine or eliminate.
2211 : For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2212 :
2213 : static bool
2214 9452486 : eliminate_redundant_comparison (enum tree_code opcode,
2215 : vec<operand_entry *> *ops,
2216 : unsigned int currindex,
2217 : operand_entry *curr)
2218 : {
2219 9452486 : tree op1, op2;
2220 9452486 : enum tree_code lcode, rcode;
2221 9452486 : gimple *def1, *def2;
2222 9452486 : int i;
2223 9452486 : operand_entry *oe;
2224 :
2225 9452486 : if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2226 : return false;
2227 :
2228 : /* Check that CURR is a comparison. */
2229 1962914 : if (TREE_CODE (curr->op) != SSA_NAME)
2230 : return false;
2231 1480963 : def1 = SSA_NAME_DEF_STMT (curr->op);
2232 1480963 : if (!is_gimple_assign (def1))
2233 : return false;
2234 1274065 : lcode = gimple_assign_rhs_code (def1);
2235 1274065 : if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2236 : return false;
2237 507463 : op1 = gimple_assign_rhs1 (def1);
2238 507463 : op2 = gimple_assign_rhs2 (def1);
2239 :
2240 : /* Now look for a similar comparison in the remaining OPS. */
2241 1065755 : for (i = currindex + 1; ops->iterate (i, &oe); i++)
2242 : {
2243 558477 : tree t;
2244 :
2245 558477 : if (TREE_CODE (oe->op) != SSA_NAME)
2246 37 : continue;
2247 558440 : def2 = SSA_NAME_DEF_STMT (oe->op);
2248 558440 : if (!is_gimple_assign (def2))
2249 7510 : continue;
2250 550930 : rcode = gimple_assign_rhs_code (def2);
2251 550930 : if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2252 6315 : continue;
2253 :
2254 : /* If we got here, we have a match. See if we can combine the
2255 : two comparisons. */
2256 544615 : tree type = TREE_TYPE (gimple_assign_lhs (def1));
2257 544615 : if (opcode == BIT_IOR_EXPR)
2258 419157 : t = maybe_fold_or_comparisons (type,
2259 : lcode, op1, op2,
2260 : rcode, gimple_assign_rhs1 (def2),
2261 : gimple_assign_rhs2 (def2));
2262 : else
2263 125458 : t = maybe_fold_and_comparisons (type,
2264 : lcode, op1, op2,
2265 : rcode, gimple_assign_rhs1 (def2),
2266 : gimple_assign_rhs2 (def2));
2267 544615 : if (!t)
2268 544392 : continue;
2269 :
2270 : /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2271 : always give us a boolean_type_node value back. If the original
2272 : BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2273 : we need to convert. */
2274 223 : if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2275 : {
2276 2 : if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2277 0 : continue;
2278 2 : t = fold_convert (TREE_TYPE (curr->op), t);
2279 : }
2280 :
2281 223 : if (TREE_CODE (t) != INTEGER_CST
2282 223 : && !operand_equal_p (t, curr->op, 0))
2283 : {
2284 217 : enum tree_code subcode;
2285 217 : tree newop1, newop2;
2286 217 : if (!COMPARISON_CLASS_P (t))
2287 38 : continue;
2288 199 : extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2289 199 : STRIP_USELESS_TYPE_CONVERSION (newop1);
2290 199 : STRIP_USELESS_TYPE_CONVERSION (newop2);
2291 199 : if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2292 0 : continue;
2293 199 : if (lcode == TREE_CODE (t)
2294 99 : && operand_equal_p (op1, newop1, 0)
2295 298 : && operand_equal_p (op2, newop2, 0))
2296 59 : t = curr->op;
2297 160 : else if ((TREE_CODE (newop1) == SSA_NAME
2298 140 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2299 260 : || (TREE_CODE (newop2) == SSA_NAME
2300 81 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2301 20 : continue;
2302 : }
2303 :
2304 185 : if (dump_file && (dump_flags & TDF_DETAILS))
2305 : {
2306 6 : fprintf (dump_file, "Equivalence: ");
2307 6 : print_generic_expr (dump_file, curr->op);
2308 6 : fprintf (dump_file, " %s ", op_symbol_code (opcode));
2309 6 : print_generic_expr (dump_file, oe->op);
2310 6 : fprintf (dump_file, " -> ");
2311 6 : print_generic_expr (dump_file, t);
2312 6 : fprintf (dump_file, "\n");
2313 : }
2314 :
2315 : /* Now we can delete oe, as it has been subsumed by the new combined
2316 : expression t. */
2317 185 : ops->ordered_remove (i);
2318 185 : reassociate_stats.ops_eliminated ++;
2319 :
2320 : /* If t is the same as curr->op, we're done. Otherwise we must
2321 : replace curr->op with t. Special case is if we got a constant
2322 : back, in which case we add it to the end instead of in place of
2323 : the current entry. */
2324 185 : if (TREE_CODE (t) == INTEGER_CST)
2325 : {
2326 6 : ops->ordered_remove (currindex);
2327 6 : add_to_ops_vec (ops, t);
2328 : }
2329 179 : else if (!operand_equal_p (t, curr->op, 0))
2330 : {
2331 120 : gimple *sum;
2332 120 : enum tree_code subcode;
2333 120 : tree newop1;
2334 120 : tree newop2;
2335 120 : gcc_assert (COMPARISON_CLASS_P (t));
2336 120 : extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2337 120 : STRIP_USELESS_TYPE_CONVERSION (newop1);
2338 120 : STRIP_USELESS_TYPE_CONVERSION (newop2);
2339 120 : gcc_checking_assert (is_gimple_val (newop1)
2340 : && is_gimple_val (newop2));
2341 120 : sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2342 120 : curr->op = gimple_get_lhs (sum);
2343 : }
2344 : return true;
2345 : }
2346 :
2347 : return false;
2348 : }
2349 :
2350 :
2351 : /* Transform repeated addition of same values into multiply with
2352 : constant. */
2353 : static bool
2354 2290095 : transform_add_to_multiply (vec<operand_entry *> *ops)
2355 : {
2356 2290095 : operand_entry *oe;
2357 2290095 : tree op = NULL_TREE;
2358 2290095 : int j;
2359 2290095 : int i, start = -1, end = 0, count = 0;
2360 2290095 : auto_vec<std::pair <int, int> > indxs;
2361 2290095 : bool changed = false;
2362 :
2363 2290095 : if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2364 69943 : && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2365 31426 : || !flag_unsafe_math_optimizations))
2366 : return false;
2367 :
2368 : /* Look for repeated operands. */
2369 6926822 : FOR_EACH_VEC_ELT (*ops, i, oe)
2370 : {
2371 4675303 : if (start == -1)
2372 : {
2373 2251519 : count = 1;
2374 2251519 : op = oe->op;
2375 2251519 : start = i;
2376 : }
2377 2423784 : else if (operand_equal_p (oe->op, op, 0))
2378 : {
2379 155 : count++;
2380 155 : end = i;
2381 : }
2382 : else
2383 : {
2384 2423629 : if (count > 1)
2385 47 : indxs.safe_push (std::make_pair (start, end));
2386 2423629 : count = 1;
2387 2423629 : op = oe->op;
2388 2423629 : start = i;
2389 : }
2390 : }
2391 :
2392 2251519 : if (count > 1)
2393 38 : indxs.safe_push (std::make_pair (start, end));
2394 :
2395 2251677 : for (j = indxs.length () - 1; j >= 0; --j)
2396 : {
2397 : /* Convert repeated operand addition to multiplication. */
2398 85 : start = indxs[j].first;
2399 85 : end = indxs[j].second;
2400 85 : op = (*ops)[start]->op;
2401 85 : count = end - start + 1;
2402 325 : for (i = end; i >= start; --i)
2403 240 : ops->unordered_remove (i);
2404 85 : tree tmp = make_ssa_name (TREE_TYPE (op));
2405 85 : tree cst = build_int_cst (integer_type_node, count);
2406 85 : gassign *mul_stmt
2407 85 : = gimple_build_assign (tmp, MULT_EXPR,
2408 85 : op, fold_convert (TREE_TYPE (op), cst));
2409 85 : gimple_set_visited (mul_stmt, true);
2410 85 : add_to_ops_vec (ops, tmp, mul_stmt);
2411 85 : changed = true;
2412 : }
2413 :
2414 : return changed;
2415 2290095 : }
2416 :
2417 :
2418 : /* Perform various identities and other optimizations on the list of
2419 : operand entries, stored in OPS. The tree code for the binary
2420 : operation between all the operands is OPCODE. */
2421 :
2422 : static void
2423 4594052 : optimize_ops_list (enum tree_code opcode,
2424 : vec<operand_entry *> *ops)
2425 : {
2426 4611952 : unsigned int length = ops->length ();
2427 4611952 : unsigned int i;
2428 4611952 : operand_entry *oe;
2429 9221969 : operand_entry *oelast = NULL;
2430 9221969 : bool iterate = false;
2431 :
2432 4611952 : if (length == 1)
2433 4594052 : return;
2434 :
2435 4610017 : oelast = ops->last ();
2436 :
2437 : /* If the last two are constants, pop the constants off, merge them
2438 : and try the next two. */
2439 4610017 : if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2440 : {
2441 3323339 : operand_entry *oelm1 = (*ops)[length - 2];
2442 :
2443 3323339 : if (oelm1->rank == 0
2444 14239 : && is_gimple_min_invariant (oelm1->op)
2445 3337578 : && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2446 14239 : TREE_TYPE (oelast->op)))
2447 : {
2448 14239 : tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2449 : oelm1->op, oelast->op);
2450 :
2451 14239 : if (folded && is_gimple_min_invariant (folded))
2452 : {
2453 14221 : if (dump_file && (dump_flags & TDF_DETAILS))
2454 0 : fprintf (dump_file, "Merging constants\n");
2455 :
2456 14221 : ops->pop ();
2457 14221 : ops->pop ();
2458 :
2459 14221 : add_to_ops_vec (ops, folded);
2460 14221 : reassociate_stats.constants_eliminated++;
2461 :
2462 14221 : optimize_ops_list (opcode, ops);
2463 14221 : return;
2464 : }
2465 : }
2466 : }
2467 :
2468 4595796 : eliminate_using_constants (opcode, ops);
2469 4595796 : oelast = NULL;
2470 :
2471 14051904 : for (i = 0; ops->iterate (i, &oe);)
2472 : {
2473 9456110 : bool done = false;
2474 :
2475 9456110 : if (eliminate_not_pairs (opcode, ops, i, oe))
2476 2 : return;
2477 9456109 : if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2478 9455958 : || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2479 18908595 : || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2480 : {
2481 3808 : if (done)
2482 : return;
2483 3807 : iterate = true;
2484 3807 : oelast = NULL;
2485 3807 : continue;
2486 : }
2487 9452301 : oelast = oe;
2488 9452301 : i++;
2489 : }
2490 :
2491 4595794 : if (iterate)
2492 : optimize_ops_list (opcode, ops);
2493 : }
2494 :
2495 : /* The following functions are subroutines to optimize_range_tests and allow
2496 : it to try to change a logical combination of comparisons into a range
2497 : test.
2498 :
2499 : For example, both
2500 : X == 2 || X == 5 || X == 3 || X == 4
2501 : and
2502 : X >= 2 && X <= 5
2503 : are converted to
2504 : (unsigned) (X - 2) <= 3
2505 :
2506 : For more information see the comments above fold_range_test in
2507 : fold-const.cc; this implementation is the GIMPLE counterpart. */
2508 :
2509 :
2510 :
2511 : /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2512 :
2513 : void
2514 141 : dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2515 : {
2516 141 : if (!skip_exp)
2517 59 : print_generic_expr (file, r->exp);
2518 251 : fprintf (file, " %c[", r->in_p ? '+' : '-');
2519 141 : print_generic_expr (file, r->low);
2520 141 : fputs (", ", file);
2521 141 : print_generic_expr (file, r->high);
2522 141 : fputc (']', file);
2523 141 : }
2524 :
2525 : /* Dump the range entry R to STDERR. */
2526 :
2527 : DEBUG_FUNCTION void
2528 0 : debug_range_entry (struct range_entry *r)
2529 : {
2530 0 : dump_range_entry (stderr, r, false);
2531 0 : fputc ('\n', stderr);
2532 0 : }
2533 :
2534 : /* This is similar to make_range in fold-const.cc, but on top of
2535 : GIMPLE instead of trees. If EXP is non-NULL, it should be
2536 : an SSA_NAME and STMT argument is ignored, otherwise STMT
2537 : argument should be a GIMPLE_COND.

         R is first reset (exp, low and high to NULL_TREE, in_p and
         strict_overflow_p to false).  It is filled in at the end only if the
         walk established that the tested value is boolean-like; on every
         early return R keeps the reset state.  */
2538 :
2539 : void
2540 5547517 : init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2541 : {
2542 5547517 : int in_p;
2543 5547517 : tree low, high;
2544 5547517 : bool is_bool, strict_overflow_p;
2545 :
2546 5547517 : r->exp = NULL_TREE;
2547 5547517 : r->in_p = false;
2548 5547517 : r->strict_overflow_p = false;
2549 5547517 : r->low = NULL_TREE;
2550 5547517 : r->high = NULL_TREE;
            /* Only integral SSA_NAMEs (or a GIMPLE_COND, when EXP is NULL) are
               handled.  */
2551 5547517 : if (exp != NULL_TREE
2552 5547517 : && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2553 783210 : return;
2554 :
2555 : /* Start with simply saying "EXP != 0" and then look at the code of EXP
2556 : and see if we can refine the range. Some of the cases below may not
2557 : happen, but it doesn't seem worth worrying about this. We "continue"
2558 : the outer loop when we've changed something; otherwise we "break"
2559 : the switch, which will "break" the while. */
2560 5070395 : low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2561 5070395 : high = low;
2562 5070395 : in_p = 0;
            /* NOTE: nothing below ever sets strict_overflow_p to true, so R
               always receives false for it.  */
2563 5070395 : strict_overflow_p = false;
            /* IS_BOOL records whether the value is known to be boolean-like:
               a GIMPLE_COND, an unsigned 1-bit value or a BOOLEAN_TYPE value.
               Signed 1-bit values are rejected outright.  */
2564 5070395 : is_bool = false;
2565 5070395 : if (exp == NULL_TREE)
2566 : is_bool = true;
2567 1534482 : else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2568 : {
2569 639573 : if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2570 : is_bool = true;
2571 : else
2572 : return;
2573 : }
2574 894909 : else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2575 0 : is_bool = true;
2576 :
2577 8045810 : while (1)
2578 : {
2579 8045810 : enum tree_code code;
2580 8045810 : tree arg0, arg1, exp_type;
2581 8045810 : tree nexp;
2582 8045810 : location_t loc;
2583 :
            /* With an EXP, look through its defining assignment; without one,
               take code and operands from the GIMPLE_COND STMT.  */
2584 8045810 : if (exp != NULL_TREE)
2585 : {
2586 4509897 : if (TREE_CODE (exp) != SSA_NAME
2587 4509897 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2588 : break;
2589 :
2590 4509897 : stmt = SSA_NAME_DEF_STMT (exp);
2591 4509897 : if (!is_gimple_assign (stmt))
2592 : break;
2593 :
2594 2739031 : code = gimple_assign_rhs_code (stmt);
2595 2739031 : arg0 = gimple_assign_rhs1 (stmt);
2596 2739031 : arg1 = gimple_assign_rhs2 (stmt);
2597 2739031 : exp_type = TREE_TYPE (exp);
2598 : }
2599 : else
2600 : {
2601 3535913 : code = gimple_cond_code (stmt);
2602 3535913 : arg0 = gimple_cond_lhs (stmt);
2603 3535913 : arg1 = gimple_cond_rhs (stmt);
2604 3535913 : exp_type = boolean_type_node;
2605 : }
2606 :
            /* Stop refining unless the first operand is an SSA_NAME that is
               neither used in an abnormal PHI nor possibly undefined.  */
2607 6274944 : if (TREE_CODE (arg0) != SSA_NAME
2608 5009930 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)
2609 11284408 : || ssa_name_maybe_undef_p (arg0))
2610 : break;
2611 5002735 : loc = gimple_location (stmt);
2612 5002735 : switch (code)
2613 : {
2614 33021 : case BIT_NOT_EXPR:
2615 33021 : if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2616 : /* Ensure the range is either +[-,0], +[0,0],
2617 : -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2618 : -[1,1]. If it is e.g. +[-,-] or -[-,-]
2619 : or similar expression of unconditional true or
2620 : false, it should not be negated. */
2621 33021 : && ((high && integer_zerop (high))
2622 0 : || (low && integer_onep (low))))
2623 : {
            /* Negating a boolean just flips the sense of the range.  */
2624 5766 : in_p = !in_p;
2625 5766 : exp = arg0;
2626 5766 : continue;
2627 : }
2628 : break;
            /* A plain SSA_NAME copy: keep following the copied name.  */
2629 2446 : case SSA_NAME:
2630 2446 : exp = arg0;
2631 2446 : continue;
2632 229654 : CASE_CONVERT:
2633 229654 : if (is_bool)
2634 : {
            /* Give up on a conversion from a wider value to a 1-bit or
               boolean type.  */
2635 122437 : if ((TYPE_PRECISION (exp_type) == 1
2636 116170 : || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2637 122437 : && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2638 : return;
2639 : }
2640 107217 : else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2641 : {
2642 4242 : if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2643 : is_bool = true;
2644 : else
2645 : return;
2646 : }
2647 102975 : else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2648 123979 : is_bool = true;
            /* Conversions go to make_range_step even when IS_BOOL is still
               false, bypassing the check under "default".  */
2649 226954 : goto do_default;
2650 : case EQ_EXPR:
2651 : case NE_EXPR:
2652 : case LT_EXPR:
2653 : case LE_EXPR:
2654 : case GE_EXPR:
2655 : case GT_EXPR:
2656 : is_bool = true;
2657 : /* FALLTHRU */
2658 644415 : default:
2659 644415 : if (!is_bool)
2660 : return;
2661 341203 : do_default:
2662 4661356 : nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2663 : &low, &high, &in_p);
2664 4661356 : if (nexp != NULL_TREE)
2665 : {
2666 2967379 : exp = nexp;
2667 2967379 : gcc_assert (TREE_CODE (exp) == SSA_NAME);
2668 2967379 : continue;
2669 : }
2670 : break;
2671 : }
2672 : break;
2673 : }
            /* Publish the range only if the value turned out to be
               boolean-like.  */
2674 4764307 : if (is_bool)
2675 : {
2676 4176852 : r->exp = exp;
2677 4176852 : r->in_p = in_p;
2678 4176852 : r->low = low;
2679 4176852 : r->high = high;
2680 4176852 : r->strict_overflow_p = strict_overflow_p;
2681 : }
2682 : }
2683 :
2684 : /* Comparison function for qsort. Sort entries
2685 : without SSA_NAME exp first, then with SSA_NAMEs sorted
2686 : by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2687 : by increasing ->low and if ->low is the same, by increasing
2688 : ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2689 : maximum. */
2690 :
2691 : static int
2692 6215030 : range_entry_cmp (const void *a, const void *b)
2693 : {
2694 6215030 : const struct range_entry *p = (const struct range_entry *) a;
2695 6215030 : const struct range_entry *q = (const struct range_entry *) b;
2696 :
2697 6215030 : if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2698 : {
2699 2799223 : if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2700 : {
2701 : /* Group range_entries for the same SSA_NAME together. */
2702 2731373 : if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2703 : return -1;
2704 1171512 : else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2705 : return 1;
2706 : /* If ->low is different, NULL low goes first, then by
2707 : ascending low. */
2708 155369 : if (p->low != NULL_TREE)
2709 : {
2710 138470 : if (q->low != NULL_TREE)
2711 : {
2712 130958 : tree tem = fold_binary (LT_EXPR, boolean_type_node,
2713 : p->low, q->low);
2714 130958 : if (tem && integer_onep (tem))
2715 : return -1;
2716 64346 : tem = fold_binary (GT_EXPR, boolean_type_node,
2717 : p->low, q->low);
2718 64346 : if (tem && integer_onep (tem))
2719 : return 1;
2720 : }
2721 : else
2722 : return 1;
2723 : }
2724 16899 : else if (q->low != NULL_TREE)
2725 : return -1;
2726 : /* If ->high is different, NULL high goes last, before that by
2727 : ascending high. */
2728 30489 : if (p->high != NULL_TREE)
2729 : {
2730 30351 : if (q->high != NULL_TREE)
2731 : {
2732 30064 : tree tem = fold_binary (LT_EXPR, boolean_type_node,
2733 : p->high, q->high);
2734 30064 : if (tem && integer_onep (tem))
2735 : return -1;
2736 8895 : tem = fold_binary (GT_EXPR, boolean_type_node,
2737 : p->high, q->high);
2738 8895 : if (tem && integer_onep (tem))
2739 : return 1;
2740 : }
2741 : else
2742 : return -1;
2743 : }
2744 138 : else if (q->high != NULL_TREE)
2745 : return 1;
2746 : /* If both ranges are the same, sort below by ascending idx. */
2747 : }
2748 : else
2749 : return 1;
2750 : }
2751 3415807 : else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2752 : return -1;
2753 :
2754 3329190 : if (p->idx < q->idx)
2755 : return -1;
2756 : else
2757 : {
2758 1679735 : gcc_checking_assert (p->idx > q->idx);
2759 : return 1;
2760 : }
2761 : }
2762 :
2763 : /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2764 : insert needed statements BEFORE or after GSI. */
2765 :
2766 : static tree
2767 25013 : force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2768 : {
2769 25013 : enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2770 25013 : tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2771 25013 : if (TREE_CODE (ret) != SSA_NAME)
2772 : {
2773 37 : gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2774 37 : if (before)
2775 37 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
2776 : else
2777 0 : gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2778 37 : ret = gimple_assign_lhs (g);
2779 : }
2780 25013 : return ret;
2781 : }
2782 :
2783 : /* Helper routine of optimize_range_tests.
2784 : [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2785 : RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2786 : OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2787 : is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2788 : an array of COUNT pointers to other ranges. Return
2789 : true if the range merge has been successful.
2790 : If OPCODE is ERROR_MARK, this is called from within
2791 : maybe_optimize_range_tests and is performing inter-bb range optimization.
2792 : In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2793 : oe->rank.
SEQ, if non-NULL, is a statement sequence that gets inserted ahead of
the statements computing the new range test (when such statements are
emitted at all).  On success the operand entry picked for the merged
test receives the new test, RANGE is updated to describe the merged
range, and the operand entries and ranges of all the other ranges are
neutralized.  Both failure returns happen before RANGE, the other
ranges or OPS are modified. */
2794 :
2795 : static bool
2796 25013 : update_range_test (struct range_entry *range, struct range_entry *otherrange,
2797 : struct range_entry **otherrangep,
2798 : unsigned int count, enum tree_code opcode,
2799 : vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2800 : bool in_p, tree low, tree high, bool strict_overflow_p)
2801 : {
2802 25013 : unsigned int idx = range->idx;
2803 25013 : struct range_entry *swap_with = NULL;
2804 25013 : basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2805 25013 : if (opcode == ERROR_MARK)
2806 : {
2807 : /* For inter-bb range test optimization, pick from the range tests
2808 : the one which is tested in the earliest condition (one dominating
2809 : the others), because otherwise there could be some UB (e.g. signed
2810 : overflow) in following bbs that we'd expose which wasn't there in
2811 : the original program. See PR104196. */
2812 14492 : basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2813 14492 : basic_block range_bb = orig_range_bb;
2814 31897 : for (unsigned int i = 0; i < count; i++)
2815 : {
2816 17405 : struct range_entry *this_range;
2817 17405 : if (otherrange)
2818 10708 : this_range = otherrange + i;
2819 : else
2820 6697 : this_range = otherrangep[i];
2821 17405 : operand_entry *oe = (*ops)[this_range->idx];
2822 17405 : basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2823 17405 : if (range_bb != this_bb
2824 17405 : && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2825 : {
2826 6856 : swap_with = this_range;
2827 6856 : range_bb = this_bb;
2828 6856 : idx = this_range->idx;
2829 : }
2830 : }
2831 : /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2832 : only defined in later blocks. In this case we can't move the
2833 : merged comparison earlier, so instead check if there are any stmts
2834 : that might trigger signed integer overflow in between and rewrite
2835 : them. But only after we check if the optimization is possible. */
2836 14492 : if (seq && swap_with)
2837 : {
2838 3469 : rewrite_bb_first = range_bb;
2839 3469 : rewrite_bb_last = orig_range_bb;
2840 3469 : idx = range->idx;
2841 3469 : swap_with = NULL;
2842 : }
2843 : }
/* OE is the operand entry that will carry the merged test.  STMT is the
   place where the test is emitted: the definition of OE's op or, when
   OE has no op, the last non-debug statement of the block recorded in
   OE->id. */
2844 25013 : operand_entry *oe = (*ops)[idx];
2845 25013 : tree op = oe->op;
2846 25013 : gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2847 12487 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2848 25013 : location_t loc = gimple_location (stmt);
2849 25013 : tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2850 25013 : tree tem = build_range_check (loc, optype, unshare_expr (exp),
2851 : in_p, low, high);
2852 25013 : enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2853 25013 : gimple_stmt_iterator gsi;
2854 25013 : unsigned int i, uid;
2855 :
2856 25013 : if (tem == NULL_TREE)
2857 : return false;
2858 :
2859 : /* If op is default def SSA_NAME, there is no place to insert the
2860 : new comparison. Give up, unless we can use OP itself as the
2861 : range test. */
2862 37539 : if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2863 : {
2864 0 : if (op == range->exp
2865 0 : && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2866 0 : || TREE_CODE (optype) == BOOLEAN_TYPE)
2867 0 : && (op == tem
2868 0 : || (TREE_CODE (tem) == EQ_EXPR
2869 0 : && TREE_OPERAND (tem, 0) == op
2870 0 : && integer_onep (TREE_OPERAND (tem, 1))))
2871 0 : && opcode != BIT_IOR_EXPR
2872 0 : && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2873 : {
2874 : stmt = NULL;
2875 : tem = op;
2876 : }
2877 : else
2878 0 : return false;
2879 : }
2880 :
/* The merged test lives in the operand entry of the dominating range
   found above, so exchange the operand indexes of RANGE and that
   range. */
2881 25013 : if (swap_with)
2882 1817 : std::swap (range->idx, swap_with->idx);
2883 :
2884 25013 : if (strict_overflow_p && issue_strict_overflow_warning (wc))
2885 0 : warning_at (loc, OPT_Wstrict_overflow,
2886 : "assuming signed overflow does not occur "
2887 : "when simplifying range test");
2888 :
2889 25013 : if (dump_file && (dump_flags & TDF_DETAILS))
2890 : {
2891 39 : struct range_entry *r;
2892 39 : fprintf (dump_file, "Optimizing range tests ");
2893 39 : dump_range_entry (dump_file, range, false);
2894 180 : for (i = 0; i < count; i++)
2895 : {
2896 102 : if (otherrange)
2897 82 : r = otherrange + i;
2898 : else
2899 20 : r = otherrangep[i];
2900 102 : if (r->exp
2901 102 : && r->exp != range->exp
2902 20 : && TREE_CODE (r->exp) == SSA_NAME)
2903 : {
2904 20 : fprintf (dump_file, " and ");
2905 20 : dump_range_entry (dump_file, r, false);
2906 : }
2907 : else
2908 : {
2909 82 : fprintf (dump_file, " and");
2910 82 : dump_range_entry (dump_file, r, true);
2911 : }
2912 : }
2913 39 : fprintf (dump_file, "\n into ");
2914 39 : print_generic_expr (dump_file, tem);
2915 39 : fprintf (dump_file, "\n");
2916 : }
2917 :
2918 : /* In inter-bb range optimization mode, if we have a seq, we can't
2919 : move the merged comparison to the earliest bb from the comparisons
2920 : being replaced, so instead rewrite stmts that could trigger signed
2921 : integer overflow. */
2922 6417 : for (basic_block bb = rewrite_bb_last;
2923 31430 : bb != rewrite_bb_first; bb = single_pred (bb))
2924 12834 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2925 25682 : !gsi_end_p (gsi); gsi_next (&gsi))
2926 : {
2927 19265 : gimple *stmt = gsi_stmt (gsi);
2928 19265 : if (gimple_needing_rewrite_undefined (stmt))
2929 : {
/* Remember the neighbours of STMT so that every statement the rewrite
   puts between them can be given STMT's uid afterwards. */
2930 69 : gimple_stmt_iterator gsip = gsi;
2931 69 : gimple_stmt_iterator gsin = gsi;
2932 69 : gsi_prev (&gsip);
2933 69 : gsi_next (&gsin);
2934 69 : rewrite_to_defined_unconditional (&gsi);
2935 69 : unsigned uid = gimple_uid (stmt);
2936 69 : if (gsi_end_p (gsip))
2937 31 : gsip = gsi_after_labels (bb);
2938 : else
2939 38 : gsi_next (&gsip);
2940 276 : for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2941 207 : gsi_next (&gsip))
2942 207 : gimple_set_uid (gsi_stmt (gsip), uid);
2943 : }
2944 : }
2945 :
/* For | chains the operand entry receives the inverted range check. */
2946 25013 : if (opcode == BIT_IOR_EXPR
2947 18961 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2948 17663 : tem = invert_truthvalue_loc (loc, tem);
2949 :
2950 25013 : tem = fold_convert_loc (loc, optype, tem);
2951 25013 : if (stmt)
2952 : {
2953 25013 : gsi = gsi_for_stmt (stmt);
2954 25013 : uid = gimple_uid (stmt);
2955 : }
2956 : else
2957 : {
2958 0 : gsi = gsi_none ();
2959 0 : uid = 0;
2960 : }
2961 25013 : if (stmt == NULL)
2962 0 : gcc_checking_assert (tem == op);
2963 : /* In rare cases range->exp can be equal to lhs of stmt.
2964 : In that case we have to insert after the stmt rather than before
2965 : it. If stmt is a PHI, insert it at the start of the basic block. */
2966 25013 : else if (op != range->exp)
2967 : {
2968 25013 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2969 25013 : tem = force_into_ssa_name (&gsi, tem, true);
2970 25013 : gsi_prev (&gsi);
2971 : }
2972 0 : else if (gimple_code (stmt) != GIMPLE_PHI)
2973 : {
2974 0 : gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2975 0 : tem = force_into_ssa_name (&gsi, tem, false);
2976 : }
2977 : else
2978 : {
2979 0 : gsi = gsi_after_labels (gimple_bb (stmt));
2980 0 : if (!gsi_end_p (gsi))
2981 0 : uid = gimple_uid (gsi_stmt (gsi));
2982 : else
2983 : {
2984 0 : gsi = gsi_start_bb (gimple_bb (stmt));
2985 0 : uid = 1;
2986 0 : while (!gsi_end_p (gsi))
2987 : {
2988 0 : uid = gimple_uid (gsi_stmt (gsi));
2989 0 : gsi_next (&gsi);
2990 : }
2991 : }
2992 0 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2993 0 : tem = force_into_ssa_name (&gsi, tem, true);
2994 0 : if (gsi_end_p (gsi))
2995 0 : gsi = gsi_last_bb (gimple_bb (stmt));
2996 : else
2997 25013 : gsi_prev (&gsi);
2998 : }
/* Walk backwards from the insertion point and give every statement
   that still has a zero uid the uid chosen above; stop at the first
   statement that already has a non-zero uid. */
2999 155611 : for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3000 86621 : if (gimple_uid (gsi_stmt (gsi)))
3001 : break;
3002 : else
3003 65299 : gimple_set_uid (gsi_stmt (gsi), uid);
3004 :
/* Record the merged test in the operand entry and make RANGE describe
   the merged range. */
3005 25013 : oe->op = tem;
3006 25013 : range->exp = exp;
3007 25013 : range->low = low;
3008 25013 : range->high = high;
3009 25013 : range->in_p = in_p;
3010 25013 : range->strict_overflow_p = false;
3011 :
3012 54007 : for (i = 0; i < count; i++)
3013 : {
3014 28994 : if (otherrange)
3015 18374 : range = otherrange + i;
3016 : else
3017 10620 : range = otherrangep[i];
3018 28994 : oe = (*ops)[range->idx];
3019 : /* Now change all the other range test immediate uses, so that
3020 : those tests will be optimized away. */
3021 28994 : if (opcode == ERROR_MARK)
3022 : {
3023 17405 : if (oe->op)
3024 2133 : oe->op = build_int_cst (TREE_TYPE (oe->op),
3025 2133 : oe->rank == BIT_IOR_EXPR ? 0 : 1);
3026 : else
3027 15272 : oe->op = (oe->rank == BIT_IOR_EXPR
3028 15272 : ? boolean_false_node : boolean_true_node);
3029 : }
3030 : else
3031 11589 : oe->op = error_mark_node;
3032 28994 : range->exp = NULL_TREE;
3033 28994 : range->low = NULL_TREE;
3034 28994 : range->high = NULL_TREE;
3035 : }
3036 : return true;
3037 : }
3038 :
3039 : /* Optimize X == CST1 || X == CST2
3040 : if popcount (CST1 ^ CST2) == 1 into
3041 : (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3042 : Similarly for ranges. E.g.
3043 : X != 2 && X != 3 && X != 10 && X != 11
3044 : will be transformed by the previous optimization into
3045 : !((X - 2U) <= 1U || (X - 10U) <= 1U)
3046 : and this loop can transform that into
3047 : !(((X & ~8) - 2U) <= 1U). */
3048 :
3049 : static bool
3050 22554 : optimize_range_tests_xor (enum tree_code opcode, tree type,
3051 : tree lowi, tree lowj, tree highi, tree highj,
3052 : vec<operand_entry *> *ops,
3053 : struct range_entry *rangei,
3054 : struct range_entry *rangej)
3055 : {
3056 22554 : tree lowxor, highxor, tem, exp;
3057 : /* Check lowi ^ lowj == highi ^ highj and
3058 : popcount (lowi ^ lowj) == 1. */
3059 22554 : lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3060 22554 : if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3061 : return false;
3062 22554 : if (!integer_pow2p (lowxor))
3063 : return false;
3064 3100 : highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3065 3100 : if (!tree_int_cst_equal (lowxor, highxor))
3066 : return false;
3067 :
3068 2705 : exp = rangei->exp;
3069 2705 : scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3070 2705 : int prec = GET_MODE_PRECISION (mode);
3071 2705 : if (TYPE_PRECISION (type) < prec
3072 2704 : || (wi::to_wide (TYPE_MIN_VALUE (type))
3073 8113 : != wi::min_value (prec, TYPE_SIGN (type)))
3074 8113 : || (wi::to_wide (TYPE_MAX_VALUE (type))
3075 8113 : != wi::max_value (prec, TYPE_SIGN (type))))
3076 : {
3077 1 : type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3078 1 : exp = fold_convert (type, exp);
3079 1 : lowxor = fold_convert (type, lowxor);
3080 1 : lowi = fold_convert (type, lowi);
3081 1 : highi = fold_convert (type, highi);
3082 : }
3083 2705 : tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3084 2705 : exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3085 2705 : lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3086 2705 : highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3087 2705 : if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3088 2705 : NULL, rangei->in_p, lowj, highj,
3089 2705 : rangei->strict_overflow_p
3090 2705 : || rangej->strict_overflow_p))
3091 : return true;
3092 : return false;
3093 : }
3094 :
3095 : /* Optimize X == CST1 || X == CST2
3096 : if popcount (CST2 - CST1) == 1 into
3097 : ((X - CST1) & ~(CST2 - CST1)) == 0.
3098 : Similarly for ranges. E.g.
3099 : X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3100 : || X == 75 || X == 45
3101 : will be transformed by the previous optimization into
3102 : (X - 43U) <= 3U || (X - 75U) <= 3U
3103 : and this loop can transform that into
3104 : ((X - 43U) & ~(75U - 43U)) <= 3U. */
3105 : static bool
3106 17471 : optimize_range_tests_diff (enum tree_code opcode, tree type,
3107 : tree lowi, tree lowj, tree highi, tree highj,
3108 : vec<operand_entry *> *ops,
3109 : struct range_entry *rangei,
3110 : struct range_entry *rangej)
3111 : {
3112 17471 : tree tem1, tem2, mask;
3113 : /* Check highi - lowi == highj - lowj. */
3114 17471 : tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3115 17471 : if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3116 : return false;
3117 17471 : tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3118 17471 : if (!tree_int_cst_equal (tem1, tem2))
3119 : return false;
3120 : /* Check popcount (lowj - lowi) == 1. */
3121 11678 : tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3122 11678 : if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3123 : return false;
3124 11678 : if (!integer_pow2p (tem1))
3125 : return false;
3126 :
3127 1937 : scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3128 1937 : int prec = GET_MODE_PRECISION (mode);
3129 1937 : if (TYPE_PRECISION (type) < prec
3130 1933 : || (wi::to_wide (TYPE_MIN_VALUE (type))
3131 5803 : != wi::min_value (prec, TYPE_SIGN (type)))
3132 5803 : || (wi::to_wide (TYPE_MAX_VALUE (type))
3133 5803 : != wi::max_value (prec, TYPE_SIGN (type))))
3134 4 : type = build_nonstandard_integer_type (prec, 1);
3135 : else
3136 1933 : type = unsigned_type_for (type);
3137 1937 : tem1 = fold_convert (type, tem1);
3138 1937 : tem2 = fold_convert (type, tem2);
3139 1937 : lowi = fold_convert (type, lowi);
3140 1937 : mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3141 1937 : tem1 = fold_build2 (MINUS_EXPR, type,
3142 : fold_convert (type, rangei->exp), lowi);
3143 1937 : tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3144 1937 : lowj = build_int_cst (type, 0);
3145 1937 : if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3146 1937 : NULL, rangei->in_p, lowj, tem2,
3147 1937 : rangei->strict_overflow_p
3148 1937 : || rangej->strict_overflow_p))
3149 : return true;
3150 : return false;
3151 : }
3152 :
3153 : /* It does some common checks for function optimize_range_tests_xor and
3154 : optimize_range_tests_diff.
3155 : If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3156 : Else it calls optimize_range_tests_diff. */
3157 :
3158 : static bool
3159 2093061 : optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3160 : bool optimize_xor, vec<operand_entry *> *ops,
3161 : struct range_entry *ranges)
3162 : {
3163 2093061 : int i, j;
3164 2093061 : bool any_changes = false;
3165 3667979 : for (i = first; i < length; i++)
3166 : {
3167 1574918 : tree lowi, highi, lowj, highj, type, tem;
3168 :
3169 1574918 : if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3170 974755 : continue;
3171 600163 : type = TREE_TYPE (ranges[i].exp);
3172 600163 : if (!INTEGRAL_TYPE_P (type))
3173 48346 : continue;
3174 551817 : lowi = ranges[i].low;
3175 551817 : if (lowi == NULL_TREE)
3176 35236 : lowi = TYPE_MIN_VALUE (type);
3177 551817 : highi = ranges[i].high;
3178 551817 : if (highi == NULL_TREE)
3179 6830 : continue;
3180 910906 : for (j = i + 1; j < length && j < i + 64; j++)
3181 : {
3182 370561 : bool changes;
3183 370561 : if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3184 330536 : continue;
3185 40025 : lowj = ranges[j].low;
3186 40025 : if (lowj == NULL_TREE)
3187 0 : continue;
3188 40025 : highj = ranges[j].high;
3189 40025 : if (highj == NULL_TREE)
3190 152 : highj = TYPE_MAX_VALUE (type);
3191 : /* Check lowj > highi. */
3192 40025 : tem = fold_binary (GT_EXPR, boolean_type_node,
3193 : lowj, highi);
3194 40025 : if (tem == NULL_TREE || !integer_onep (tem))
3195 0 : continue;
3196 40025 : if (optimize_xor)
3197 22554 : changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3198 : highi, highj, ops,
3199 : ranges + i, ranges + j);
3200 : else
3201 17471 : changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3202 : highi, highj, ops,
3203 : ranges + i, ranges + j);
3204 40025 : if (changes)
3205 : {
3206 : any_changes = true;
3207 : break;
3208 : }
3209 : }
3210 : }
3211 2093061 : return any_changes;
3212 : }
3213 :
3214 : /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3215 : range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3216 : EXP on success, NULL otherwise.
PREC is the width of the mask in bits.  The computed mask is stored in
*MASK.  If TOTALLOWP is non-NULL, the mask is returned relative to the
low bound of this range, and when a biased form of EXP is recognized
the underlying expression is returned and its low bound is stored in
*TOTALLOWP.  If TOTALLOWP is NULL, the mask is instead shifted so that
it is relative to TOTALLOW, and NULL is returned when the shifted range
does not fit.  The expression returned may be an operand of EXP rather
than EXP itself. */
3217 :
3218 : static tree
3219 166036 : extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3220 : wide_int *mask, tree *totallowp)
3221 : {
/* HIGH - LOW has to be a non-negative constant smaller than PREC,
   otherwise the range cannot be described by a PREC-bit mask. */
3222 166036 : tree tem = int_const_binop (MINUS_EXPR, high, low);
3223 166036 : if (tem == NULL_TREE
3224 166036 : || TREE_CODE (tem) != INTEGER_CST
3225 166036 : || TREE_OVERFLOW (tem)
3226 155102 : || tree_int_cst_sgn (tem) == -1
3227 321138 : || compare_tree_int (tem, prec) != -1)
3228 14690 : return NULL_TREE;
3229 :
/* MAX is the number of values in [LOW, HIGH]; start with the MAX least
   significant bits set. */
3230 151346 : unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3231 151346 : *mask = wi::shifted_mask (0, max, false, prec);
/* EXP of the form X & CST, where CST has exactly one bit clear within
   the precision of EXP's type.  MSK is that bit; a second group of MAX
   bits starting at bit position MSK is added to the mask and X is
   looked at instead.  NOTE(review): this looks like the form produced
   by optimize_range_tests_xor and optimize_range_tests_diff - confirm. */
3232 151346 : if (TREE_CODE (exp) == BIT_AND_EXPR
3233 151346 : && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3234 : {
3235 4844 : widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3236 4844 : msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3237 4844 : if (wi::popcount (msk) == 1
3238 4844 : && wi::ltu_p (msk, prec - max))
3239 : {
3240 4067 : *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3241 4067 : max += msk.to_uhwi ();
3242 4067 : exp = TREE_OPERAND (exp, 0);
/* With LOW being zero and X being Y + CST2, test Y (with no-op
   conversions stripped) directly and use -CST2 as its low bound. */
3243 4067 : if (integer_zerop (low)
3244 2138 : && TREE_CODE (exp) == PLUS_EXPR
3245 5851 : && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3246 : {
3247 1784 : tree ret = TREE_OPERAND (exp, 0);
3248 1784 : STRIP_NOPS (ret);
3249 1784 : widest_int bias
3250 1784 : = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3251 3568 : TYPE_PRECISION (TREE_TYPE (low))));
3252 1784 : tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
/* The caller asked for the low bound to be reported rather than
   applied to the mask. */
3253 1784 : if (totallowp)
3254 : {
3255 1752 : *totallowp = tbias;
3256 1752 : return ret;
3257 : }
3258 32 : else if (!tree_int_cst_lt (totallow, tbias))
3259 : return NULL_TREE;
/* Otherwise shift the mask by the distance between this low bound and
   TOTALLOW, provided the result still fits into PREC bits.  If it does
   not, fall through to the generic handling below. */
3260 32 : bias = wi::to_widest (tbias);
3261 32 : bias -= wi::to_widest (totallow);
3262 32 : if (bias >= 0 && bias < prec - max)
3263 : {
3264 22 : *mask = wi::lshift (*mask, bias);
3265 22 : return ret;
3266 : }
3267 1784 : }
3268 : }
3269 4844 : }
/* With TOTALLOWP given, the mask stays relative to this range. */
3270 149572 : if (totallowp)
3271 : return exp;
/* If TOTALLOW is not less than LOW, the mask is returned unshifted. */
3272 13461 : if (!tree_int_cst_lt (totallow, low))
3273 : return exp;
/* Shift the mask left by LOW - TOTALLOW; give up if that difference is
   not a usable constant or is larger than PREC - MAX. */
3274 13439 : tem = int_const_binop (MINUS_EXPR, low, totallow);
3275 13439 : if (tem == NULL_TREE
3276 13439 : || TREE_CODE (tem) != INTEGER_CST
3277 13439 : || TREE_OVERFLOW (tem)
3278 26721 : || compare_tree_int (tem, prec - max) == 1)
3279 3866 : return NULL_TREE;
3280 :
3281 9573 : *mask = wi::lshift (*mask, wi::to_widest (tem));
3282 9573 : return exp;
3283 : }
3284 :
3285 : /* Attempt to optimize small range tests using bit test.
3286 : E.g.
3287 : X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3288 : && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3289 : has been by earlier optimizations optimized into:
3290 : ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3291 : As all the 43 through 82 range is less than 64 numbers,
3292 : for 64-bit word targets optimize that into:
3293 : (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0
Only RANGES[FIRST] through RANGES[LENGTH - 1] are considered; OPCODE
and OPS are passed on to update_range_test.  Return true if any range
test has been replaced by a bit test. */
3294 :
3295 : static bool
3296 1046539 : optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3297 : vec<operand_entry *> *ops,
3298 : struct range_entry *ranges)
3299 : {
3300 1046539 : int i, j;
3301 1046539 : bool any_changes = false;
/* The bit test is done in word_mode, so the mask has PREC bits. */
3302 1046539 : int prec = GET_MODE_BITSIZE (word_mode);
3303 1046539 : auto_vec<struct range_entry *, 64> candidates;
3304 :
3305 1473128 : for (i = first; i < length - 1; i++)
3306 : {
3307 426589 : tree lowi, highi, lowj, highj, type;
3308 :
3309 426589 : if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3310 288726 : continue;
3311 171387 : type = TREE_TYPE (ranges[i].exp);
3312 171387 : if (!INTEGRAL_TYPE_P (type))
3313 16539 : continue;
3314 154848 : lowi = ranges[i].low;
3315 154848 : if (lowi == NULL_TREE)
3316 11040 : lowi = TYPE_MIN_VALUE (type);
3317 154848 : highi = ranges[i].high;
3318 154848 : if (highi == NULL_TREE)
3319 2435 : continue;
/* Compute the mask for the base range; this may replace LOWI and hand
   back an operand of ranges[i].exp as the expression to test. */
3320 152413 : wide_int mask;
3321 152413 : tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3322 : highi, &mask, &lowi);
3323 152413 : if (exp == NULL_TREE)
3324 14550 : continue;
3325 137863 : bool strict_overflow_p = ranges[i].strict_overflow_p;
/* Collect, among at most the following 63 ranges, those that test the
   same expression and whose values fit into the same mask. */
3326 137863 : candidates.truncate (0);
3327 137863 : int end = MIN (i + 64, length);
3328 298294 : for (j = i + 1; j < end; j++)
3329 : {
3330 160431 : tree exp2;
3331 160431 : if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3332 150848 : continue;
/* Accept EXP itself, EXP & C and (EXP' + C2) & C where EXP' is EXP after
   stripping no-op conversions; skip everything else. */
3333 92335 : if (ranges[j].exp == exp)
3334 : ;
3335 78987 : else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3336 : {
3337 1234 : exp2 = TREE_OPERAND (ranges[j].exp, 0);
3338 1234 : if (exp2 == exp)
3339 : ;
3340 1003 : else if (TREE_CODE (exp2) == PLUS_EXPR)
3341 : {
3342 810 : exp2 = TREE_OPERAND (exp2, 0);
3343 810 : STRIP_NOPS (exp2);
3344 810 : if (exp2 != exp)
3345 766 : continue;
3346 : }
3347 : else
3348 193 : continue;
3349 : }
3350 : else
3351 77753 : continue;
3352 13623 : lowj = ranges[j].low;
3353 13623 : if (lowj == NULL_TREE)
3354 0 : continue;
3355 13623 : highj = ranges[j].high;
3356 13623 : if (highj == NULL_TREE)
3357 76 : highj = TYPE_MAX_VALUE (TREE_TYPE (lowj));
/* MASK2 comes back already shifted relative to LOWI because no
   TOTALLOWP is passed. */
3358 13623 : wide_int mask2;
3359 13623 : exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3360 : highj, &mask2, NULL);
3361 13623 : if (exp2 != exp)
3362 4040 : continue;
3363 9583 : mask |= mask2;
3364 9583 : strict_overflow_p |= ranges[j].strict_overflow_p;
3365 9583 : candidates.safe_push (&ranges[j]);
3366 13623 : }
3367 :
3368 : /* If every possible relative value of the expression is a valid shift
3369 : amount, then we can merge the entry test in the bit test. In this
3370 : case, if we would need otherwise 2 or more comparisons, then use
3371 : the bit test; in the other cases, the threshold is 3 comparisons. */
3372 137863 : bool entry_test_needed;
3373 137863 : int_range_max r;
3374 275726 : if (TREE_CODE (exp) == SSA_NAME
3375 274286 : && get_range_query (cfun)->range_of_expr (r, exp)
3376 137143 : && !r.undefined_p ()
3377 137143 : && !r.varying_p ()
3378 316319 : && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3379 : {
/* Rebase LOWI and MASK onto the lower bound of the known value range
   of EXP. */
3380 5018 : wide_int min = r.lower_bound ();
3381 5018 : wide_int ilowi = wi::to_wide (lowi);
3382 5018 : if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3383 : {
3384 504 : lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3385 504 : mask = wi::lshift (mask, ilowi - min);
3386 : }
3387 4514 : else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3388 : {
3389 1 : lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3390 1 : mask = wi::lrshift (mask, min - ilowi);
3391 : }
3392 5018 : entry_test_needed = false;
3393 5018 : }
3394 : else
3395 : entry_test_needed = true;
3396 280744 : if (candidates.length () >= (entry_test_needed ? 2 : 1))
3397 : {
/* HIGH is the value that corresponds to the most significant bit set
   in MASK. */
3398 852 : tree high = wide_int_to_tree (TREE_TYPE (lowi),
3399 426 : wi::to_widest (lowi)
3400 1278 : + prec - 1 - wi::clz (mask));
3401 426 : operand_entry *oe = (*ops)[ranges[i].idx];
3402 426 : tree op = oe->op;
3403 426 : gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3404 52 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3405 374 : (cfun, oe->id));
3406 426 : location_t loc = gimple_location (stmt);
3407 426 : tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3408 :
3409 : /* See if it isn't cheaper to pretend the minimum value of the
3410 : range is 0, if maximum value is small enough.
3411 : We can avoid then subtraction of the minimum value, but the
3412 : mask constant could be perhaps more expensive. */
3413 426 : if (compare_tree_int (lowi, 0) > 0
3414 340 : && compare_tree_int (high, prec) < 0
3415 866 : && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
3416 : {
/* COST_DIFF is the cost of the subtraction plus the AND with the
   current mask, minus the cost of the AND with the mask shifted left
   by LOWI. */
3417 139 : int cost_diff;
3418 139 : HOST_WIDE_INT m = tree_to_uhwi (lowi);
3419 139 : rtx reg = gen_raw_REG (word_mode, 10000);
3420 139 : bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3421 139 : cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3422 : GEN_INT (-m)),
3423 : word_mode, speed_p);
3424 139 : rtx r = immed_wide_int_const (mask, word_mode);
3425 139 : cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3426 : word_mode, speed_p);
3427 139 : r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3428 139 : cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3429 : word_mode, speed_p);
3430 139 : if (cost_diff > 0)
3431 : {
3432 58 : mask = wi::lshift (mask, m);
3433 58 : lowi = build_zero_cst (TREE_TYPE (lowi));
3434 : }
3435 : }
3436 :
/* TEM is the entry test built from the range [LOWI, HIGH] with IN_P
   false, or NULL_TREE when no entry test is needed. */
3437 426 : tree tem;
3438 426 : if (entry_test_needed)
3439 : {
3440 365 : tem = build_range_check (loc, optype, unshare_expr (exp),
3441 : false, lowi, high);
3442 365 : if (tem == NULL_TREE || is_gimple_val (tem))
3443 0 : continue;
3444 : }
3445 : else
3446 61 : tem = NULL_TREE;
/* Build (((word) 1 << (int) (EXP - LOWI)) & MASK) == 0, with the
   subtraction done in the unsigned variant of EXP's type. */
3447 426 : tree etype = unsigned_type_for (TREE_TYPE (exp));
3448 426 : exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3449 : fold_convert_loc (loc, etype, exp),
3450 : fold_convert_loc (loc, etype, lowi));
3451 426 : exp = fold_convert_loc (loc, integer_type_node, exp);
3452 426 : tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3453 426 : exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3454 : build_int_cst (word_type, 1), exp);
3455 852 : exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3456 426 : wide_int_to_tree (word_type, mask));
3457 426 : exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3458 : build_zero_cst (word_type));
3459 426 : if (is_gimple_val (exp))
3460 0 : continue;
3461 :
3462 : /* The shift might have undefined behavior if TEM is true,
3463 : but reassociate_bb isn't prepared to have basic blocks
3464 : split when it is running. So, temporarily emit a code
3465 : with BIT_IOR_EXPR instead of &&, and fix it up in
3466 : branch_fixup. */
3467 426 : gimple_seq seq = NULL;
3468 426 : if (tem)
3469 : {
3470 365 : tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3471 365 : gcc_assert (TREE_CODE (tem) == SSA_NAME);
3472 365 : gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3473 : }
3474 426 : gimple_seq seq2;
3475 426 : exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3476 426 : gimple_seq_add_seq_without_update (&seq, seq2);
3477 426 : gcc_assert (TREE_CODE (exp) == SSA_NAME);
3478 426 : gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3479 426 : if (tem)
3480 : {
3481 365 : gimple *g = gimple_build_assign (make_ssa_name (optype),
3482 : BIT_IOR_EXPR, tem, exp);
3483 365 : gimple_set_location (g, loc);
3484 365 : gimple_seq_add_stmt_without_update (&seq, g);
3485 365 : exp = gimple_assign_lhs (g);
3486 : }
/* The combined test is handed over as the range [0, 0] of the new
   SSA_NAME with IN_P false; SEQ holds the statements computing it. */
3487 426 : tree val = build_zero_cst (optype);
3488 1278 : if (update_range_test (&ranges[i], NULL, candidates.address (),
3489 : candidates.length (), opcode, ops, exp,
3490 : seq, false, val, val, strict_overflow_p))
3491 : {
3492 426 : any_changes = true;
/* Remember the entry test so that it can be fixed up later (see the
   comment about BIT_IOR_EXPR above). */
3493 426 : if (tem)
3494 365 : reassoc_branch_fixups.safe_push (tem);
3495 : }
3496 : else
3497 0 : gimple_seq_discard (seq);
3498 : }
3499 152413 : }
3500 1046539 : return any_changes;
3501 1046539 : }
3502 :
3503 : /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3504 : and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3505 : Also, handle x < C && y < C && z < C where C is power of two as
3506 : (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3507 : as (x | y | z) < 0. */
3508 :
3509 : static bool
3510 1046539 : optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3511 : vec<operand_entry *> *ops,
3512 : struct range_entry *ranges)
3513 : {
3514 1046539 : int i;
3515 1046539 : unsigned int b;
3516 1046539 : bool any_changes = false;
3517 1046539 : auto_vec<int, 128> buckets;
3518 1046539 : auto_vec<int, 32> chains;
3519 1046539 : auto_vec<struct range_entry *, 32> candidates;
3520 :
3521 1834005 : for (i = first; i < length; i++)
3522 : {
3523 787466 : int idx;
3524 :
3525 1143972 : if (ranges[i].exp == NULL_TREE
3526 767630 : || TREE_CODE (ranges[i].exp) != SSA_NAME
3527 763099 : || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3528 1218426 : || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3529 356506 : continue;
3530 :
3531 430960 : if (ranges[i].low != NULL_TREE
3532 404255 : && ranges[i].high != NULL_TREE
3533 349032 : && ranges[i].in_p
3534 629021 : && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3535 : {
3536 169780 : idx = !integer_zerop (ranges[i].low);
3537 169780 : if (idx && !integer_all_onesp (ranges[i].low))
3538 95415 : continue;
3539 : }
3540 261180 : else if (ranges[i].high != NULL_TREE
3541 205922 : && TREE_CODE (ranges[i].high) == INTEGER_CST
3542 205922 : && ranges[i].in_p)
3543 : {
3544 37368 : wide_int w = wi::to_wide (ranges[i].high);
3545 37368 : int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3546 37368 : int l = wi::clz (w);
3547 37368 : idx = 2;
3548 101742 : if (l <= 0
3549 37368 : || l >= prec
3550 68684 : || w != wi::mask (prec - l, false, prec))
3551 27006 : continue;
3552 10362 : if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3553 6400 : && ranges[i].low == NULL_TREE)
3554 10362 : || (ranges[i].low
3555 8443 : && integer_zerop (ranges[i].low))))
3556 3537 : continue;
3557 37368 : }
3558 423452 : else if (ranges[i].high == NULL_TREE
3559 55258 : && ranges[i].low != NULL_TREE
3560 : /* Perform this optimization only in the last
3561 : reassoc pass, as it interferes with the reassociation
3562 : itself or could also with VRP etc. which might not
3563 : be able to virtually undo the optimization. */
3564 55223 : && !reassoc_insert_powi_p
3565 27881 : && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3566 251182 : && integer_zerop (ranges[i].low))
3567 : idx = 3;
3568 : else
3569 199640 : continue;
3570 :
3571 105362 : b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3572 105362 : if (buckets.length () <= b)
3573 91563 : buckets.safe_grow_cleared (b + 1, true);
3574 105362 : if (chains.length () <= (unsigned) i)
3575 105362 : chains.safe_grow (i + 1, true);
3576 105362 : chains[i] = buckets[b];
3577 105362 : buckets[b] = i + 1;
3578 : }
3579 :
3580 16770178 : FOR_EACH_VEC_ELT (buckets, b, i)
3581 15723639 : if (i && chains[i - 1])
3582 : {
3583 6783 : int j, k = i;
3584 6783 : if ((b % 4) == 2)
3585 : {
3586 : /* When ranges[X - 1].high + 1 is a power of two,
3587 : we need to process the same bucket up to
3588 : precision - 1 times, each time split the entries
3589 : with the same high bound into one chain and the
3590 : rest into another one to be processed later. */
3591 : int this_prev = i;
3592 : int other_prev = 0;
3593 146 : for (j = chains[i - 1]; j; j = chains[j - 1])
3594 : {
3595 79 : if (tree_int_cst_equal (ranges[i - 1].high,
3596 79 : ranges[j - 1].high))
3597 : {
3598 70 : chains[this_prev - 1] = j;
3599 70 : this_prev = j;
3600 : }
3601 9 : else if (other_prev == 0)
3602 : {
3603 7 : buckets[b] = j;
3604 7 : other_prev = j;
3605 : }
3606 : else
3607 : {
3608 2 : chains[other_prev - 1] = j;
3609 2 : other_prev = j;
3610 : }
3611 : }
3612 67 : chains[this_prev - 1] = 0;
3613 67 : if (other_prev)
3614 7 : chains[other_prev - 1] = 0;
3615 67 : if (chains[i - 1] == 0)
3616 : {
3617 5 : if (other_prev)
3618 5 : b--;
3619 5 : continue;
3620 : }
3621 : }
3622 16461 : for (j = chains[i - 1]; j; j = chains[j - 1])
3623 : {
3624 9683 : gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3625 9683 : gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3626 9683 : if (reassoc_stmt_dominates_stmt_p (gk, gj))
3627 2639 : k = j;
3628 : }
3629 6778 : tree type1 = TREE_TYPE (ranges[k - 1].exp);
3630 6778 : tree type2 = NULL_TREE;
3631 6778 : bool strict_overflow_p = false;
3632 6778 : candidates.truncate (0);
3633 6778 : if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3634 677 : type1 = pointer_sized_int_node;
3635 23239 : for (j = i; j; j = chains[j - 1])
3636 : {
3637 16461 : tree type = TREE_TYPE (ranges[j - 1].exp);
3638 16461 : strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3639 16461 : if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3640 1369 : type = pointer_sized_int_node;
3641 16461 : if ((b % 4) == 3)
3642 : {
3643 : /* For the signed < 0 cases, the types should be
3644 : really compatible (all signed with the same precision);
3645 : instead put ranges that have different in_p from
3646 : k first. */
3647 3605 : if (!useless_type_conversion_p (type1, type))
3648 0 : continue;
3649 3605 : if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3650 1009 : candidates.safe_push (&ranges[j - 1]);
3651 3605 : type2 = type1;
3652 3605 : continue;
3653 : }
3654 12856 : if (j == k
3655 12856 : || useless_type_conversion_p (type1, type))
3656 : ;
3657 427 : else if (type2 == NULL_TREE
3658 427 : || useless_type_conversion_p (type2, type))
3659 : {
3660 427 : if (type2 == NULL_TREE)
3661 416 : type2 = type;
3662 427 : candidates.safe_push (&ranges[j - 1]);
3663 : }
3664 : }
3665 6778 : unsigned l = candidates.length ();
3666 23239 : for (j = i; j; j = chains[j - 1])
3667 : {
3668 16461 : tree type = TREE_TYPE (ranges[j - 1].exp);
3669 16461 : if (j == k)
3670 6778 : continue;
3671 9683 : if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3672 692 : type = pointer_sized_int_node;
3673 9683 : if ((b % 4) == 3)
3674 : {
3675 1993 : if (!useless_type_conversion_p (type1, type))
3676 0 : continue;
3677 1993 : if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3678 984 : candidates.safe_push (&ranges[j - 1]);
3679 1993 : continue;
3680 : }
3681 7690 : if (useless_type_conversion_p (type1, type))
3682 : ;
3683 854 : else if (type2 == NULL_TREE
3684 427 : || useless_type_conversion_p (type2, type))
3685 427 : continue;
3686 7263 : candidates.safe_push (&ranges[j - 1]);
3687 : }
3688 6778 : gimple_seq seq = NULL;
3689 6778 : tree op = NULL_TREE;
3690 6778 : unsigned int id;
3691 6778 : struct range_entry *r;
3692 6778 : candidates.safe_push (&ranges[k - 1]);
3693 23239 : FOR_EACH_VEC_ELT (candidates, id, r)
3694 : {
3695 16461 : gimple *g;
3696 16461 : enum tree_code code;
3697 16461 : if (id == 0)
3698 : {
3699 6778 : op = r->exp;
3700 6778 : continue;
3701 : }
3702 9683 : if (id == l
3703 8258 : || POINTER_TYPE_P (TREE_TYPE (op))
3704 17370 : || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3705 : {
3706 2001 : code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3707 2001 : tree type3 = id >= l ? type1 : pointer_sized_int_node;
3708 2001 : if (code == BIT_NOT_EXPR
3709 2001 : && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3710 : {
3711 0 : g = gimple_build_assign (make_ssa_name (type3),
3712 : NOP_EXPR, op);
3713 0 : gimple_seq_add_stmt_without_update (&seq, g);
3714 0 : op = gimple_assign_lhs (g);
3715 : }
3716 2001 : g = gimple_build_assign (make_ssa_name (type3), code, op);
3717 2001 : gimple_seq_add_stmt_without_update (&seq, g);
3718 2001 : op = gimple_assign_lhs (g);
3719 : }
3720 9683 : tree type = TREE_TYPE (r->exp);
3721 9683 : tree exp = r->exp;
3722 9683 : if (POINTER_TYPE_P (type)
3723 8969 : || TREE_CODE (type) == OFFSET_TYPE
3724 18647 : || (id >= l && !useless_type_conversion_p (type1, type)))
3725 : {
3726 719 : tree type3 = id >= l ? type1 : pointer_sized_int_node;
3727 719 : g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3728 719 : gimple_seq_add_stmt_without_update (&seq, g);
3729 719 : exp = gimple_assign_lhs (g);
3730 : }
3731 9683 : if ((b % 4) == 3)
3732 3267 : code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3733 : else
3734 7690 : code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3735 19366 : g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3736 : code, op, exp);
3737 9683 : gimple_seq_add_stmt_without_update (&seq, g);
3738 9683 : op = gimple_assign_lhs (g);
3739 : }
3740 6778 : type1 = TREE_TYPE (ranges[k - 1].exp);
3741 6778 : if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3742 : {
3743 677 : gimple *g
3744 677 : = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3745 677 : gimple_seq_add_stmt_without_update (&seq, g);
3746 677 : op = gimple_assign_lhs (g);
3747 : }
3748 6778 : candidates.pop ();
3749 6778 : if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3750 : candidates.length (), opcode, ops, op,
3751 6778 : seq, ranges[k - 1].in_p, ranges[k - 1].low,
3752 : ranges[k - 1].high, strict_overflow_p))
3753 : any_changes = true;
3754 : else
3755 0 : gimple_seq_discard (seq);
3756 6840 : if ((b % 4) == 2 && buckets[b] != i)
3757 : /* There is more work to do for this bucket. */
3758 2 : b--;
3759 : }
3760 :
3761 1046539 : return any_changes;
3762 1046539 : }
3763 :
3764 : /* Attempt to optimize for signed a and b where b is known to be >= 0:
3765 : a >= 0 && a < b into (unsigned) a < (unsigned) b
3766 : a >= 0 && a <= b into (unsigned) a <= (unsigned) b
     :
     : OPCODE is the code combining the tests; when it is ERROR_MARK the
     : per-entry code is taken from oe->rank and FIRST_BB is used to reject
     : an RHS2 whose definition sits in a different block dominated by
     : FIRST_BB (a few always-safe definitions excepted).
     : RANGES[FIRST, LENGTH) is scanned for the "a >= 0" tests, which are
     : keyed by their SSA_NAME in a hash map; if there are none the function
     : returns false right away. RANGES[0, LENGTH) is then scanned for the
     : comparisons (note the second scan starts at 0, not FIRST); an entry
     : with a NULL exp takes its comparison from the GIMPLE_COND ending
     : basic block oe->id. OPS supplies the operand_entry of a range via
     : ranges[i].idx.
     : On a match conversions to the unsigned type are inserted before the
     : comparison, the comparison is rewritten (a GIMPLE_COND in place,
     : an assignment by emitting a new statement and updating oe->op and
     : the range entry), and the matching "a >= 0" entry is neutralized.
     : Returns true if anything was changed. */
3767 :
3768 : static bool
3769 1046539 : optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3770 : vec<operand_entry *> *ops,
3771 : struct range_entry *ranges,
3772 : basic_block first_bb)
3773 : {
3774 1046539 : int i;
3775 1046539 : bool any_changes = false;
3776 1046539 : hash_map<tree, int> *map = NULL;
3777 :
3778 1834005 : for (i = first; i < length; i++)
3779 : {
3780 787466 : if (ranges[i].exp == NULL_TREE
3781 768155 : || TREE_CODE (ranges[i].exp) != SSA_NAME
3782 763624 : || !ranges[i].in_p)
3783 317491 : continue;
3784 :
3785 469975 : tree type = TREE_TYPE (ranges[i].exp);
3786 893935 : if (!INTEGRAL_TYPE_P (type)
3787 461334 : || TYPE_UNSIGNED (type)
3788 176129 : || ranges[i].low == NULL_TREE
3789 167042 : || !integer_zerop (ranges[i].low)
3790 543958 : || ranges[i].high != NULL_TREE)
3791 423960 : continue;
3792 : /* EXP >= 0 here; record its index so that a comparison against EXP
     : found by the second scan can look this test up. */
3793 46015 : if (map == NULL)
3794 44433 : map = new hash_map <tree, int>;
3795 46015 : map->put (ranges[i].exp, i);
3796 : }
3797 :
3798 1046539 : if (map == NULL)
3799 : return false;
3800 :
3801 136947 : for (i = 0; i < length; i++)
3802 : {
3803 92514 : bool in_p = ranges[i].in_p;
3804 92514 : if (ranges[i].low == NULL_TREE
3805 91813 : || ranges[i].high == NULL_TREE)
3806 91989 : continue;
3807 44004 : if (!integer_zerop (ranges[i].low)
3808 44004 : || !integer_zerop (ranges[i].high))
3809 : {
3810 8888 : if (ranges[i].exp
3811 4444 : && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3812 0 : && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3813 0 : && integer_onep (ranges[i].low)
3814 4444 : && integer_onep (ranges[i].high))
3815 0 : in_p = !in_p;
3816 : else
3817 4444 : continue;
3818 : }
3819 :
3820 39560 : gimple *stmt;
3821 39560 : tree_code ccode;
3822 39560 : tree rhs1, rhs2;
3823 39560 : if (ranges[i].exp)
3824 : {
3825 38553 : if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3826 5 : continue;
3827 38548 : stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3828 38548 : if (!is_gimple_assign (stmt))
3829 858 : continue;
3830 37690 : ccode = gimple_assign_rhs_code (stmt);
3831 37690 : rhs1 = gimple_assign_rhs1 (stmt);
3832 37690 : rhs2 = gimple_assign_rhs2 (stmt);
3833 : }
3834 : else
3835 : {
3836 1007 : operand_entry *oe = (*ops)[ranges[i].idx];
3837 1007 : stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3838 1007 : if (gimple_code (stmt) != GIMPLE_COND)
3839 0 : continue;
3840 1007 : ccode = gimple_cond_code (stmt);
3841 1007 : rhs1 = gimple_cond_lhs (stmt);
3842 1007 : rhs2 = gimple_cond_rhs (stmt);
3843 : }
3844 :
3845 38697 : if (TREE_CODE (rhs1) != SSA_NAME
3846 38155 : || rhs2 == NULL_TREE
3847 38075 : || TREE_CODE (rhs2) != SSA_NAME)
3848 699 : continue;
3849 :
3850 37998 : switch (ccode)
3851 : {
3852 36904 : case GT_EXPR:
3853 36904 : case GE_EXPR:
3854 36904 : case LT_EXPR:
3855 36904 : case LE_EXPR:
3856 36904 : break;
3857 1094 : default:
3858 1094 : continue;
3859 : }
3860 36904 : if (in_p)
3861 801 : ccode = invert_tree_comparison (ccode, false);
3862 36904 : switch (ccode)
3863 : {
3864 15009 : case GT_EXPR:
3865 15009 : case GE_EXPR:
3866 15009 : std::swap (rhs1, rhs2);
3867 15009 : ccode = swap_tree_comparison (ccode);
3868 15009 : break;
3869 : case LT_EXPR:
3870 : case LE_EXPR:
3871 : break;
3872 0 : default:
3873 0 : gcc_unreachable ();
3874 : }
3875 :
3876 36904 : int *idx = map->get (rhs1);
3877 36904 : if (idx == NULL)
3878 930 : continue;
3879 :
3880 : /* maybe_optimize_range_tests allows statements without side-effects
3881 : in the basic blocks as long as they are consumed in the same bb.
3882 : Make sure rhs2's def stmt is not among them, otherwise we can't
3883 : safely use get_nonzero_bits on it. E.g. in:
3884 : # RANGE [-83, 1] NONZERO 173
3885 : # k_32 = PHI <k_47(13), k_12(9)>
3886 : ...
3887 : if (k_32 >= 0)
3888 : goto <bb 5>; [26.46%]
3889 : else
3890 : goto <bb 9>; [73.54%]
3891 :
3892 : <bb 5> [local count: 140323371]:
3893 : # RANGE [0, 1] NONZERO 1
3894 : _5 = (int) k_32;
3895 : # RANGE [0, 4] NONZERO 4
3896 : _21 = _5 << 2;
3897 : # RANGE [0, 4] NONZERO 4
3898 : iftmp.0_44 = (char) _21;
3899 : if (k_32 < iftmp.0_44)
3900 : goto <bb 6>; [84.48%]
3901 : else
3902 : goto <bb 9>; [15.52%]
3903 : the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3904 : k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3905 : to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3906 : those stmts even for negative k_32 and the value ranges would be no
3907 : longer guaranteed and so the optimization would be invalid.
     :
     : The while below is only a retry vehicle: it is left via break after
     : a single pass unless the same-precision cast case replaces RHS2 by
     : the cast's operand and continues. RHS2 is set to NULL_TREE when
     : the definition is rejected. */
3908 35974 : while (opcode == ERROR_MARK)
3909 : {
3910 605 : gimple *g = SSA_NAME_DEF_STMT (rhs2);
3911 605 : basic_block bb2 = gimple_bb (g);
3912 605 : if (bb2
3913 605 : && bb2 != first_bb
3914 605 : && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3915 : {
3916 : /* As an exception, handle a few common cases. */
3917 512 : if (gimple_assign_cast_p (g)
3918 512 : && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3919 : {
3920 40 : tree op0 = gimple_assign_rhs1 (g);
3921 40 : if (TYPE_UNSIGNED (TREE_TYPE (op0))
3922 40 : && (TYPE_PRECISION (TREE_TYPE (rhs2))
3923 10 : > TYPE_PRECISION (TREE_TYPE (op0))))
3924 : /* Zero-extension is always ok. */
3925 : break;
3926 30 : else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3927 30 : == TYPE_PRECISION (TREE_TYPE (op0))
3928 30 : && TREE_CODE (op0) == SSA_NAME)
3929 : {
3930 : /* Cast from signed to unsigned or vice versa. Retry
3931 : with the op0 as new rhs2. */
3932 0 : rhs2 = op0;
3933 0 : continue;
3934 : }
3935 : }
3936 472 : else if (is_gimple_assign (g)
3937 472 : && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3938 0 : && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3939 944 : && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3940 : /* Masking with INTEGER_CST with MSB clear is always ok
3941 : too. */
3942 : break;
3943 : rhs2 = NULL_TREE;
3944 : }
3945 : break;
3946 : }
3947 35472 : if (rhs2 == NULL_TREE)
3948 502 : continue;
3949 :
3950 35997 : wide_int nz = get_nonzero_bits (rhs2);
3951 35472 : if (wi::neg_p (nz))
3952 34947 : continue;
3953 :
3954 : /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3955 : and RHS2 is known to be >= 0 (its sign bit is not among the
     : possibly nonzero bits reported by get_nonzero_bits). */
3956 525 : tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3957 :
3958 525 : enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3959 525 : if ((ranges[*idx].strict_overflow_p
3960 525 : || ranges[i].strict_overflow_p)
3961 0 : && issue_strict_overflow_warning (wc))
3962 0 : warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3963 : "assuming signed overflow does not occur "
3964 : "when simplifying range test");
3965 :
3966 525 : if (dump_file && (dump_flags & TDF_DETAILS))
3967 : {
3968 7 : struct range_entry *r = &ranges[*idx];
3969 7 : fprintf (dump_file, "Optimizing range test ");
3970 7 : print_generic_expr (dump_file, r->exp);
3971 7 : fprintf (dump_file, " +[");
3972 7 : print_generic_expr (dump_file, r->low);
3973 7 : fprintf (dump_file, ", ");
3974 7 : print_generic_expr (dump_file, r->high);
3975 7 : fprintf (dump_file, "] and comparison ");
3976 7 : print_generic_expr (dump_file, rhs1);
3977 7 : fprintf (dump_file, " %s ", op_symbol_code (ccode));
3978 7 : print_generic_expr (dump_file, rhs2);
3979 7 : fprintf (dump_file, "\n into (");
3980 7 : print_generic_expr (dump_file, utype);
3981 7 : fprintf (dump_file, ") ");
3982 7 : print_generic_expr (dump_file, rhs1);
3983 7 : fprintf (dump_file, " %s (", op_symbol_code (ccode));
3984 7 : print_generic_expr (dump_file, utype);
3985 7 : fprintf (dump_file, ") ");
3986 7 : print_generic_expr (dump_file, rhs2);
3987 7 : fprintf (dump_file, "\n");
3988 : }
3989 :
3990 525 : operand_entry *oe = (*ops)[ranges[i].idx];
3991 525 : ranges[i].in_p = 0;
3992 525 : if (opcode == BIT_IOR_EXPR
3993 486 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3994 : {
3995 40 : ranges[i].in_p = 1;
3996 40 : ccode = invert_tree_comparison (ccode, false);
3997 : }
3998 :
3999 525 : unsigned int uid = gimple_uid (stmt);
4000 525 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4001 525 : gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4002 525 : gimple_set_uid (g, uid);
4003 525 : rhs1 = gimple_assign_lhs (g);
4004 525 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4005 525 : if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4006 : {
4007 525 : g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4008 525 : gimple_set_uid (g, uid);
4009 525 : rhs2 = gimple_assign_lhs (g);
4010 525 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4011 : }
4012 525 : if (tree_swap_operands_p (rhs1, rhs2))
4013 : {
4014 486 : std::swap (rhs1, rhs2);
4015 486 : ccode = swap_tree_comparison (ccode);
4016 : }
4017 525 : if (gimple_code (stmt) == GIMPLE_COND)
4018 : {
4019 8 : gcond *c = as_a <gcond *> (stmt);
4020 8 : gimple_cond_set_code (c, ccode);
4021 8 : gimple_cond_set_lhs (c, rhs1);
4022 8 : gimple_cond_set_rhs (c, rhs2);
4023 8 : update_stmt (stmt);
4024 : }
4025 : else
4026 : {
4027 517 : tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4028 517 : if (!INTEGRAL_TYPE_P (ctype)
4029 517 : || (TREE_CODE (ctype) != BOOLEAN_TYPE
4030 3 : && TYPE_PRECISION (ctype) != 1))
4031 3 : ctype = boolean_type_node;
4032 517 : g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4033 517 : gimple_set_uid (g, uid);
4034 517 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4035 517 : if (oe->op && ctype != TREE_TYPE (oe->op))
4036 : {
4037 3 : g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4038 : NOP_EXPR, gimple_assign_lhs (g));
4039 3 : gimple_set_uid (g, uid);
4040 3 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4041 : }
4042 517 : ranges[i].exp = gimple_assign_lhs (g);
4043 517 : oe->op = ranges[i].exp;
4044 517 : ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4045 517 : ranges[i].high = ranges[i].low;
4046 : }
4047 525 : ranges[i].strict_overflow_p = false;
4048 525 : oe = (*ops)[ranges[*idx].idx];
4049 : /* Now neutralize the EXP >= 0 test found through the map, so that
4050 : it will be optimized away: in the inter-bb case (OPCODE is
     : ERROR_MARK) its operand becomes the constant 0/false for a
     : BIT_IOR_EXPR rank and 1/true otherwise; in the other case it is
     : marked with error_mark_node. The range entry itself is cleared. */
4051 525 : if (opcode == ERROR_MARK)
4052 : {
4053 11 : if (oe->op)
4054 3 : oe->op = build_int_cst (TREE_TYPE (oe->op),
4055 3 : oe->rank == BIT_IOR_EXPR ? 0 : 1);
4056 : else
4057 8 : oe->op = (oe->rank == BIT_IOR_EXPR
4058 8 : ? boolean_false_node : boolean_true_node);
4059 : }
4060 : else
4061 514 : oe->op = error_mark_node;
4062 525 : ranges[*idx].exp = NULL_TREE;
4063 525 : ranges[*idx].low = NULL_TREE;
4064 525 : ranges[*idx].high = NULL_TREE;
4065 525 : any_changes = true;
4066 : }
4067 :
4068 44433 : delete map;
4069 44433 : return any_changes;
4070 : }
4071 :
4072 : /* Optimize range tests, similarly to how fold_range_test optimizes
4073 : them on trees. The tree code for the binary
4074 : operation between all the operands is OPCODE.
4075 : If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4076 : maybe_optimize_range_tests for inter-bb range optimization.
4077 : In that case if oe->op is NULL, oe->id is bb->index whose
4078 : GIMPLE_COND is && or ||ed into the test, and oe->rank says
4079 : the actual opcode.
4080 : FIRST_BB is the first basic block if OPCODE is ERROR_MARK.
     : Returns true if any test was changed (false right away when OPS has
     : a single element). When OPCODE is not ERROR_MARK, entries of OPS
     : whose op was set to error_mark_node by the sub-optimizations are
     : removed from OPS before returning. */
4081 :
4082 : static bool
4083 1046674 : optimize_range_tests (enum tree_code opcode,
4084 : vec<operand_entry *> *ops, basic_block first_bb)
4085 : {
4086 1046674 : unsigned int length = ops->length (), i, j, first;
4087 1046674 : operand_entry *oe;
4088 1046674 : struct range_entry *ranges;
4089 2093213 : bool any_changes = false;
4090 :
4091 1046674 : if (length == 1)
4092 : return false;
4093 :
4094 1046539 : ranges = XNEWVEC (struct range_entry, length);
4095 4313599 : for (i = 0; i < length; i++)
4096 : {
4097 2220521 : oe = (*ops)[i];
4098 2220521 : ranges[i].idx = i;
4099 2220521 : init_range_entry (ranges + i, oe->op,
4100 2220521 : oe->op
4101 : ? NULL
4102 266265 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4103 : /* For | invert it now, we will invert it again before emitting
4104 : the optimized expression. */
4105 2220521 : if (opcode == BIT_IOR_EXPR
4106 1540628 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4107 875318 : ranges[i].in_p = !ranges[i].in_p;
4108 : }
4109 :
4110 1046539 : qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
     : /* Skip the leading entries whose exp is not an SSA_NAME; FIRST below
     : is the index of the first entry that has one (or LENGTH). */
4111 3526133 : for (i = 0; i < length; i++)
4112 1793932 : if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4113 : break;
4114 :
4115 : /* Try to merge ranges. UPDATE_FAIL_COUNT is shared by all iterations
     : of the loop below: if it were declared inside the loop body it would
     : be reset to 0 for every I and the == 64 cut-off further down could
     : never trigger. */
     : int update_fail_count = 0;
4116 1820273 : for (first = i; i < length; i++)
4117 : {
4118 773734 : tree low = ranges[i].low;
4119 773734 : tree high = ranges[i].high;
4120 773734 : int in_p = ranges[i].in_p;
4121 773734 : bool strict_overflow_p = ranges[i].strict_overflow_p;
4123 :
4124 787466 : for (j = i + 1; j < length; j++)
4125 : {
4126 426589 : if (ranges[i].exp != ranges[j].exp)
4127 : break;
4128 34370 : if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4129 34370 : ranges[j].in_p, ranges[j].low, ranges[j].high))
4130 : break;
4131 13732 : strict_overflow_p |= ranges[j].strict_overflow_p;
4132 : }
4133 :
     : /* Nothing could be merged with ranges[i]. */
4134 773734 : if (j == i + 1)
4135 760567 : continue;
4136 :
4137 13167 : if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4138 : opcode, ops, ranges[i].exp, NULL, in_p,
4139 : low, high, strict_overflow_p))
4140 : {
4141 13167 : i = j - 1;
4142 13167 : any_changes = true;
4143 : }
4144 : /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4145 : while update_range_test would fail: once 64 failures have been
     : seen, skip past the whole group instead of retrying from I + 1. */
4146 : else if (update_fail_count == 64)
4147 : i = j - 1;
4148 : else
4149 13167 : ++update_fail_count;
4150 : }
4151 :
4152 1046539 : any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4153 : ops, ranges);
4154 :
4155 1046539 : if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4156 1046522 : any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4157 : ops, ranges);
4158 1046539 : if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4159 1046539 : any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4160 : ops, ranges);
4161 1046539 : any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4162 : ranges, first_bb);
4163 1046539 : any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4164 : ops, ranges);
4165 :
     : /* Compact OPS in place, dropping the entries that were marked with
     : error_mark_node. */
4166 1046539 : if (any_changes && opcode != ERROR_MARK)
4167 : {
4168 : j = 0;
4169 35563 : FOR_EACH_VEC_ELT (*ops, i, oe)
4170 : {
4171 24756 : if (oe->op == error_mark_node)
4172 12103 : continue;
4173 12653 : else if (i != j)
4174 5160 : (*ops)[j] = oe;
4175 12653 : j++;
4176 : }
4177 10807 : ops->truncate (j);
4178 : }
4179 :
4180 1046539 : XDELETEVEC (ranges);
4181 1046539 : return any_changes;
4182 : }
4183 :
4184 : /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4185 : the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4186 : otherwise the comparison code. TYPE is a return value that is set
4187 : to type of comparison.
     :
     : VAR must be an SSA_NAME defined by a VEC_COND_EXPR assignment whose
     : condition is itself an SSA_NAME defined by a comparison assignment.
     : Every output pointer may be NULL, in which case it is not written:
     : *VCOND receives the statement defining VAR (stored as soon as that
     : statement is known to be an assignment, i.e. possibly also when
     : ERROR_MARK is returned), *LHS and *RHS the operands of the
     : comparison (likewise stored before the final checks), and, only on
     : success, *RETS the comparison statement, *RETI whether the returned
     : code was inverted and *TYPE the type of the condition operand. */
4188 :
4189 : static tree_code
4190 51043 : ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4191 : tree *lhs, tree *rhs, gassign **vcond)
4192 : {
4193 51043 : if (TREE_CODE (var) != SSA_NAME)
4194 : return ERROR_MARK;
4195 :
4196 45716 : gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4197 32333 : if (stmt == NULL)
4198 : return ERROR_MARK;
4199 32333 : if (vcond)
4200 32333 : *vcond = stmt;
4201 :
4202 : /* ??? If we start creating more COND_EXPR, we could perform
4203 : this same optimization with them. For now, simplify. */
4204 43175 : if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4205 : return ERROR_MARK;
4206 :
4207 1241 : tree cond = gimple_assign_rhs1 (stmt);
4208 1241 : tree_code cmp = TREE_CODE (cond);
4209 1241 : if (cmp != SSA_NAME)
4210 : return ERROR_MARK;
4211 :
4212 52240 : gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4213 1215 : if (assign == NULL
4214 1215 : || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4215 : return ERROR_MARK;
4216 :
4217 1105 : cmp = gimple_assign_rhs_code (assign);
4218 1105 : if (lhs)
4219 1105 : *lhs = gimple_assign_rhs1 (assign);
4220 1105 : if (rhs)
4221 2210 : *rhs = gimple_assign_rhs2 (assign);
4222 :
4223 : /* ??? For now, allow only canonical true and false result vectors.
4224 : We could expand this to other constants should the need arise,
4225 : but at the moment we don't create them.
     : NOTE(review): when only F is all-ones the code is inverted and INV
     : set, but the integer_zerop (F) check that follows is then applied
     : to that same all-ones F and rejects it, so as written only the
     : T == all-ones, F == zero form can succeed. Presumably the final
     : check was meant to look at T in the inverted case -- confirm before
     : relying on *RETI ever being true. */
4226 1105 : tree t = gimple_assign_rhs2 (stmt);
4227 1105 : tree f = gimple_assign_rhs3 (stmt);
4228 1105 : bool inv;
4229 1105 : if (integer_all_onesp (t))
4230 : inv = false;
4231 1069 : else if (integer_all_onesp (f))
4232 : {
4233 1 : cmp = invert_tree_comparison (cmp, false);
4234 1 : inv = true;
4235 : }
4236 : else
4237 : return ERROR_MARK;
4238 37 : if (!integer_zerop (f))
4239 : return ERROR_MARK;
4240 :
4241 : /* Success! */
4242 18 : if (rets)
4243 18 : *rets = assign;
4244 18 : if (reti)
4245 18 : *reti = inv;
4246 18 : if (type)
4247 18 : *type = TREE_TYPE (cond);
4248 : return cmp;
4249 : }
4250 :
4251 : /* Optimize the condition of VEC_COND_EXPRs which have been combined
4252 : with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).
     :
     : Each element of OPS that ovce_extract_ops recognizes is paired with
     : the later elements; when the two comparisons fold into one
     : (maybe_fold_and_comparisons / maybe_fold_or_comparisons), the folded
     : condition is installed in the first element's VEC_COND_EXPR and the
     : second element is marked with error_mark_node. Marked elements are
     : removed from OPS before returning. Returns true if anything was
     : changed (false right away when OPS has a single element). */
4253 :
4254 : static bool
4255 23928 : optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4256 : {
4257 23928 : unsigned int length = ops->length (), i, j;
4258 23928 : bool any_changes = false;
4259 :
4260 23928 : if (length == 1)
4261 : return false;
4262 :
4263 74950 : for (i = 0; i < length; ++i)
4264 : {
4265 51025 : tree elt0 = (*ops)[i]->op;
4266 :
4267 51025 : gassign *stmt0, *vcond0;
4268 51025 : bool invert;
4269 51025 : tree type, lhs0, rhs0;
4270 51025 : tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4271 : &rhs0, &vcond0);
4272 51025 : if (cmp0 == ERROR_MARK)
4273 51007 : continue;
4274 :
4275 36 : for (j = i + 1; j < length; ++j)
4276 : {
     : /* A reference, so that the assignment of error_mark_node below
     : is stored into the operand entry itself. */
4277 18 : tree &elt1 = (*ops)[j]->op;
4278 :
4279 18 : gassign *stmt1, *vcond1;
4280 18 : tree lhs1, rhs1;
4281 18 : tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4282 : &rhs1, &vcond1);
4283 18 : if (cmp1 == ERROR_MARK)
4284 18 : continue;
4285 :
4286 0 : tree comb;
4287 0 : if (opcode == BIT_AND_EXPR)
4288 0 : comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4289 : cmp1, lhs1, rhs1);
4290 0 : else if (opcode == BIT_IOR_EXPR)
4291 0 : comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4292 : cmp1, lhs1, rhs1);
4293 : else
4294 0 : gcc_unreachable ();
4295 0 : if (comb == NULL)
4296 0 : continue;
4297 :
4298 : /* Success! */
4299 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4300 : {
4301 0 : fprintf (dump_file, "Transforming ");
4302 0 : print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4303 0 : fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4304 0 : print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4305 0 : fprintf (dump_file, " into ");
4306 0 : print_generic_expr (dump_file, comb);
4307 0 : fputc ('\n', dump_file);
4308 : }
4309 :
4310 0 : gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4311 0 : tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4312 : true, GSI_SAME_STMT);
4313 0 : if (invert)
4314 0 : swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4315 : gimple_assign_rhs3_ptr (vcond0));
4316 0 : gimple_assign_set_rhs1 (vcond0, exp);
4317 0 : update_stmt (vcond0);
4318 :
4319 0 : elt1 = error_mark_node;
4320 0 : any_changes = true;
     :
     : /* VCOND0 now tests COMB, so CMP0/LHS0/RHS0 and INVERT no longer
     : describe it. Folding a further element against those stale
     : values would overwrite COMB (dropping the condition just merged
     : in) and swap the arms a second time, so stop pairing this
     : element. */
     : break;
4321 : }
4322 : }
4323 :
4324 23925 : if (any_changes)
4325 : {
     : /* Compact OPS in place, dropping the merged-away entries. */
4326 : operand_entry *oe;
4327 : j = 0;
4328 0 : FOR_EACH_VEC_ELT (*ops, i, oe)
4329 : {
4330 0 : if (oe->op == error_mark_node)
4331 0 : continue;
4332 0 : else if (i != j)
4333 0 : (*ops)[j] = oe;
4334 0 : j++;
4335 : }
4336 0 : ops->truncate (j);
4337 : }
4338 :
4339 : return any_changes;
4340 : }
4341 :
4342 : /* Return true if STMT is a cast like:
4343 : <bb N>:
4344 : ...
4345 : _123 = (int) _234;
4346 :
4347 : <bb M>:
4348 : # _345 = PHI <_123(N), 1(...), 1(...)>
4349 : where _234 has bool type, _123 has single use and
4350 : bb N has a single successor M. This is commonly used in
4351 : the last block of a range test.
4352 :
4353 : Also return true if STMT is a tcc_comparison like:
4354 : <bb N>:
4355 : ...
4356 : _234 = a_2(D) == 2;
4357 :
4358 : <bb M>:
4359 : # _345 = PHI <_234(N), 1(...), 1(...)>
4360 : _346 = (int) _345;
4361 : where _234 has bool type, single use and
4362 : bb N has a single successor M. This is commonly used in
4363 : the last block of a range test.
     :
     : In both forms the single successor edge must not be EDGE_COMPLEX and
     : the single use of the lhs must be a PHI located in the successor
     : block. For the cast form the lhs must additionally have integral
     : type and the block defining the cast operand must lie inside the
     : loop containing STMT. */
4364 :
4365 : static bool
4366 15460615 : final_range_test_p (gimple *stmt)
4367 : {
4368 15460615 : basic_block bb, rhs_bb, lhs_bb;
4369 15460615 : edge e;
4370 15460615 : tree lhs, rhs;
4371 15460615 : use_operand_p use_p;
4372 15460615 : gimple *use_stmt;
4373 :
4374 15460615 : if (!gimple_assign_cast_p (stmt)
4375 15460615 : && (!is_gimple_assign (stmt)
4376 4852477 : || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4377 : != tcc_comparison)))
4378 : return false;
4379 563658 : bb = gimple_bb (stmt);
4380 15819745 : if (!single_succ_p (bb))
4381 : return false;
4382 563375 : e = single_succ_edge (bb);
4383 563375 : if (e->flags & EDGE_COMPLEX)
4384 : return false;
4385 :
4386 563375 : lhs = gimple_assign_lhs (stmt);
4387 563375 : rhs = gimple_assign_rhs1 (stmt);
4388 563375 : if (gimple_assign_cast_p (stmt)
4389 563375 : && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4390 392081 : || TREE_CODE (rhs) != SSA_NAME
4391 375053 : || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4392 : return false;
4393 :
4394 211555 : if (!gimple_assign_cast_p (stmt)
4395 211555 : && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4396 : return false;
4397 :
4398 : /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4399 211381 : if (!single_imm_use (lhs, &use_p, &use_stmt))
4400 : return false;
4401 :
4402 205876 : if (gimple_code (use_stmt) != GIMPLE_PHI
4403 205876 : || gimple_bb (use_stmt) != e->dest)
4404 : return false;
4405 :
4406 : /* And that the rhs is defined in the same loop. (For the comparison
     : form the check below is applied to LHS, i.e. to the block of the
     : statement defining STMT's own result.) */
4407 204288 : if (gimple_assign_cast_p (stmt))
4408 : {
4409 72589 : if (TREE_CODE (rhs) != SSA_NAME
4410 72589 : || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4411 145172 : || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4412 43 : return false;
4413 : }
4414 : else
4415 : {
4416 131699 : if (TREE_CODE (lhs) != SSA_NAME
4417 131699 : || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4418 263398 : || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4419 0 : return false;
4420 : }
4421 :
4422 : return true;
4423 : }
4424 :
4425 : /* Return true if BB is suitable basic block for inter-bb range test
4426 : optimization. If BACKWARD is true, BB should be the only predecessor
4427 : of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4428 : or compared with to find a common basic block to which all conditions
4429 : branch to if true resp. false. If BACKWARD is false, TEST_BB should
4430 : be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4431 : TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4432 : block points to an empty block that falls through into *OTHER_BB and
4433 : the phi args match that path. */
4434 :
4435 : static bool
4436 11366347 : suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4437 : bool *test_swapped_p, bool backward)
4438 : {
4439 11366347 : edge_iterator ei, ei2;
4440 11366347 : edge e, e2;
4441 11366347 : gimple *stmt;
4442 11366347 : gphi_iterator gsi;
4443 11366347 : bool other_edge_seen = false;
4444 11366347 : bool is_cond;
4445 :
4446 11366347 : if (test_bb == bb)
4447 : return false;
4448 : /* Check last stmt first. */
4449 11366347 : stmt = last_nondebug_stmt (bb);
4450 11366347 : if (stmt == NULL
4451 10503452 : || (gimple_code (stmt) != GIMPLE_COND
4452 519553 : && (backward || !final_range_test_p (stmt)))
4453 10024072 : || gimple_visited_p (stmt)
4454 9964304 : || stmt_could_throw_p (cfun, stmt)
4455 21330525 : || *other_bb == bb)
4456 1402172 : return false;
4457 9964175 : is_cond = gimple_code (stmt) == GIMPLE_COND;
4458 9964175 : if (is_cond)
4459 : {
4460 : /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4461 : goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4462 : to *OTHER_BB (if not set yet, try to find it out). */
4463 18926142 : if (EDGE_COUNT (bb->succs) != 2)
4464 : return false;
4465 19559545 : FOR_EACH_EDGE (e, ei, bb->succs)
4466 : {
4467 16142829 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4468 : return false;
4469 16142829 : if (e->dest == test_bb)
4470 : {
4471 5360345 : if (backward)
4472 5358061 : continue;
4473 : else
4474 : return false;
4475 : }
4476 10782484 : if (e->dest == bb)
4477 : return false;
4478 10628263 : if (*other_bb == NULL)
4479 : {
4480 26060820 : FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4481 17373880 : if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4482 : return false;
4483 17373880 : else if (e->dest == e2->dest)
4484 2376123 : *other_bb = e->dest;
4485 8686940 : if (*other_bb == NULL)
4486 : return false;
4487 : }
4488 4317446 : if (e->dest == *other_bb)
4489 : other_edge_seen = true;
4490 939948 : else if (backward)
4491 : return false;
4492 : }
4493 3416716 : if (*other_bb == NULL || !other_edge_seen)
4494 : return false;
4495 : }
4496 40047 : else if (single_succ (bb) != *other_bb)
4497 : return false;
4498 :
4499 : /* Now check all PHIs of *OTHER_BB. */
4500 3416829 : e = find_edge (bb, *other_bb);
4501 3416829 : e2 = find_edge (test_bb, *other_bb);
4502 3423185 : retry:;
4503 4988519 : for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4504 : {
4505 2624186 : gphi *phi = gsi.phi ();
4506 : /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4507 : corresponding to BB and TEST_BB predecessor must be the same. */
4508 2624186 : if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4509 2624186 : gimple_phi_arg_def (phi, e2->dest_idx), 0))
4510 : {
4511 : /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4512 : one of the PHIs should have the lhs of the last stmt in
4513 : that block as PHI arg and that PHI should have 0 or 1
4514 : corresponding to it in all other range test basic blocks
4515 : considered. */
4516 1128208 : if (!is_cond)
4517 : {
4518 42006 : if (gimple_phi_arg_def (phi, e->dest_idx)
4519 42006 : == gimple_assign_lhs (stmt)
4520 42006 : && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4521 18259 : || integer_onep (gimple_phi_arg_def (phi,
4522 18259 : e2->dest_idx))))
4523 37540 : continue;
4524 : }
4525 : else
4526 : {
4527 1086202 : gimple *test_last = last_nondebug_stmt (test_bb);
4528 1086202 : if (gimple_code (test_last) == GIMPLE_COND)
4529 : {
4530 1049963 : if (backward ? e2->src != test_bb : e->src != bb)
4531 : return false;
4532 :
4533 : /* For last_bb, handle also:
4534 : if (x_3(D) == 3)
4535 : goto <bb 6>; [34.00%]
4536 : else
4537 : goto <bb 7>; [66.00%]
4538 :
4539 : <bb 6> [local count: 79512730]:
4540 :
4541 : <bb 7> [local count: 1073741824]:
4542 : # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4543 : where bb 7 is *OTHER_BB, but the PHI values from the
4544 : earlier bbs match the path through the empty bb
4545 : in between. */
4546 1045551 : edge e3;
4547 1045551 : if (backward)
4548 1384585 : e3 = EDGE_SUCC (test_bb,
4549 : e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4550 : else
4551 21032 : e3 = EDGE_SUCC (bb,
4552 : e == EDGE_SUCC (bb, 0) ? 1 : 0);
4553 1045551 : if (empty_block_p (e3->dest)
4554 34697 : && single_succ_p (e3->dest)
4555 34697 : && single_succ (e3->dest) == *other_bb
4556 1081695 : && single_pred_p (e3->dest)
4557 1079162 : && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4558 : {
4559 6356 : if (backward)
4560 5695 : e2 = single_succ_edge (e3->dest);
4561 : else
4562 661 : e = single_succ_edge (e3->dest);
4563 6356 : if (test_swapped_p)
4564 293 : *test_swapped_p = true;
4565 6356 : goto retry;
4566 : }
4567 : }
4568 36239 : else if (gimple_phi_arg_def (phi, e2->dest_idx)
4569 36239 : == gimple_assign_lhs (test_last)
4570 69109 : && (integer_zerop (gimple_phi_arg_def (phi,
4571 32870 : e->dest_idx))
4572 15068 : || integer_onep (gimple_phi_arg_def (phi,
4573 15068 : e->dest_idx))))
4574 31816 : continue;
4575 : }
4576 :
4577 1048084 : return false;
4578 : }
4579 : }
4580 : return true;
4581 : }
4582 :
4583 : /* Return true if BB doesn't have side-effects that would disallow
4584 : range test optimization, all SSA_NAMEs set in the bb are consumed
4585 : in the bb and there are no PHIs. The last non-debug stmt of BB is only checked for side effects; the assignment, trap and use checks apply to the stmts before it. */
4586 :
4587 : bool
4588 5323865 : no_side_effect_bb (basic_block bb)
4589 : {
4590 5323865 : gimple_stmt_iterator gsi;
4591 5323865 : gimple *last;
4592 :
4593 5323865 : if (!gimple_seq_empty_p (phi_nodes (bb)))
4594 : return false;
4595 4172365 : last = last_nondebug_stmt (bb);
4596 14233899 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4597 : {
4598 10061534 : gimple *stmt = gsi_stmt (gsi);
4599 10061534 : tree lhs;
4600 10061534 : imm_use_iterator imm_iter;
4601 10061534 : use_operand_p use_p;
4602 :
4603 10061534 : if (is_gimple_debug (stmt))
4604 4189377 : continue;
4605 5872157 : if (gimple_has_side_effects (stmt))
4606 4172365 : return false;
4607 4969837 : if (stmt == last) /* Everything before LAST passed the checks below. */
4608 : return true;
4609 3984709 : if (!is_gimple_assign (stmt))
4610 : return false;
4611 3315429 : lhs = gimple_assign_lhs (stmt);
4612 3315429 : if (TREE_CODE (lhs) != SSA_NAME)
4613 : return false;
4614 3085293 : if (gimple_assign_rhs_could_trap_p (stmt))
4615 : return false;
4616 6305467 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) /* Every non-debug use of LHS must be in BB. */
4617 : {
4618 2438552 : gimple *use_stmt = USE_STMT (use_p);
4619 2438552 : if (is_gimple_debug (use_stmt))
4620 170654 : continue;
4621 2267898 : if (gimple_bb (use_stmt) != bb)
4622 467331 : return false;
4623 2167123 : }
4624 : }
4625 : return false; /* Reached only when the walk never met LAST. */
4626 : }
4627 :
4628 : /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4629 : return true and fill in *OPS recursively. The defining stmt is marked visited. An SSA_NAME operand that is not itself such a reassociable stmt is pushed only if it has a single use; other operands are not pushed. */
4630 :
4631 : static bool
4632 102684 : get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4633 : class loop *loop)
4634 : {
4635 102684 : gimple *stmt = SSA_NAME_DEF_STMT (var);
4636 102684 : tree rhs[2];
4637 102684 : int i;
4638 :
4639 102684 : if (!is_reassociable_op (stmt, code, loop))
4640 : return false;
4641 :
4642 23972 : rhs[0] = gimple_assign_rhs1 (stmt);
4643 23972 : rhs[1] = gimple_assign_rhs2 (stmt);
4644 23972 : gimple_set_visited (stmt, true);
4645 71916 : for (i = 0; i < 2; i++)
4646 47944 : if (TREE_CODE (rhs[i]) == SSA_NAME
4647 47944 : && !get_ops (rhs[i], code, ops, loop) /* The recursive call pushes the leaves of nested CODE stmts itself. */
4648 87317 : && has_single_use (rhs[i]))
4649 : {
4650 38657 : operand_entry *oe = operand_entry_pool.allocate ();
4651 :
4652 38657 : oe->op = rhs[i];
4653 38657 : oe->rank = code; /* The rank field stores CODE for these entries. */
4654 38657 : oe->id = 0;
4655 38657 : oe->count = 1;
4656 38657 : oe->stmt_to_insert = NULL;
4657 38657 : ops->safe_push (oe);
4658 : }
4659 : return true;
4660 : }
4661 :
4662 : /* Find the ops that were added by get_ops starting from VAR, see if
4663 : they were changed during update_range_test and if yes, create new
4664 : stmts. *PIDX is the index of the next OPS entry to consume and is advanced for each entry taken. Returns NULL if VAR's defining stmt is not a reassociable CODE stmt, otherwise VAR or a newly created SSA_NAME. */
4665 :
4666 : static tree
4667 10235 : update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4668 : unsigned int *pidx, class loop *loop)
4669 : {
4670 10235 : gimple *stmt = SSA_NAME_DEF_STMT (var);
4671 10235 : tree rhs[4]; /* [0], [1]: current operands; [2], [3]: their replacements. */
4672 10235 : int i;
4673 :
4674 10235 : if (!is_reassociable_op (stmt, code, loop))
4675 : return NULL;
4676 :
4677 3388 : rhs[0] = gimple_assign_rhs1 (stmt);
4678 3388 : rhs[1] = gimple_assign_rhs2 (stmt);
4679 3388 : rhs[2] = rhs[0];
4680 3388 : rhs[3] = rhs[1];
4681 10164 : for (i = 0; i < 2; i++)
4682 6776 : if (TREE_CODE (rhs[i]) == SSA_NAME)
4683 : {
4684 6776 : rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4685 6776 : if (rhs[2 + i] == NULL_TREE)
4686 : {
4687 6506 : if (has_single_use (rhs[i])) /* Same predicate get_ops used when deciding to push this operand. */
4688 6480 : rhs[2 + i] = ops[(*pidx)++]->op;
4689 : else
4690 26 : rhs[2 + i] = rhs[i];
4691 : }
4692 : }
4693 3388 : if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) /* Rebuild only if the operands changed ... */
4694 3077 : && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) /* ... and are not merely swapped. */
4695 : {
4696 3077 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4697 3077 : var = make_ssa_name (TREE_TYPE (var));
4698 3077 : gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4699 : rhs[2], rhs[3]);
4700 3077 : gimple_set_uid (g, gimple_uid (stmt));
4701 3077 : gimple_set_visited (g, true);
4702 3077 : gsi_insert_before (&gsi, g, GSI_SAME_STMT); /* The old stmt is left in place; the new one goes right before it. */
4703 3077 : gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4704 3077 : if (fold_stmt_inplace (&gsi2))
4705 2025 : update_stmt (g);
4706 : }
4707 : return var;
4708 : }
4709 :
4710 : /* Structure to track the initial value passed to get_ops and
4711 : the range in the ops vector for each basic block. */
4712 :
4713 : struct inter_bb_range_test_entry
4714 : {
4715 : tree op; /* Value handed to get_ops/update_ops, or NULL_TREE if none was recorded. */
4716 : unsigned int first_idx, last_idx; /* Half-open range [first_idx, last_idx) into the ops vector. */
4717 : };
4718 :
4719 : /* Inter-bb range test optimization.
4720 :
4721 : Returns TRUE if a gimple conditional is optimized to a true/false,
4722 : otherwise return FALSE.
4723 :
4724 : This indicates to the caller that it should run a CFG cleanup pass
4725 : once reassociation is completed. */
4726 :
4727 : static bool
4728 18801414 : maybe_optimize_range_tests (gimple *stmt)
4729 : {
4730 18801414 : basic_block first_bb = gimple_bb (stmt);
4731 18801414 : basic_block last_bb = first_bb;
4732 18801414 : basic_block other_bb = NULL;
4733 18801414 : basic_block bb;
4734 18801414 : edge_iterator ei;
4735 18801414 : edge e;
4736 18801414 : auto_vec<operand_entry *> ops;
4737 18801414 : auto_vec<inter_bb_range_test_entry> bbinfo; /* One entry per bb, pushed walking from last_bb back to first_bb. */
4738 18801414 : bool any_changes = false;
4739 18801414 : bool cfg_cleanup_needed = false;
4740 :
4741 : /* Consider only basic blocks that end with GIMPLE_COND or
4742 : a cast statement satisfying final_range_test_p. All
4743 : but the last bb in the first_bb .. last_bb range
4744 : should end with GIMPLE_COND. */
4745 18801414 : if (gimple_code (stmt) == GIMPLE_COND)
4746 : {
4747 27496669 : if (EDGE_COUNT (first_bb->succs) != 2)
4748 : return cfg_cleanup_needed;
4749 : }
4750 10092025 : else if (final_range_test_p (stmt))
4751 85823 : other_bb = single_succ (first_bb);
4752 : else
4753 : return cfg_cleanup_needed;
4754 :
4755 8795212 : if (stmt_could_throw_p (cfun, stmt))
4756 : return cfg_cleanup_needed;
4757 :
4758 : /* As relative ordering of post-dominator sons isn't fixed,
4759 : maybe_optimize_range_tests can be called first on any
4760 : bb in the range we want to optimize. So, start searching
4761 : backwards, if first_bb can be set to a predecessor. */
4762 8797343 : while (single_pred_p (first_bb))
4763 : {
4764 5817798 : basic_block pred_bb = single_pred (first_bb);
4765 5817798 : if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4766 : break;
4767 631609 : if (!no_side_effect_bb (first_bb))
4768 : break;
4769 : first_bb = pred_bb;
4770 : }
4771 : /* If first_bb is last_bb, other_bb hasn't been computed yet.
4772 : Before starting forward search in last_bb successors, find
4773 : out the other_bb. */
4774 8795038 : if (first_bb == last_bb)
4775 : {
4776 8792947 : other_bb = NULL;
4777 : /* As non-GIMPLE_COND last stmt always terminates the range,
4778 : if backward search didn't discover anything, just give up. */
4779 8792947 : if (gimple_code (stmt) != GIMPLE_COND)
4780 : return cfg_cleanup_needed;
4781 : /* Look at both successors. Either it ends with a GIMPLE_COND
4782 : and satisfies suitable_cond_bb, or ends with a cast and
4783 : other_bb is that cast's successor. */
4784 24224602 : FOR_EACH_EDGE (e, ei, first_bb->succs)
4785 16776935 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4786 16776935 : || e->dest == first_bb)
4787 : return cfg_cleanup_needed;
4788 25433182 : else if (single_pred_p (e->dest))
4789 : {
4790 9915943 : stmt = last_nondebug_stmt (e->dest);
4791 9915943 : if (stmt
4792 9735122 : && gimple_code (stmt) == GIMPLE_COND
4793 14367515 : && EDGE_COUNT (e->dest->succs) == 2)
4794 : {
4795 4451572 : if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4796 : NULL, true))
4797 : break;
4798 : else
4799 3706969 : other_bb = NULL;
4800 : }
4801 5464371 : else if (stmt
4802 5283550 : && final_range_test_p (stmt)
4803 5542620 : && find_edge (first_bb, single_succ (e->dest)))
4804 : {
4805 38557 : other_bb = single_succ (e->dest);
4806 38557 : if (other_bb == first_bb)
4807 0 : other_bb = NULL;
4808 : }
4809 : }
4810 8192270 : if (other_bb == NULL)
4811 : return cfg_cleanup_needed;
4812 : }
4813 : /* Now do the forward search, moving last_bb to successor bbs
4814 : that aren't other_bb. */
4815 1773158 : while (EDGE_COUNT (last_bb->succs) == 2)
4816 : {
4817 1663196 : FOR_EACH_EDGE (e, ei, last_bb->succs)
4818 1663196 : if (e->dest != other_bb)
4819 : break;
4820 987907 : if (e == NULL)
4821 : break;
4822 987907 : if (!single_pred_p (e->dest))
4823 : break;
4824 950897 : if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4825 : break;
4826 842041 : if (!no_side_effect_bb (e->dest))
4827 : break;
4828 207829 : last_bb = e->dest;
4829 : }
4830 785251 : if (first_bb == last_bb)
4831 : return cfg_cleanup_needed;
4832 : /* Here basic blocks first_bb through last_bb's predecessor
4833 : end with GIMPLE_COND, all of them have one of the edges to
4834 : other_bb and another to another block in the range,
4835 : all blocks except first_bb don't have side-effects and
4836 : last_bb ends with either GIMPLE_COND, or cast satisfying
4837 : final_range_test_p. */
4838 210134 : for (bb = last_bb; ; bb = single_pred (bb))
4839 : {
4840 361387 : enum tree_code code;
4841 361387 : tree lhs, rhs;
4842 361387 : inter_bb_range_test_entry bb_ent;
4843 :
4844 361387 : bb_ent.op = NULL_TREE;
4845 361387 : bb_ent.first_idx = ops.length ();
4846 361387 : bb_ent.last_idx = bb_ent.first_idx;
4847 361387 : e = find_edge (bb, other_bb);
4848 361387 : stmt = last_nondebug_stmt (bb);
4849 361387 : gimple_set_visited (stmt, true);
4850 361387 : if (gimple_code (stmt) != GIMPLE_COND)
4851 : {
4852 5173 : use_operand_p use_p;
4853 5173 : gimple *phi;
4854 5173 : edge e2;
4855 5173 : unsigned int d;
4856 :
4857 5173 : lhs = gimple_assign_lhs (stmt);
4858 5173 : rhs = gimple_assign_rhs1 (stmt);
4859 5173 : gcc_assert (bb == last_bb);
4860 :
4861 : /* stmt is
4862 : _123 = (int) _234;
4863 : OR
4864 : _234 = a_2(D) == 2;
4865 :
4866 : followed by:
4867 : <bb M>:
4868 : # _345 = PHI <_123(N), 1(...), 1(...)>
4869 :
4870 : or 0 instead of 1. If it is 0, the _234
4871 : range test is anded together with all the
4872 : other range tests, if it is 1, it is ored with
4873 : them. */
4874 5173 : single_imm_use (lhs, &use_p, &phi);
4875 5173 : gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4876 5173 : e2 = find_edge (first_bb, other_bb);
4877 5173 : d = e2->dest_idx;
4878 5173 : gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4879 5173 : if (integer_zerop (gimple_phi_arg_def (phi, d)))
4880 : code = BIT_AND_EXPR;
4881 : else
4882 : {
4883 2765 : gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4884 : code = BIT_IOR_EXPR;
4885 : }
4886 :
4887 : /* If _234 SSA_NAME_DEF_STMT is
4888 : _234 = _567 | _789;
4889 : (or &, corresponding to 1/0 in the phi arguments),
4890 : push into ops the individual range test arguments
4891 : of the bitwise or resp. and, recursively. */
4892 5173 : if (TREE_CODE (rhs) == SSA_NAME
4893 5173 : && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4894 : != tcc_comparison)
4895 2740 : && !get_ops (rhs, code, &ops,
4896 : loop_containing_stmt (stmt))
4897 7706 : && has_single_use (rhs))
4898 : {
4899 : /* Otherwise, push the _234 range test itself. */
4900 2523 : operand_entry *oe = operand_entry_pool.allocate ();
4901 :
4902 2523 : oe->op = rhs;
4903 2523 : oe->rank = code;
4904 2523 : oe->id = 0;
4905 2523 : oe->count = 1;
4906 2523 : oe->stmt_to_insert = NULL;
4907 2523 : ops.safe_push (oe);
4908 2523 : bb_ent.last_idx++;
4909 2523 : bb_ent.op = rhs;
4910 : }
4911 2650 : else if (is_gimple_assign (stmt)
4912 2650 : && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4913 : == tcc_comparison)
4914 2433 : && !get_ops (lhs, code, &ops,
4915 : loop_containing_stmt (stmt))
4916 5083 : && has_single_use (lhs))
4917 : {
4918 2433 : operand_entry *oe = operand_entry_pool.allocate ();
4919 2433 : oe->op = lhs;
4920 2433 : oe->rank = code;
4921 2433 : oe->id = 0;
4922 2433 : oe->count = 1; /* NOTE(review): unlike the two sibling pushes in this function, oe->stmt_to_insert is not set here; confirm the pool allocation clears it or that it is never read for these entries. */
4923 2433 : ops.safe_push (oe);
4924 2433 : bb_ent.last_idx++;
4925 2433 : bb_ent.op = lhs;
4926 : }
4927 : else
4928 : {
4929 217 : bb_ent.last_idx = ops.length ();
4930 217 : bb_ent.op = rhs;
4931 : }
4932 5173 : bbinfo.safe_push (bb_ent);
4933 10605 : for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4934 5432 : ops[i]->id = bb->index;
4935 5173 : continue;
4936 5173 : }
4937 356214 : else if (bb == last_bb)
4938 : {
4939 : /* For last_bb, handle also:
4940 : if (x_3(D) == 3)
4941 : goto <bb 6>; [34.00%]
4942 : else
4943 : goto <bb 7>; [66.00%]
4944 :
4945 : <bb 6> [local count: 79512730]:
4946 :
4947 : <bb 7> [local count: 1073741824]:
4948 : # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4949 : where bb 7 is OTHER_BB, but the PHI values from the
4950 : earlier bbs match the path through the empty bb
4951 : in between. */
4952 146080 : bool test_swapped_p = false;
4953 146080 : bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4954 : &other_bb, &test_swapped_p, true);
4955 146080 : gcc_assert (ok);
4956 146080 : if (test_swapped_p)
4957 503 : e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0); /* Use the other successor edge of BB. */
4958 : }
4959 : /* Otherwise stmt is GIMPLE_COND. */
4960 356214 : code = gimple_cond_code (stmt);
4961 356214 : lhs = gimple_cond_lhs (stmt);
4962 356214 : rhs = gimple_cond_rhs (stmt);
4963 356214 : if (TREE_CODE (lhs) == SSA_NAME
4964 356122 : && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4965 653222 : && ((code != EQ_EXPR && code != NE_EXPR)
4966 223342 : || rhs != boolean_false_node
4967 : /* Either push into ops the individual bitwise
4968 : or resp. and operands, depending on which
4969 : edge is other_bb. */
4970 49567 : || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4971 49567 : ^ (code == EQ_EXPR))
4972 : ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4973 : loop_containing_stmt (stmt))))
4974 : {
4975 : /* Or push the GIMPLE_COND stmt itself. */
4976 281814 : operand_entry *oe = operand_entry_pool.allocate ();
4977 :
4978 281814 : oe->op = NULL;
4979 563628 : oe->rank = (e->flags & EDGE_TRUE_VALUE)
4980 281814 : ? BIT_IOR_EXPR : BIT_AND_EXPR;
4981 : /* oe->op = NULL signals that there is no SSA_NAME
4982 : for the range test, and oe->id instead is the
4983 : index of the basic block at whose end the GIMPLE_COND
4984 : is. */
4985 281814 : oe->id = bb->index;
4986 281814 : oe->count = 1;
4987 281814 : oe->stmt_to_insert = NULL;
4988 281814 : ops.safe_push (oe);
4989 281814 : bb_ent.op = NULL;
4990 281814 : bb_ent.last_idx++;
4991 : }
4992 74400 : else if (ops.length () > bb_ent.first_idx)
4993 : {
4994 15122 : bb_ent.op = lhs;
4995 15122 : bb_ent.last_idx = ops.length ();
4996 : }
4997 356214 : bbinfo.safe_push (bb_ent);
4998 676209 : for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4999 319995 : ops[i]->id = bb->index;
5000 356214 : if (bb == first_bb)
5001 : break;
5002 210134 : }
5003 18952667 : if (ops.length () > 1)
5004 119332 : any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5005 119332 : if (any_changes)
5006 : {
5007 : unsigned int idx, max_idx = 0;
5008 : /* update_ops relies on has_single_use predicates returning the
5009 : same values as it did during get_ops earlier. Additionally it
5010 : never removes statements, only adds new ones and it should walk
5011 : from the single imm use and check the predicate already before
5012 : making those changes.
5013 : On the other side, the handling of GIMPLE_COND directly can turn
5014 : previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5015 : it needs to be done in a separate loop afterwards. */
5016 21539 : for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5017 : {
5018 35673 : if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5019 35673 : && bbinfo[idx].op != NULL_TREE)
5020 : {
5021 3459 : tree new_op;
5022 :
5023 3459 : max_idx = idx;
5024 3459 : stmt = last_nondebug_stmt (bb);
5025 6918 : new_op = update_ops (bbinfo[idx].op,
5026 : (enum tree_code)
5027 3459 : ops[bbinfo[idx].first_idx]->rank,
5028 3459 : ops, &bbinfo[idx].first_idx,
5029 : loop_containing_stmt (stmt));
5030 3459 : if (new_op == NULL_TREE)
5031 : {
5032 341 : gcc_assert (bb == last_bb);
5033 341 : new_op = ops[bbinfo[idx].first_idx++]->op;
5034 : }
5035 3459 : if (bbinfo[idx].op != new_op)
5036 : {
5037 3184 : imm_use_iterator iter;
5038 3184 : use_operand_p use_p;
5039 3184 : gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5040 :
5041 9562 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5042 3194 : if (is_gimple_debug (use_stmt))
5043 10 : continue;
5044 3184 : else if (gimple_code (use_stmt) == GIMPLE_COND
5045 3184 : || gimple_code (use_stmt) == GIMPLE_PHI)
5046 8637 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5047 2879 : SET_USE (use_p, new_op);
5048 305 : else if ((is_gimple_assign (use_stmt)
5049 305 : && (TREE_CODE_CLASS
5050 : (gimple_assign_rhs_code (use_stmt))
5051 : == tcc_comparison)))
5052 : cast_or_tcc_cmp_stmt = use_stmt;
5053 305 : else if (gimple_assign_cast_p (use_stmt))
5054 : cast_or_tcc_cmp_stmt = use_stmt;
5055 : else
5056 0 : gcc_unreachable ();
5057 :
5058 3184 : if (cast_or_tcc_cmp_stmt)
5059 : {
5060 305 : gcc_assert (bb == last_bb);
5061 305 : tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5062 305 : tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5063 305 : enum tree_code rhs_code
5064 305 : = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5065 305 : ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5066 : : CONVERT_EXPR;
5067 305 : gassign *g;
5068 305 : if (is_gimple_min_invariant (new_op))
5069 : {
5070 91 : new_op = fold_convert (TREE_TYPE (lhs), new_op);
5071 91 : g = gimple_build_assign (new_lhs, new_op);
5072 : }
5073 : else
5074 214 : g = gimple_build_assign (new_lhs, rhs_code, new_op);
5075 305 : gimple_stmt_iterator gsi
5076 305 : = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5077 305 : gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5078 305 : gimple_set_visited (g, true);
5079 305 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5080 927 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5081 317 : if (is_gimple_debug (use_stmt))
5082 12 : continue;
5083 305 : else if (gimple_code (use_stmt) == GIMPLE_COND
5084 305 : || gimple_code (use_stmt) == GIMPLE_PHI)
5085 915 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5086 305 : SET_USE (use_p, new_lhs);
5087 : else
5088 305 : gcc_unreachable ();
5089 : }
5090 : }
5091 : }
5092 35673 : if (bb == first_bb)
5093 : break;
5094 21539 : }
5095 21539 : for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5096 : {
5097 35673 : if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5098 31873 : && bbinfo[idx].op == NULL_TREE
5099 67546 : && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5100 : {
5101 55534 : gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5102 :
5103 27767 : if (idx > max_idx)
5104 : max_idx = idx;
5105 :
5106 : /* If we collapse the conditional to a true/false
5107 : condition, then bubble that knowledge up to our caller. */
5108 27767 : if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5109 : {
5110 11486 : gimple_cond_make_false (cond_stmt);
5111 11486 : cfg_cleanup_needed = true;
5112 : }
5113 16281 : else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5114 : {
5115 3798 : gimple_cond_make_true (cond_stmt);
5116 3798 : cfg_cleanup_needed = true;
5117 : }
5118 : else
5119 : {
5120 12483 : gimple_cond_set_code (cond_stmt, NE_EXPR);
5121 12483 : gimple_cond_set_lhs (cond_stmt,
5122 12483 : ops[bbinfo[idx].first_idx]->op);
5123 12483 : gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5124 : }
5125 27767 : update_stmt (cond_stmt);
5126 : }
5127 35673 : if (bb == first_bb)
5128 : break;
5129 21539 : }
5130 :
5131 : /* The above changes could result in basic blocks after the first
5132 : modified one, up to and including last_bb, to be executed even if
5133 : they would not be in the original program. If the value ranges of
5134 : assignment lhs' in those bbs were dependent on the conditions
5135 : guarding those basic blocks which now can change, the VRs might
5136 : be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5137 : are only used within the same bb, it should be not a big deal if
5138 : we just reset all the VRs in those bbs. See PR68671. */
5139 34427 : for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5140 20293 : reset_flow_sensitive_info_in_bb (bb);
5141 : }
5142 : return cfg_cleanup_needed;
5143 18801414 : }
5144 :
5145 : /* Remove def stmt of VAR if VAR has zero uses and recurse
5146 : on rhs1 operand if so. Only assignments marked visited are removed; the walk stops at the first def stmt that is not one. */
5147 :
5148 : static void
5149 70213 : remove_visited_stmt_chain (tree var)
5150 : {
5151 95031 : gimple *stmt;
5152 95031 : gimple_stmt_iterator gsi;
5153 :
5154 119849 : while (1)
5155 : {
5156 95031 : if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5157 : return;
5158 36311 : stmt = SSA_NAME_DEF_STMT (var);
5159 36311 : if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5160 : {
5161 24818 : var = gimple_assign_rhs1 (stmt); /* Fetch the next candidate before STMT is removed. */
5162 24818 : gsi = gsi_for_stmt (stmt);
5163 24818 : reassoc_remove_stmt (&gsi);
5164 24818 : release_defs (stmt);
5165 : }
5166 : else
5167 : return;
5168 : }
5169 : }
5170 :
5171 : /* This function checks three consecutive operands in
5172 : passed operands vector OPS starting from OPINDEX and
5173 : swaps two operands if it is profitable for binary operation
5174 : consuming OPINDEX + 1 and OPINDEX + 2 operands.
5175 :
5176 : We pair ops with the same rank if possible. */
5177 :
5178 : static void
5179 129985 : swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5180 : unsigned int opindex)
5181 : {
5182 129985 : operand_entry *oe1, *oe2, *oe3;
5183 :
5184 129985 : oe1 = ops[opindex];
5185 129985 : oe2 = ops[opindex + 1];
5186 129985 : oe3 = ops[opindex + 2];
5187 :
5188 129985 : if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5189 16062 : std::swap (*oe1, *oe3); /* Swaps the entries' contents, not the pointers stored in OPS. */
5190 113923 : else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5191 427 : std::swap (*oe1, *oe2);
5192 129985 : }
5193 :
5194 : /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5195 : two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5196 : whether RHS1 op RHS2 can be inserted before or needs to be inserted
5197 : after the returned stmt. NOTE(review): the calls below pass STMT as the first argument of reassoc_stmt_dominates_stmt_p and the definition as the second, which looks like "STMT dominates the definition"; confirm the wording above against that helper. */
5198 :
5199 : static inline gimple *
5200 367001 : find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5201 : {
5202 367001 : insert_before = true;
5203 367001 : if (TREE_CODE (rhs1) == SSA_NAME
5204 367001 : && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5205 : {
5206 10135 : stmt = SSA_NAME_DEF_STMT (rhs1);
5207 10135 : insert_before = false;
5208 : }
5209 367001 : if (TREE_CODE (rhs2) == SSA_NAME
5210 367001 : && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) /* STMT may already be RHS1's definition here. */
5211 : {
5212 24279 : stmt = SSA_NAME_DEF_STMT (rhs2);
5213 24279 : insert_before = false;
5214 : }
5215 367001 : return stmt;
5216 : }
5217 :
5218 : /* If the stmt that defines operand has to be inserted, insert it
5219 : before the use. STMT is the use; STMT_TO_INSERT must be an assignment and receives the uid of the chosen insertion point. */
5220 : static void
5221 85 : insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5222 : {
5223 85 : gcc_assert (is_gimple_assign (stmt_to_insert));
5224 85 : tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5225 85 : tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5226 85 : bool insert_before;
5227 85 : gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5228 85 : gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5229 85 : gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5230 :
5231 : /* If the insert point is not stmt, then insert_point would be
5232 : the point where operand rhs1 or rhs2 is defined. In this case,
5233 : stmt_to_insert has to be inserted afterwards. This would
5234 : only happen when the stmt insertion point is flexible. */
5235 85 : if (insert_before)
5236 85 : gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5237 : else
5238 0 : insert_stmt_after (stmt_to_insert, insert_point);
5239 85 : }
5240 :
5241 :
5242 : /* Recursively rewrite our linearized statements so that the operators
5243 : match those in OPS[OPINDEX], putting the computation in rank
5244 : order. Return new lhs.
5245 : CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5246 : the current stmt and during recursive invocations.
5247 : NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5248 : recursive invocations. */
5249 :
5250 : static tree
5251 4827169 : rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5252 : const vec<operand_entry *> &ops, bool changed,
5253 : bool next_changed)
5254 : {
5255 4827169 : tree rhs1 = gimple_assign_rhs1 (stmt);
5256 4827169 : tree rhs2 = gimple_assign_rhs2 (stmt);
5257 4827169 : tree lhs = gimple_assign_lhs (stmt);
5258 4827169 : operand_entry *oe;
5259 :
5260 : /* The final recursion case for this function is that you have
5261 : exactly two operations left.
5262 : If we had exactly one op in the entire list to start with, we
5263 : would have never called this function, and the tail recursion
5264 : rewrites them one at a time. */
5265 9654338 : if (opindex + 2 == ops.length ())
5266 : {
5267 4577316 : operand_entry *oe1, *oe2;
5268 :
5269 4577316 : oe1 = ops[opindex];
5270 4577316 : oe2 = ops[opindex + 1];
5271 4577316 : if (commutative_tree_code (rhs_code)
5272 4577316 : && tree_swap_operands_p (oe1->op, oe2->op))
5273 : std::swap (oe1, oe2); /* Swaps only the local pointers; OPS itself is unchanged. */
5274 :
5275 4577316 : if (rhs1 != oe1->op || rhs2 != oe2->op)
5276 : {
5277 234971 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5278 234971 : unsigned int uid = gimple_uid (stmt);
5279 :
5280 234971 : if (dump_file && (dump_flags & TDF_DETAILS))
5281 : {
5282 31 : fprintf (dump_file, "Transforming ");
5283 31 : print_gimple_stmt (dump_file, stmt, 0);
5284 : }
5285 :
5286 : /* If the stmt that defines operand has to be inserted, insert it
5287 : before the use. */
5288 234971 : if (oe1->stmt_to_insert)
5289 36 : insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5290 234971 : if (oe2->stmt_to_insert)
5291 49 : insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5292 : /* Even when changed is false, reassociation could have e.g. removed
5293 : some redundant operations, so unless we are just swapping the
5294 : arguments or unless there is no change at all (then we just
5295 : return lhs), force creation of a new SSA_NAME. */
5296 234971 : if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5297 : {
5298 105636 : bool insert_before;
5299 105636 : gimple *insert_point
5300 105636 : = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5301 105636 : lhs = make_ssa_name (TREE_TYPE (lhs));
5302 105636 : stmt
5303 105636 : = gimple_build_assign (lhs, rhs_code,
5304 : oe1->op, oe2->op);
5305 105636 : gimple_set_uid (stmt, uid);
5306 105636 : gimple_set_visited (stmt, true);
5307 105636 : if (insert_before)
5308 85367 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5309 : else
5310 20269 : insert_stmt_after (stmt, insert_point);
5311 105636 : }
5312 : else
5313 : {
5314 129335 : bool insert_before;
5315 129335 : gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5316 : insert_before)
5317 : == stmt);
5318 129335 : gimple_assign_set_rhs1 (stmt, oe1->op); /* Reuse STMT and its lhs in place. */
5319 129335 : gimple_assign_set_rhs2 (stmt, oe2->op);
5320 129335 : update_stmt (stmt);
5321 : }
5322 :
5323 234971 : if (rhs1 != oe1->op && rhs1 != oe2->op)
5324 53781 : remove_visited_stmt_chain (rhs1);
5325 :
5326 234971 : if (dump_file && (dump_flags & TDF_DETAILS))
5327 : {
5328 31 : fprintf (dump_file, " into ");
5329 31 : print_gimple_stmt (dump_file, stmt, 0);
5330 : }
5331 : }
5332 4577316 : return lhs;
5333 : }
5334 :
5335 : /* If we hit here, we should have 3 or more ops left. */
5336 249853 : gcc_assert (opindex + 2 < ops.length ());
5337 :
5338 : /* Rewrite the next operator. */
5339 249853 : oe = ops[opindex];
5340 :
5341 : /* If the stmt that defines operand has to be inserted, insert it
5342 : before the use. */
5343 249853 : if (oe->stmt_to_insert)
5344 0 : insert_stmt_before_use (stmt, oe->stmt_to_insert);
5345 :
5346 : /* Recurse on the LHS of the binary operator, which is guaranteed to
5347 : be the non-leaf side. */
5348 249853 : tree new_rhs1
5349 249853 : = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5350 249853 : changed || oe->op != rhs2 || next_changed,
5351 : false);
5352 :
5353 249853 : if (oe->op != rhs2 || new_rhs1 != rhs1)
5354 : {
5355 131945 : if (dump_file && (dump_flags & TDF_DETAILS))
5356 : {
5357 6 : fprintf (dump_file, "Transforming ");
5358 6 : print_gimple_stmt (dump_file, stmt, 0);
5359 : }
5360 :
5361 : /* If changed is false, this is either opindex == 0
5362 : or all outer rhs2's were equal to corresponding oe->op,
5363 : and powi_result is NULL.
5364 : That means lhs is equivalent before and after reassociation.
5365 : Otherwise ensure the old lhs SSA_NAME is not reused and
5366 : create a new stmt as well, so that any debug stmts will be
5367 : properly adjusted. */
5368 131945 : if (changed)
5369 : {
5370 24747 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5371 24747 : unsigned int uid = gimple_uid (stmt);
5372 24747 : bool insert_before;
5373 24747 : gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5374 : insert_before);
5375 :
5376 24747 : lhs = make_ssa_name (TREE_TYPE (lhs));
5377 24747 : stmt = gimple_build_assign (lhs, rhs_code,
5378 : new_rhs1, oe->op);
5379 24747 : gimple_set_uid (stmt, uid);
5380 24747 : gimple_set_visited (stmt, true);
5381 24747 : if (insert_before)
5382 11650 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5383 : else
5384 13097 : insert_stmt_after (stmt, insert_point);
5385 : }
5386 : else
5387 : {
5388 107198 : bool insert_before;
5389 107198 : gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5390 : insert_before)
5391 : == stmt);
5392 107198 : gimple_assign_set_rhs1 (stmt, new_rhs1);
5393 107198 : gimple_assign_set_rhs2 (stmt, oe->op);
5394 107198 : update_stmt (stmt);
5395 : }
5396 :
5397 131945 : if (dump_file && (dump_flags & TDF_DETAILS))
5398 : {
5399 6 : fprintf (dump_file, " into ");
5400 6 : print_gimple_stmt (dump_file, stmt, 0);
5401 : }
5402 : }
5403 : return lhs;
5404 : }
5405 :
5406 : /* Find out how many cycles we need to compute statements chain.
5407 : OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
5408 : maximum number of independent statements we may execute per cycle. */
5409 :
5410 : static int
5411 18848 : get_required_cycles (int ops_num, int cpu_width)
5412 : {
5413 18848 : int res;
5414 18848 : int elog;
5415 18848 : unsigned int rest;
5416 :
5417 : /* While we have more than 2 * cpu_width operands
5418 : we may reduce number of operands by cpu_width
5419 : per cycle. */
5420 18848 : res = ops_num / (2 * cpu_width);
5421 :
5422 : /* Remained operands count may be reduced twice per cycle
5423 : until we have only one operand. */
5424 18848 : rest = (unsigned)(ops_num - res * cpu_width);
5425 18848 : elog = exact_log2 (rest);
5426 8727 : if (elog >= 0)
5427 8727 : res += elog;
5428 : else
5429 20242 : res += floor_log2 (rest) + 1;
5430 :
5431 18848 : return res;
5432 : }
5433 :
5434 : /* Given that the target fully pipelines FMA instructions, return the latency
5435 : of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of
5436 : pipes. */
5437 :
5438 : static inline int
5439 0 : get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
5440 : {
5441 0 : gcc_checking_assert (mult_num && mult_num <= ops_num);
5442 :
5443 : /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
5444 : e.g:
5445 :
5446 : A * B + C * D
5447 : =>
5448 : _1 = A * B;
5449 : _2 = .FMA (C, D, _1);
5450 :
5451 : Otherwise there's latency(MULT)*1 in the first FMA. */
5452 0 : return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5453 : }
5454 :
5455 : /* Returns an optimal number of registers to use for computation of
5456 : given statements.
5457 :
5458 : LHS is the result ssa name of OPS. MULT_NUM is number of sub-expressions
5459 : that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5460 :
5461 : static int
5462 21369 : get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
5463 : enum tree_code opc, machine_mode mode)
5464 : {
5465 21369 : int param_width = param_tree_reassoc_width;
5466 21369 : int width;
5467 21369 : int width_min;
5468 21369 : int cycles_best;
5469 21369 : int ops_num = ops->length ();
5470 :
5471 21369 : if (param_width > 0)
5472 : width = param_width;
5473 : else
5474 21324 : width = targetm.sched.reassociation_width (opc, mode);
5475 :
5476 21369 : if (width == 1)
5477 : return width;
5478 :
5479 : /* Get the minimal time required for sequence computation. */
5480 7177 : cycles_best = get_required_cycles (ops_num, width);
5481 :
5482 : /* Check if we may use less width and still compute sequence for
5483 : the same time. It will allow us to reduce registers usage.
5484 : get_required_cycles is monotonically increasing with lower width
5485 : so we can perform a binary search for the minimal width that still
5486 : results in the optimal cycle count. */
5487 7177 : width_min = 1;
5488 :
5489 : /* If the target fully pipelines FMA instruction, the multiply part can start
5490 : already if its operands are ready. Assuming symmetric pipes are used for
5491 : FMUL/FADD/FMA, then for a sequence of FMA like:
5492 :
5493 : _8 = .FMA (_2, _3, _1);
5494 : _9 = .FMA (_5, _4, _8);
5495 : _10 = .FMA (_7, _6, _9);
5496 :
5497 : , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5498 : While with width=2:
5499 :
5500 : _8 = _4 * _5;
5501 : _9 = .FMA (_2, _3, _1);
5502 : _10 = .FMA (_6, _7, _8);
5503 : _11 = _9 + _10;
5504 :
5505 : , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5506 : latency(ADD), the first variant is preferred.
5507 :
5508 : Find out if we can get a smaller width considering FMA.
5509 : Assume FMUL and FMA use the same units that can also do FADD.
5510 : For other scenarios, such as when FMUL and FADD are using separated units,
5511 : the following code may not apply. */
5512 :
5513 7177 : int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
5514 7177 : if (width > 1 && mult_num && param_fully_pipelined_fma
5515 0 : && width_mult <= width)
5516 : {
5517 : /* Latency of MULT_EXPRs. */
5518 0 : int lat_mul
5519 0 : = get_mult_latency_consider_fma (ops_num, mult_num, width_mult);
5520 :
5521 : /* Quick search might not apply. So start from 1. */
5522 0 : for (int i = 1; i < width_mult; i++)
5523 : {
5524 0 : int lat_mul_new
5525 0 : = get_mult_latency_consider_fma (ops_num, mult_num, i);
5526 0 : int lat_add_new = get_required_cycles (ops_num, i);
5527 :
5528 : /* Assume latency(MULT) >= latency(ADD). */
5529 0 : if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)
5530 : {
5531 : width = i;
5532 : break;
5533 : }
5534 : }
5535 : }
5536 : else
5537 : {
5538 17033 : while (width > width_min)
5539 : {
5540 11671 : int width_mid = (width + width_min) / 2;
5541 :
5542 11671 : if (get_required_cycles (ops_num, width_mid) == cycles_best)
5543 : width = width_mid;
5544 2009 : else if (width_min < width_mid)
5545 : width_min = width_mid;
5546 : else
5547 : break;
5548 : }
5549 : }
5550 :
5551 : /* If there's loop dependent FMA result, return width=2 to avoid it. This is
5552 : better than skipping these FMA candidates in widening_mul. */
5553 7177 : if (width == 1
5554 7177 : && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
5555 : param_avoid_fma_max_bits))
5556 : {
5557 : /* Look for cross backedge dependency:
5558 : 1. LHS is a phi argument in the same basic block it is defined.
5559 : 2. And the result of the phi node is used in OPS. */
5560 4946 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));
5561 :
5562 4946 : use_operand_p use_p;
5563 4946 : imm_use_iterator iter;
5564 15749 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5565 8100 : if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
5566 : {
5567 4321 : if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
5568 : != bb)
5569 0 : continue;
5570 4321 : tree phi_result = gimple_phi_result (phi);
5571 4321 : operand_entry *oe;
5572 4321 : unsigned int j;
5573 20898 : FOR_EACH_VEC_ELT (*ops, j, oe)
5574 : {
5575 12963 : if (TREE_CODE (oe->op) != SSA_NAME)
5576 0 : continue;
5577 :
5578 : /* Result of phi is operand of PLUS_EXPR. */
5579 12963 : if (oe->op == phi_result)
5580 2243 : return 2;
5581 :
5582 : /* Check is result of phi is operand of MULT_EXPR. */
5583 10720 : gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5584 10720 : if (is_gimple_assign (def_stmt)
5585 10720 : && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
5586 : {
5587 2192 : tree rhs = gimple_assign_rhs1 (def_stmt);
5588 2192 : if (TREE_CODE (rhs) == SSA_NAME)
5589 : {
5590 2192 : if (rhs == phi_result)
5591 : return 2;
5592 2192 : def_stmt = SSA_NAME_DEF_STMT (rhs);
5593 : }
5594 : }
5595 10720 : if (is_gimple_assign (def_stmt)
5596 10720 : && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
5597 : {
5598 8642 : if (gimple_assign_rhs1 (def_stmt) == phi_result
5599 8642 : || gimple_assign_rhs2 (def_stmt) == phi_result)
5600 : return 2;
5601 : }
5602 : }
5603 4946 : }
5604 : }
5605 :
5606 : return width;
5607 : }
5608 :
/* Sentinel values that rewrite_expr_tree_parallel compares its WIDTH_COUNT
   counter against to tell which end-of-chain statement it is emitting.  */
5609 : #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5610 : #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5611 : #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5612 :
5613 : /* Rewrite statements with dependency chain with regard the chance to generate
5614 : FMA.
5615 : For the chain with FMA: Try to keep fma opportunity as much as possible.
5616 : For the chain without FMA: Putting the computation in rank order and trying
5617 : to allow operations to be executed in parallel.
5618 : E.g.
5619 : e + f + a * b + c * d;
5620 :
5621 : ssa1 = e + a * b;
5622 : ssa2 = f + c * d;
5623 : ssa3 = ssa1 + ssa2;
5624 :
5625 : This reassociation approach preserves the chance of fma generation as much
5626 : as possible.
5627 :
5628 : Another thing is to avoid adding loop-carried ops to long chains, otherwise
5629 : the whole chain will have dependencies across the loop iteration. Just keep
5630 : loop-carried ops in a separate chain.
5631 : E.g.
5632 : x_1 = phi (x_0, x_2)
5633 : y_1 = phi (y_0, y_2)
5634 :
5635 : a + b + c + d + e + x1 + y1
5636 :
5637 : SSA1 = a + b;
5638 : SSA2 = c + d;
5639 : SSA3 = SSA1 + e;
5640 : SSA4 = SSA3 + SSA2;
5641 : SSA5 = x1 + y1;
5642 : SSA6 = SSA4 + SSA5;
5643 : */
5644 : static void
5645 1815 : rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5646 : const vec<operand_entry *> &ops)
5647 : {
5648 1815 : enum tree_code opcode = gimple_assign_rhs_code (stmt);
5649 1815 : int op_num = ops.length ();
5650 1815 : int op_normal_num = op_num;
5651 1815 : gcc_assert (op_num > 0);
5652 1815 : int stmt_num = op_num - 1;
5653 1815 : gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5654 1815 : int i = 0, j = 0;
5655 1815 : tree tmp_op[2], op1;
5656 1815 : operand_entry *oe;
5657 1815 : gimple *stmt1 = NULL;
5658 1815 : tree last_rhs1 = gimple_assign_rhs1 (stmt);
5659 1815 : int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5660 1815 : int width_active = 0, width_count = 0;
5661 1815 : bool has_biased = false, ops_changed = false;
5662 1815 : auto_vec<operand_entry *> ops_normal;
5663 1815 : auto_vec<operand_entry *> ops_biased;
5664 1815 : vec<operand_entry *> *ops1;
5665 :
5666 : /* We start expression rewriting from the top statements.
5667 : So, in this loop we create a full list of statements
5668 : we will work with. */
5669 1815 : stmts[stmt_num - 1] = stmt;
5670 8990 : for (i = stmt_num - 2; i >= 0; i--)
5671 7175 : stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5672 :
5673 : /* Avoid adding loop-carried ops to long chains, first filter out the
5674 : loop-carried. But we need to make sure that the length of the remainder
5675 : is not less than 4, which is the smallest ops length we can break the
5676 : dependency. */
5677 12620 : FOR_EACH_VEC_ELT (ops, i, oe)
5678 : {
5679 10805 : if (TREE_CODE (oe->op) == SSA_NAME
5680 10645 : && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5681 11057 : && op_normal_num > 4)
5682 : {
5683 223 : ops_biased.safe_push (oe);
5684 223 : has_biased = true;
5685 223 : op_normal_num --;
5686 : }
5687 : else
5688 10582 : ops_normal.safe_push (oe);
5689 : }
5690 :
5691 : /* Width should not be larger than ops length / 2, since we can not create
5692 : more parallel dependency chains that exceeds such value. */
5693 1815 : int width_normal = op_normal_num / 2;
5694 1815 : int width_biased = (op_num - op_normal_num) / 2;
5695 1815 : width_normal = width <= width_normal ? width : width_normal;
5696 1815 : width_biased = width <= width_biased ? width : width_biased;
5697 :
5698 1815 : ops1 = &ops_normal;
5699 1815 : width_count = width_active = width_normal;
5700 :
5701 : /* Build parallel dependency chain according to width. */
5702 10805 : for (i = 0; i < stmt_num; i++)
5703 : {
5704 8990 : if (dump_file && (dump_flags & TDF_DETAILS))
5705 : {
5706 6 : fprintf (dump_file, "Transforming ");
5707 6 : print_gimple_stmt (dump_file, stmts[i], 0);
5708 : }
5709 :
5710 : /* When the work of normal ops is over, but the loop is not over,
5711 : continue to do biased ops. */
5712 8990 : if (width_count == 0 && ops1 == &ops_normal)
5713 : {
5714 219 : ops1 = &ops_biased;
5715 219 : width_count = width_active = width_biased;
5716 219 : ops_changed = true;
5717 : }
5718 :
5719 : /* Swap the operands if no FMA in the chain. */
5720 8990 : if (ops1->length () > 2 && !has_fma)
5721 4426 : swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5722 :
5723 8990 : if (i < width_active
5724 5167 : || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5725 : {
5726 11478 : for (j = 0; j < 2; j++)
5727 : {
5728 7652 : oe = ops1->pop ();
5729 7652 : tmp_op[j] = oe->op;
5730 : /* If the stmt that defines operand has to be inserted, insert it
5731 : before the use. */
5732 7652 : stmt1 = oe->stmt_to_insert;
5733 7652 : if (stmt1)
5734 0 : insert_stmt_before_use (stmts[i], stmt1);
5735 7652 : stmt1 = NULL;
5736 : }
5737 3826 : stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5738 : tmp_op[1],
5739 : tmp_op[0],
5740 : opcode);
5741 3826 : gimple_set_visited (stmts[i], true);
5742 :
5743 : }
5744 : else
5745 : {
5746 : /* We keep original statement only for the last one. All others are
5747 : recreated. */
5748 5164 : if (!ops1->length ())
5749 : {
5750 : /* For biased length equal to 2. */
5751 2011 : if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5752 1 : last_rhs2_stmt_index = i - 1;
5753 :
5754 : /* When width_count == 2 and there is no biased, just finish. */
5755 2011 : if (width_count == NORMAL_END_STMT && !has_biased)
5756 : {
5757 1596 : last_rhs1_stmt_index = i - 1;
5758 1596 : last_rhs2_stmt_index = i - 2;
5759 : }
5760 2011 : if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5761 : {
5762 : /* We keep original statement only for the last one. All
5763 : others are recreated. */
5764 1598 : gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5765 1598 : (stmts[last_rhs1_stmt_index]));
5766 1598 : gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5767 1598 : (stmts[last_rhs2_stmt_index]));
5768 1598 : update_stmt (stmts[i]);
5769 : }
5770 : else
5771 : {
5772 1239 : stmts[i] =
5773 413 : build_and_add_sum (TREE_TYPE (last_rhs1),
5774 413 : gimple_assign_lhs (stmts[i-width_count]),
5775 : gimple_assign_lhs
5776 413 : (stmts[i-width_count+1]),
5777 : opcode);
5778 413 : gimple_set_visited (stmts[i], true);
5779 413 : width_count--;
5780 :
5781 : /* It is the end of normal or biased ops.
5782 : last_rhs1_stmt_index used to record the last stmt index
5783 : for normal ops. last_rhs2_stmt_index used to record the
5784 : last stmt index for biased ops. */
5785 413 : if (width_count == BIASED_END_STMT)
5786 : {
5787 220 : gcc_assert (has_biased);
5788 220 : if (ops_biased.length ())
5789 : last_rhs1_stmt_index = i;
5790 : else
5791 1 : last_rhs2_stmt_index = i;
5792 : width_count--;
5793 : }
5794 : }
5795 : }
5796 : else
5797 : {
5798 : /* Attach the rest ops to the parallel dependency chain. */
5799 3153 : oe = ops1->pop ();
5800 3153 : op1 = oe->op;
5801 3153 : stmt1 = oe->stmt_to_insert;
5802 3153 : if (stmt1)
5803 0 : insert_stmt_before_use (stmts[i], stmt1);
5804 3153 : stmt1 = NULL;
5805 :
5806 : /* For only one biased ops. */
5807 3153 : if (width_count == SPECIAL_BIASED_END_STMT)
5808 : {
5809 : /* We keep original statement only for the last one. All
5810 : others are recreated. */
5811 217 : gcc_assert (has_biased);
5812 217 : gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5813 217 : (stmts[last_rhs1_stmt_index]));
5814 217 : gimple_assign_set_rhs2 (stmts[i], op1);
5815 217 : update_stmt (stmts[i]);
5816 : }
5817 : else
5818 : {
5819 2936 : stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5820 : gimple_assign_lhs
5821 2936 : (stmts[i-width_active]),
5822 : op1,
5823 : opcode);
5824 2936 : gimple_set_visited (stmts[i], true);
5825 : }
5826 : }
5827 : }
5828 :
5829 8990 : if (dump_file && (dump_flags & TDF_DETAILS))
5830 : {
5831 6 : fprintf (dump_file, " into ");
5832 6 : print_gimple_stmt (dump_file, stmts[i], 0);
5833 : }
5834 : }
5835 :
5836 1815 : remove_visited_stmt_chain (last_rhs1);
5837 1815 : }
5838 :
5839 : /* Transform STMT, which is really (A +B) + (C + D) into the left
5840 : linear form, ((A+B)+C)+D.
5841 : Recurse on D if necessary. */
5842 :
5843 : static void
5844 2299 : linearize_expr (gimple *stmt)
5845 : {
5846 2299 : gimple_stmt_iterator gsi;
5847 2299 : gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5848 2299 : gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5849 2299 : gimple *oldbinrhs = binrhs;
5850 2299 : enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5851 2299 : gimple *newbinrhs = NULL;
5852 2299 : class loop *loop = loop_containing_stmt (stmt);
5853 2299 : tree lhs = gimple_assign_lhs (stmt);
5854 :
5855 2299 : gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5856 : && is_reassociable_op (binrhs, rhscode, loop));
5857 :
5858 2299 : gsi = gsi_for_stmt (stmt);
5859 :
5860 2299 : gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5861 2299 : binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5862 : gimple_assign_rhs_code (binrhs),
5863 : gimple_assign_lhs (binlhs),
5864 : gimple_assign_rhs2 (binrhs));
5865 2299 : gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5866 2299 : gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5867 2299 : gimple_set_uid (binrhs, gimple_uid (stmt));
5868 :
5869 2299 : if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5870 2294 : newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5871 :
5872 2299 : if (dump_file && (dump_flags & TDF_DETAILS))
5873 : {
5874 0 : fprintf (dump_file, "Linearized: ");
5875 0 : print_gimple_stmt (dump_file, stmt, 0);
5876 : }
5877 :
5878 2299 : reassociate_stats.linearized++;
5879 2299 : update_stmt (stmt);
5880 :
5881 2299 : gsi = gsi_for_stmt (oldbinrhs);
5882 2299 : reassoc_remove_stmt (&gsi);
5883 2299 : release_defs (oldbinrhs);
5884 :
5885 2299 : gimple_set_visited (stmt, true);
5886 2299 : gimple_set_visited (binlhs, true);
5887 2299 : gimple_set_visited (binrhs, true);
5888 :
5889 : /* Tail recurse on the new rhs if it still needs reassociation. */
5890 2299 : if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5891 : /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5892 : want to change the algorithm while converting to tuples. */
5893 536 : linearize_expr (stmt);
5894 2299 : }
5895 :
5896 : /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5897 : it. Otherwise, return NULL. */
5898 :
5899 : static gimple *
5900 406954 : get_single_immediate_use (tree lhs)
5901 : {
5902 406954 : use_operand_p immuse;
5903 406954 : gimple *immusestmt;
5904 :
5905 406954 : if (TREE_CODE (lhs) == SSA_NAME
5906 406954 : && single_imm_use (lhs, &immuse, &immusestmt)
5907 720938 : && is_gimple_assign (immusestmt))
5908 : return immusestmt;
5909 :
5910 : return NULL;
5911 : }
5912 :
5913 : /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5914 : representing the negated value. Insertions of any necessary
5915 : instructions go before GSI.
5916 : This function is recursive in that, if you hand it "a_5" as the
5917 : value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5918 : transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5919 :
5920 : static tree
5921 76992 : negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5922 : {
5923 76992 : gimple *negatedefstmt = NULL;
5924 76992 : tree resultofnegate;
5925 76992 : gimple_stmt_iterator gsi;
5926 76992 : unsigned int uid;
5927 :
5928 : /* If we are trying to negate a name, defined by an add, negate the
5929 : add operands instead. */
5930 76992 : if (TREE_CODE (tonegate) == SSA_NAME)
5931 75211 : negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5932 76992 : if (TREE_CODE (tonegate) == SSA_NAME
5933 75211 : && is_gimple_assign (negatedefstmt)
5934 66294 : && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5935 66294 : && has_single_use (gimple_assign_lhs (negatedefstmt))
5936 126115 : && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5937 : {
5938 875 : tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5939 875 : tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5940 875 : tree lhs = gimple_assign_lhs (negatedefstmt);
5941 875 : gimple *g;
5942 :
5943 875 : gsi = gsi_for_stmt (negatedefstmt);
5944 875 : rhs1 = negate_value (rhs1, &gsi);
5945 :
5946 875 : gsi = gsi_for_stmt (negatedefstmt);
5947 875 : rhs2 = negate_value (rhs2, &gsi);
5948 :
5949 875 : gsi = gsi_for_stmt (negatedefstmt);
5950 875 : lhs = make_ssa_name (TREE_TYPE (lhs));
5951 875 : gimple_set_visited (negatedefstmt, true);
5952 875 : g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5953 875 : gimple_set_uid (g, gimple_uid (negatedefstmt));
5954 875 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5955 875 : return lhs;
5956 : }
5957 :
5958 76117 : tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5959 76117 : resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5960 : NULL_TREE, true, GSI_SAME_STMT);
5961 76117 : gsi = *gsip;
5962 76117 : uid = gimple_uid (gsi_stmt (gsi));
5963 300906 : for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5964 : {
5965 146131 : gimple *stmt = gsi_stmt (gsi);
5966 146131 : if (gimple_uid (stmt) != 0)
5967 : break;
5968 74336 : gimple_set_uid (stmt, uid);
5969 : }
5970 : return resultofnegate;
5971 : }
5972 :
5973 : /* Return true if we should break up the subtract in STMT into an add
5974 : with negate. This is true when we the subtract operands are really
5975 : adds, or the subtract itself is used in an add expression. In
5976 : either case, breaking up the subtract into an add with negate
5977 : exposes the adds to reassociation. */
5978 :
5979 : static bool
5980 292125 : should_break_up_subtract (gimple *stmt)
5981 : {
5982 292125 : tree lhs = gimple_assign_lhs (stmt);
5983 292125 : tree binlhs = gimple_assign_rhs1 (stmt);
5984 292125 : tree binrhs = gimple_assign_rhs2 (stmt);
5985 292125 : gimple *immusestmt;
5986 292125 : class loop *loop = loop_containing_stmt (stmt);
5987 :
5988 292125 : if (TREE_CODE (binlhs) == SSA_NAME
5989 292125 : && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5990 : return true;
5991 :
5992 273509 : if (TREE_CODE (binrhs) == SSA_NAME
5993 273509 : && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5994 : return true;
5995 :
5996 272727 : if (TREE_CODE (lhs) == SSA_NAME
5997 272727 : && (immusestmt = get_single_immediate_use (lhs))
5998 131999 : && is_gimple_assign (immusestmt)
5999 404726 : && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
6000 88802 : || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
6001 2349 : && gimple_assign_rhs1 (immusestmt) == lhs)
6002 86463 : || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
6003 : return true;
6004 : return false;
6005 : }
6006 :
6007 : /* Transform STMT from A - B into A + -B. */
6008 :
6009 : static void
6010 75242 : break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
6011 : {
6012 75242 : tree rhs1 = gimple_assign_rhs1 (stmt);
6013 75242 : tree rhs2 = gimple_assign_rhs2 (stmt);
6014 :
6015 75242 : if (dump_file && (dump_flags & TDF_DETAILS))
6016 : {
6017 0 : fprintf (dump_file, "Breaking up subtract ");
6018 0 : print_gimple_stmt (dump_file, stmt, 0);
6019 : }
6020 :
6021 75242 : rhs2 = negate_value (rhs2, gsip);
6022 75242 : gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
6023 75242 : update_stmt (stmt);
6024 75242 : }
6025 :
6026 : /* Determine whether STMT is a builtin call that raises an SSA name
6027 : to an integer power and has only one use. If so, and this is early
6028 : reassociation and unsafe math optimizations are permitted, place
6029 : the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6030 : If any of these conditions does not hold, return FALSE. */
6031 :
6032 : static bool
6033 127 : acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
6034 : {
6035 127 : tree arg1;
6036 127 : REAL_VALUE_TYPE c, cint;
6037 :
6038 127 : switch (gimple_call_combined_fn (stmt))
6039 : {
6040 19 : CASE_CFN_POW:
6041 19 : if (flag_errno_math)
6042 : return false;
6043 :
6044 19 : *base = gimple_call_arg (stmt, 0);
6045 19 : arg1 = gimple_call_arg (stmt, 1);
6046 :
6047 19 : if (TREE_CODE (arg1) != REAL_CST)
6048 : return false;
6049 :
6050 16 : c = TREE_REAL_CST (arg1);
6051 :
6052 16 : if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
6053 : return false;
6054 :
6055 16 : *exponent = real_to_integer (&c);
6056 16 : real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
6057 16 : if (!real_identical (&c, &cint))
6058 : return false;
6059 :
6060 : break;
6061 :
6062 10 : CASE_CFN_POWI:
6063 10 : *base = gimple_call_arg (stmt, 0);
6064 10 : arg1 = gimple_call_arg (stmt, 1);
6065 :
6066 10 : if (!tree_fits_shwi_p (arg1))
6067 : return false;
6068 :
6069 10 : *exponent = tree_to_shwi (arg1);
6070 10 : break;
6071 :
6072 : default:
6073 : return false;
6074 : }
6075 :
6076 : /* Expanding negative exponents is generally unproductive, so we don't
6077 : complicate matters with those. Exponents of zero and one should
6078 : have been handled by expression folding. */
6079 18 : if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
6080 : return false;
6081 :
6082 : return true;
6083 : }
6084 :
6085 : /* Try to derive and add operand entry for OP to *OPS. Return false if
6086 : unsuccessful. */
6087 :
6088 : static bool
6089 9549783 : try_special_add_to_ops (vec<operand_entry *> *ops,
6090 : enum tree_code code,
6091 : tree op, gimple* def_stmt)
6092 : {
6093 9549783 : tree base = NULL_TREE;
6094 9549783 : HOST_WIDE_INT exponent = 0;
6095 :
6096 9549783 : if (TREE_CODE (op) != SSA_NAME
6097 9549783 : || ! has_single_use (op))
6098 : return false;
6099 :
6100 3570289 : if (code == MULT_EXPR
6101 762429 : && reassoc_insert_powi_p
6102 361230 : && flag_unsafe_math_optimizations
6103 30061 : && is_gimple_call (def_stmt)
6104 3570416 : && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
6105 : {
6106 18 : add_repeat_to_ops_vec (ops, base, exponent);
6107 18 : gimple_set_visited (def_stmt, true);
6108 18 : return true;
6109 : }
6110 3570271 : else if (code == MULT_EXPR
6111 762411 : && is_gimple_assign (def_stmt)
6112 722601 : && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6113 275 : && !HONOR_SNANS (TREE_TYPE (op))
6114 275 : && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
6115 0 : || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
6116 3570546 : && (!FLOAT_TYPE_P (TREE_TYPE (op))
6117 51 : || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
6118 : {
6119 268 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
6120 268 : tree cst = build_minus_one_cst (TREE_TYPE (op));
6121 268 : add_to_ops_vec (ops, rhs1);
6122 268 : add_to_ops_vec (ops, cst);
6123 268 : gimple_set_visited (def_stmt, true);
6124 268 : return true;
6125 : }
6126 :
6127 : return false;
6128 : }
6129 :
6130 : /* Recursively linearize a binary expression that is the RHS of STMT.
6131 : Place the operands of the expression tree in the vector named OPS. */
6132 :
6133 : static void
6134 4914194 : linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6135 : bool is_associative, bool set_visited)
6136 : {
6137 4914194 : tree binlhs = gimple_assign_rhs1 (stmt);
6138 4914194 : tree binrhs = gimple_assign_rhs2 (stmt);
6139 4914194 : gimple *binlhsdef = NULL, *binrhsdef = NULL;
6140 4914194 : bool binlhsisreassoc = false;
6141 4914194 : bool binrhsisreassoc = false;
6142 4914194 : enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6143 4914194 : class loop *loop = loop_containing_stmt (stmt);
6144 :
6145 4914194 : if (set_visited)
6146 4871531 : gimple_set_visited (stmt, true);
6147 :
6148 4914194 : if (TREE_CODE (binlhs) == SSA_NAME)
6149 : {
6150 4911550 : binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6151 4911550 : binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6152 4911550 : && !stmt_could_throw_p (cfun, binlhsdef));
6153 : }
6154 :
6155 4914194 : if (TREE_CODE (binrhs) == SSA_NAME)
6156 : {
6157 1583760 : binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6158 1583760 : binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6159 1583760 : && !stmt_could_throw_p (cfun, binrhsdef));
6160 : }
6161 :
6162 : /* If the LHS is not reassociable, but the RHS is, we need to swap
6163 : them. If neither is reassociable, there is nothing we can do, so
6164 : just put them in the ops vector. If the LHS is reassociable,
6165 : linearize it. If both are reassociable, then linearize the RHS
6166 : and the LHS. */
6167 :
6168 4914194 : if (!binlhsisreassoc)
6169 : {
6170 : /* If this is not a associative operation like division, give up. */
6171 4702433 : if (!is_associative)
6172 : {
6173 15 : add_to_ops_vec (ops, binrhs);
6174 15 : return;
6175 : }
6176 :
6177 4702418 : if (!binrhsisreassoc)
6178 : {
6179 4635604 : bool swap = false;
6180 4635604 : if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6181 : /* If we add ops for the rhs we expect to be able to recurse
6182 : to it via the lhs during expression rewrite so swap
6183 : operands. */
6184 : swap = true;
6185 : else
6186 4635434 : add_to_ops_vec (ops, binrhs);
6187 :
6188 4635604 : if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6189 4635492 : add_to_ops_vec (ops, binlhs);
6190 :
6191 4635604 : if (!swap)
6192 : return;
6193 : }
6194 :
6195 66984 : if (dump_file && (dump_flags & TDF_DETAILS))
6196 : {
6197 9 : fprintf (dump_file, "swapping operands of ");
6198 9 : print_gimple_stmt (dump_file, stmt, 0);
6199 : }
6200 :
6201 66984 : swap_ssa_operands (stmt,
6202 : gimple_assign_rhs1_ptr (stmt),
6203 : gimple_assign_rhs2_ptr (stmt));
6204 66984 : update_stmt (stmt);
6205 :
6206 66984 : if (dump_file && (dump_flags & TDF_DETAILS))
6207 : {
6208 9 : fprintf (dump_file, " is now ");
6209 9 : print_gimple_stmt (dump_file, stmt, 0);
6210 : }
6211 66984 : if (!binrhsisreassoc)
6212 : return;
6213 :
6214 : /* We want to make it so the lhs is always the reassociative op,
6215 : so swap. */
6216 : std::swap (binlhs, binrhs);
6217 : }
6218 211761 : else if (binrhsisreassoc)
6219 : {
6220 1763 : linearize_expr (stmt);
6221 1763 : binlhs = gimple_assign_rhs1 (stmt);
6222 1763 : binrhs = gimple_assign_rhs2 (stmt);
6223 : }
6224 :
6225 278575 : gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6226 : || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6227 : rhscode, loop));
6228 278575 : linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6229 : is_associative, set_visited);
6230 :
6231 278575 : if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6232 278571 : add_to_ops_vec (ops, binrhs);
6233 : }
6234 :
6235 : /* Repropagate the negates back into subtracts, since no other pass
6236 : currently does it.  Walks the plus_negates vector; for each entry whose single immediate use is a PLUS_EXPR or MINUS_EXPR assignment, that user is rewritten so the negation is either folded into it or pushed down below it.  Entries with no single use, a non-assignment user, or a negated operand that occurs in an abnormal PHI are left alone. */
6237 :
6238 : static void
6239 2088342 : repropagate_negates (void)
6240 : {
6241 2088342 : unsigned int i = 0;
6242 2088342 : tree negate;
6243 :
6244 2222569 : FOR_EACH_VEC_ELT (plus_negates, i, negate)
6245 : {
6246 134227 : gimple *user = get_single_immediate_use (negate);
6247 134227 : if (!user || !is_gimple_assign (user))
6248 21944 : continue;
6249 : /* NEGATEOP is the first operand of the statement defining NEGATE, i.e. the value being negated. */
6250 112283 : tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6251 112293 : if (TREE_CODE (negateop) == SSA_NAME
6252 112283 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6253 10 : continue;
6254 :
6255 : /* The negate can be either operand of a PLUS_EXPR
6256 : (it can be the first operand if the second is a constant, for example).
6257 :
6258 : Force the negate to be the second operand of the PLUS_EXPR, then
6259 : transform the PLUS_EXPR into a MINUS_EXPR. */
6260 112273 : if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6261 : {
6262 : /* If the negate is the first operand (rhs1) of the
6263 : PLUS_EXPR, exchange the operands of the PLUS_EXPR
6264 : so that the negate becomes the second operand (rhs2). */
6265 83421 : if (gimple_assign_rhs1 (user) == negate)
6266 : {
6267 24969 : swap_ssa_operands (user,
6268 : gimple_assign_rhs1_ptr (user),
6269 : gimple_assign_rhs2_ptr (user));
6270 : }
6271 :
6272 : /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6273 : its second operand with the operand of the NEGATE_EXPR. */
6274 83421 : if (gimple_assign_rhs2 (user) == negate)
6275 : {
6276 83421 : tree rhs1 = gimple_assign_rhs1 (user);
6277 83421 : gimple_stmt_iterator gsi = gsi_for_stmt (user);
6278 83421 : gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6279 : negateop);
6280 83421 : update_stmt (user);
6281 : }
6282 : }
6283 28852 : else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6284 : {
6285 1965 : if (gimple_assign_rhs1 (user) == negate)
6286 : {
6287 : /* We have
6288 : x = -negateop
6289 : y = x - b
6290 : which we transform into
6291 : x = negateop + b
6292 : y = -x .
6293 : This pushes down the negate which we possibly can merge
6294 : into some other operation, hence push the result of the new
6295 : negate onto the plus_negates vector.  The original negate statement (FEED) is removed and its definitions released. */
6296 1965 : gimple *feed = SSA_NAME_DEF_STMT (negate);
6297 1965 : tree b = gimple_assign_rhs2 (user);
6298 1965 : gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6299 1965 : gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6300 1965 : tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6301 1965 : gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6302 1965 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6303 1965 : gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6304 1965 : user = gsi_stmt (gsi2);
6305 1965 : update_stmt (user);
6306 1965 : reassoc_remove_stmt (&gsi);
6307 1965 : release_defs (feed);
6308 1965 : plus_negates.safe_push (gimple_assign_lhs (user));
6309 : }
6310 : else
6311 : {
6312 : /* The negate is the second operand.  Transform "x = -negateop; y = b - x" into "y = b + negateop",
6313 : getting rid of one operation. */
6314 0 : tree rhs1 = gimple_assign_rhs1 (user);
6315 0 : gimple_stmt_iterator gsi = gsi_for_stmt (user);
6316 0 : gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6317 0 : update_stmt (gsi_stmt (gsi));
6318 : }
6319 : }
6320 : }
6321 2088342 : }
6322 :
6323 : /* Break up subtract operations in block BB.
6324 :
6325 : We do this top down because we don't know whether the subtract is
6326 : part of a possible chain of reassociation except at the top.
6327 :
6328 : I.e., given
6329 : d = f + g
6330 : c = a + e
6331 : b = c - d
6332 : q = b - r
6333 : k = t - q
6334 :
6335 : we want to break up k = t - q, but we won't until we've transformed q
6336 : = b - r, which won't be broken up until we transform b = c - d.
6337 :
6338 : Along the way, clear the GIMPLE visited flag on every statement of BB
6339 : and number the statements with increasing UIDs starting at 1.  The result of each NEGATE_EXPR assignment that passes the can_reassociate_* checks is pushed onto plus_negates. */
6340 :
6341 : static void
6342 19421440 : break_up_subtract_bb (basic_block bb)
6343 : {
6344 19421440 : gimple_stmt_iterator gsi;
6345 19421440 : unsigned int uid = 1;
6346 :
6347 209469211 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6348 : {
6349 170626331 : gimple *stmt = gsi_stmt (gsi);
6350 170626331 : gimple_set_visited (stmt, false);
6351 170626331 : gimple_set_uid (stmt, uid++);
6352 : /* Only assignments whose result type and result operand can be reassociated are of interest. */
6353 170626331 : if (!is_gimple_assign (stmt)
6354 46860992 : || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6355 187923981 : || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6356 153329011 : continue;
6357 :
6358 : /* Look for simple gimple subtract operations. */
6359 17297320 : if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6360 : {
6361 292399 : if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6362 292399 : || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6363 274 : continue;
6364 :
6365 : /* Check for a subtract used only in an addition. If this
6366 : is the case, transform it into add of a negate for better
6367 : reassociation. I.e., transform C = A-B into C = A + -B if C
6368 : is only used in an addition. */
6369 292125 : if (should_break_up_subtract (stmt))
6370 75242 : break_up_subtract (stmt, &gsi);
6371 : }
6372 17004921 : else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6373 17004921 : && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6374 46084 : plus_negates.safe_push (gimple_assign_lhs (stmt));
6375 : }
6376 19421440 : }
6377 :
6378 : /* Used for repeated factor analysis: one entry of the repeated factor vector built by attempt_builtin_powi. */
6379 : struct repeat_factor
6380 : {
6381 : /* An SSA name that occurs in a multiply chain. */
6382 : tree factor;
6383 :
6384 : /* Cached rank of the factor, copied from its operand entry. */
6385 : unsigned rank;
6386 :
6387 : /* Number of occurrences of the factor in the chain that are still unconsumed; attempt_builtin_powi reduces it as powers are extracted. */
6388 : HOST_WIDE_INT count;
6389 :
6390 : /* An SSA name representing the product of this factor and
6391 : all factors appearing later in the repeated factor vector, or NULL_TREE while no such product has been formed. */
6392 : tree repr;
6393 : };
6394 :
6395 : /* Working vector for attempt_builtin_powi, which creates it once it decides to proceed and releases it before returning. */
6396 : static vec<repeat_factor> repeat_factor_vec;
6397 :
6398 : /* Used for sorting the repeat factor vector. Sort primarily by
6399 : ascending occurrence count, secondarily by descending rank.  X1 and X2 point to repeat_factor elements; the result is -1 if *X1 sorts first, 1 if *X2 sorts first, and 0 if count and rank are both equal. */
6400 :
6401 : static int
6402 247302 : compare_repeat_factors (const void *x1, const void *x2)
6403 : {
6404 247302 : const repeat_factor *rf1 = (const repeat_factor *) x1;
6405 247302 : const repeat_factor *rf2 = (const repeat_factor *) x2;
6406 : /* Primary key: smaller occurrence count first. */
6407 247302 : if (rf1->count < rf2->count)
6408 : return -1;
6409 246668 : else if (rf1->count > rf2->count)
6410 : return 1;
6411 : /* Secondary key: larger rank first. */
6412 246175 : if (rf1->rank < rf2->rank)
6413 : return 1;
6414 134288 : else if (rf1->rank > rf2->rank)
6415 110748 : return -1;
6416 :
6417 : return 0;
6418 : }
6419 :
6420 : /* Look for repeated operands in OPS in the multiply tree rooted at
6421 : STMT. Replace them with an optimal sequence of multiplies and powi
6422 : builtin calls, and remove the used operands from OPS. Return an
6423 : SSA name representing the value of the replacement sequence, or NULL_TREE if nothing was replaced.  For integral types the power is expanded through powi_as_mults instead of a builtin call.  New statements are inserted before STMT and are given STMT's UID.  OPS is re-sorted with sort_by_operand_rank before returning. */
6424 :
6425 : static tree
6426 488043 : attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6427 : {
6428 488043 : unsigned i, j, vec_len;
6429 488043 : int ii;
6430 488043 : operand_entry *oe;
6431 488043 : repeat_factor *rf1, *rf2;
6432 488043 : repeat_factor rfnew;
6433 488043 : tree result = NULL_TREE;
6434 488043 : tree target_ssa, iter_result;
6435 488043 : tree type = TREE_TYPE (gimple_get_lhs (stmt));
6436 488043 : tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6437 488043 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6438 488043 : gimple *mul_stmt, *pow_stmt;
6439 :
6440 : /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6441 : target, unless type is integral. */
6442 488043 : if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6443 : return NULL_TREE;
6444 :
6445 : /* Allocate the repeated factor vector. */
6446 484387 : repeat_factor_vec.create (10);
6447 :
6448 : /* Scan the OPS vector for all SSA names in the product and build
6449 : up a vector of occurrence counts for each factor. */
6450 1941864 : FOR_EACH_VEC_ELT (*ops, i, oe)
6451 : {
6452 973090 : if (TREE_CODE (oe->op) == SSA_NAME)
6453 : {
6454 613883 : FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6455 : {
6456 67183 : if (rf1->factor == oe->op)
6457 : {
6458 2760 : rf1->count += oe->count;
6459 2760 : break;
6460 : }
6461 : }
6462 : /* The scan above ran to the end without a match: this is a factor not seen before. */
6463 549460 : if (j >= repeat_factor_vec.length ())
6464 : {
6465 546700 : rfnew.factor = oe->op;
6466 546700 : rfnew.rank = oe->rank;
6467 546700 : rfnew.count = oe->count;
6468 546700 : rfnew.repr = NULL_TREE;
6469 546700 : repeat_factor_vec.safe_push (rfnew);
6470 : }
6471 : }
6472 : }
6473 :
6474 : /* Sort the repeated factor vector by (a) increasing occurrence count,
6475 : and (b) decreasing rank. */
6476 484387 : repeat_factor_vec.qsort (compare_repeat_factors);
6477 :
6478 : /* It is generally best to combine as many base factors as possible
6479 : into a product before applying __builtin_powi to the result.
6480 : However, the sort order chosen for the repeated factor vector
6481 : allows us to cache partial results for the product of the base
6482 : factors for subsequent use. When we already have a cached partial
6483 : result from a previous iteration, it is best to make use of it
6484 : before looking for another __builtin_powi opportunity.
6485 :
6486 : As an example, consider x * x * y * y * y * z * z * z * z.
6487 : We want to first compose the product x * y * z, raise it to the
6488 : second power, then multiply this by y * z, and finally multiply
6489 : by z. This can be done in 5 multiplies provided we cache y * z
6490 : for use in both expressions:
6491 :
6492 : t1 = y * z
6493 : t2 = t1 * x
6494 : t3 = t2 * t2
6495 : t4 = t1 * t3
6496 : result = t4 * z
6497 :
6498 : If we instead ignored the cached y * z and first multiplied by
6499 : the __builtin_powi opportunity z * z, we would get the inferior:
6500 :
6501 : t1 = y * z
6502 : t2 = t1 * x
6503 : t3 = t2 * t2
6504 : t4 = z * z
6505 : t5 = t3 * t4
6506 : result = t5 * y */
6507 :
6508 968774 : vec_len = repeat_factor_vec.length ();
6509 :
6510 : /* Repeatedly look for opportunities to create a builtin_powi call. */
6511 486119 : while (true)
6512 : {
6513 486119 : HOST_WIDE_INT power;
6514 :
6515 : /* First look for the largest cached product of factors from
6516 : preceding iterations. If found, create a builtin_powi for
6517 : it if the minimum occurrence count for its factors is at
6518 : least 2, or just use this cached product as our next
6519 : multiplicand if the minimum occurrence count is 1. */
6520 1034852 : FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6521 : {
6522 548742 : if (rf1->repr && rf1->count > 0)
6523 : break;
6524 : }
6525 :
6526 486119 : if (j < vec_len)
6527 : {
6528 9 : power = rf1->count;
6529 :
6530 9 : if (power == 1)
6531 : {
6532 7 : iter_result = rf1->repr;
6533 :
6534 7 : if (dump_file && (dump_flags & TDF_DETAILS))
6535 : {
6536 0 : unsigned elt;
6537 0 : repeat_factor *rf;
6538 0 : fputs ("Multiplying by cached product ", dump_file);
6539 0 : for (elt = j; elt < vec_len; elt++)
6540 : {
6541 0 : rf = &repeat_factor_vec[elt];
6542 0 : print_generic_expr (dump_file, rf->factor);
6543 0 : if (elt < vec_len - 1)
6544 0 : fputs (" * ", dump_file);
6545 : }
6546 0 : fputs ("\n", dump_file);
6547 : }
6548 : }
6549 : else
6550 : {
6551 2 : if (INTEGRAL_TYPE_P (type))
6552 : {
6553 0 : gcc_assert (power > 1);
6554 0 : gimple_stmt_iterator gsip = gsi;
6555 0 : gsi_prev (&gsip);
6556 0 : iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6557 : rf1->repr, power);
6558 0 : gimple_stmt_iterator gsic = gsi; /* Walk backwards from STMT to the statement that preceded it before the powi_as_mults call, giving each statement visited STMT's UID and setting its visited flag. */
6559 0 : while (gsi_stmt (gsic) != gsi_stmt (gsip))
6560 : {
6561 0 : gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6562 0 : gimple_set_visited (gsi_stmt (gsic), true);
6563 0 : gsi_prev (&gsic);
6564 : }
6565 : }
6566 : else
6567 : {
6568 2 : iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6569 2 : pow_stmt
6570 2 : = gimple_build_call (powi_fndecl, 2, rf1->repr,
6571 : build_int_cst (integer_type_node,
6572 2 : power));
6573 2 : gimple_call_set_lhs (pow_stmt, iter_result);
6574 2 : gimple_set_location (pow_stmt, gimple_location (stmt));
6575 2 : gimple_set_uid (pow_stmt, gimple_uid (stmt));
6576 2 : gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6577 : }
6578 :
6579 2 : if (dump_file && (dump_flags & TDF_DETAILS))
6580 : {
6581 0 : unsigned elt;
6582 0 : repeat_factor *rf;
6583 0 : fputs ("Building __builtin_pow call for cached product (",
6584 : dump_file);
6585 0 : for (elt = j; elt < vec_len; elt++)
6586 : {
6587 0 : rf = &repeat_factor_vec[elt];
6588 0 : print_generic_expr (dump_file, rf->factor);
6589 0 : if (elt < vec_len - 1)
6590 0 : fputs (" * ", dump_file);
6591 : }
6592 0 : fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6593 : power);
6594 : }
6595 : }
6596 : }
6597 : else
6598 : {
6599 : /* Otherwise, find the first factor in the repeated factor
6600 : vector whose occurrence count is at least 2. If no such
6601 : factor exists, there are no builtin_powi opportunities
6602 : remaining. */
6603 1033076 : FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6604 : {
6605 548689 : if (rf1->count >= 2)
6606 : break;
6607 : }
6608 :
6609 486110 : if (j >= vec_len)
6610 : break;
6611 :
6612 1723 : power = rf1->count;
6613 :
6614 1723 : if (dump_file && (dump_flags & TDF_DETAILS))
6615 : {
6616 0 : unsigned elt;
6617 0 : repeat_factor *rf;
6618 0 : fputs ("Building __builtin_pow call for (", dump_file);
6619 0 : for (elt = j; elt < vec_len; elt++)
6620 : {
6621 0 : rf = &repeat_factor_vec[elt];
6622 0 : print_generic_expr (dump_file, rf->factor);
6623 0 : if (elt < vec_len - 1)
6624 0 : fputs (" * ", dump_file);
6625 : }
6626 0 : fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6627 : }
6628 :
6629 1723 : reassociate_stats.pows_created++;
6630 :
6631 : /* Visit each element of the vector in reverse order (so that
6632 : high-occurrence elements are visited first, and within the
6633 : same occurrence count, lower-ranked elements are visited
6634 : first). Form a linear product of all elements in this order
6635 : whose occurrence count is at least that of element J.
6636 : Record the SSA name representing the product of each element
6637 : with all subsequent elements in the vector. */
6638 1723 : if (j == vec_len - 1)
6639 1702 : rf1->repr = rf1->factor;
6640 : else
6641 : {
6642 50 : for (ii = vec_len - 2; ii >= (int)j; ii--)
6643 : {
6644 29 : tree op1, op2;
6645 :
6646 29 : rf1 = &repeat_factor_vec[ii];
6647 29 : rf2 = &repeat_factor_vec[ii + 1];
6648 :
6649 : /* Init the last factor's representative to be itself. */
6650 29 : if (!rf2->repr)
6651 21 : rf2->repr = rf2->factor;
6652 :
6653 29 : op1 = rf1->factor;
6654 29 : op2 = rf2->repr;
6655 :
6656 29 : target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6657 29 : mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6658 : op1, op2);
6659 29 : gimple_set_location (mul_stmt, gimple_location (stmt));
6660 29 : gimple_set_uid (mul_stmt, gimple_uid (stmt));
6661 29 : gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6662 29 : rf1->repr = target_ssa;
6663 :
6664 : /* Don't reprocess the multiply we just introduced. */
6665 29 : gimple_set_visited (mul_stmt, true);
6666 : }
6667 : }
6668 :
6669 : /* Form a call to __builtin_powi for the maximum product
6670 : just formed, raised to the power obtained earlier.  For integral types, expand the power through powi_as_mults instead. */
6671 1723 : rf1 = &repeat_factor_vec[j];
6672 1723 : if (INTEGRAL_TYPE_P (type))
6673 : {
6674 1161 : gcc_assert (power > 1);
6675 1161 : gimple_stmt_iterator gsip = gsi;
6676 1161 : gsi_prev (&gsip);
6677 1161 : iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6678 : rf1->repr, power);
6679 1161 : gimple_stmt_iterator gsic = gsi; /* Walk backwards from STMT to the statement that preceded it before the powi_as_mults call, giving each statement visited STMT's UID and setting its visited flag. */
6680 1161 : while (gsi_stmt (gsic) != gsi_stmt (gsip))
6681 : {
6682 2357 : gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6683 2357 : gimple_set_visited (gsi_stmt (gsic), true);
6684 3518 : gsi_prev (&gsic);
6685 : }
6686 : }
6687 : else
6688 : {
6689 562 : iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6690 562 : pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6691 : build_int_cst (integer_type_node,
6692 562 : power));
6693 562 : gimple_call_set_lhs (pow_stmt, iter_result);
6694 562 : gimple_set_location (pow_stmt, gimple_location (stmt));
6695 562 : gimple_set_uid (pow_stmt, gimple_uid (stmt));
6696 562 : gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6697 : }
6698 : }
6699 :
6700 : /* If we previously formed at least one other builtin_powi call,
6701 : form the product of this one and those others. */
6702 1732 : if (result)
6703 : {
6704 9 : tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6705 9 : mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6706 : result, iter_result);
6707 9 : gimple_set_location (mul_stmt, gimple_location (stmt));
6708 9 : gimple_set_uid (mul_stmt, gimple_uid (stmt));
6709 9 : gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6710 9 : gimple_set_visited (mul_stmt, true);
6711 9 : result = new_result;
6712 : }
6713 : else
6714 : result = iter_result;
6715 :
6716 : /* Decrement the occurrence count of each element in the product
6717 : by the count found above, and remove this many copies of each
6718 : factor from OPS. */
6719 3498 : for (i = j; i < vec_len; i++)
6720 : {
6721 1766 : unsigned k = power; /* K is the number of copies of this factor still to be removed from OPS. */
6722 1766 : unsigned n;
6723 :
6724 1766 : rf1 = &repeat_factor_vec[i];
6725 1766 : rf1->count -= power;
6726 :
6727 8461 : FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6728 : {
6729 4929 : if (oe->op == rf1->factor)
6730 : {
6731 4518 : if (oe->count <= k)
6732 : {
6733 4512 : ops->ordered_remove (n);
6734 4512 : k -= oe->count;
6735 :
6736 4512 : if (k == 0)
6737 : break;
6738 : }
6739 : else
6740 : {
6741 6 : oe->count -= k;
6742 6 : break;
6743 : }
6744 : }
6745 : }
6746 : }
6747 : }
6748 :
6749 : /* At this point all elements in the repeated factor vector have a
6750 : remaining occurrence count of 0 or 1, and those with a count of 1
6751 : don't have cached representatives. Re-sort the ops vector and
6752 : clean up. */
6753 484387 : ops->qsort (sort_by_operand_rank);
6754 484387 : repeat_factor_vec.release ();
6755 :
6756 : /* Return the final product computed herein. Note that there may
6757 : still be some elements with single occurrence count left in OPS;
6758 : those will be handled by the normal reassociation logic. */
6759 484387 : return result;
6760 : }
6761 :
6762 : /* Attempt to optimize
6763 : CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6764 : CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  CST1 is taken from the last entry of OPS; nothing is done unless it is a REAL_CST and OPS has more than one entry.  At most one single-use copysign operand is rewritten, after which the constant is popped from OPS and the function returns. */
6765 :
6766 : static void
6767 1077976 : attempt_builtin_copysign (vec<operand_entry *> *ops)
6768 : {
6769 1077976 : operand_entry *oe;
6770 1077976 : unsigned int i;
6771 1077976 : unsigned int length = ops->length ();
6772 1077976 : tree cst = ops->last ()->op;
6773 :
6774 1077976 : if (length == 1 || TREE_CODE (cst) != REAL_CST)
6775 : return;
6776 :
6777 4125 : FOR_EACH_VEC_ELT (*ops, i, oe)
6778 : {
6779 2940 : if (TREE_CODE (oe->op) == SSA_NAME
6780 2940 : && has_single_use (oe->op))
6781 : {
6782 866 : gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6783 2978 : if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6784 : {
6785 54 : tree arg0, arg1;
6786 54 : switch (gimple_call_combined_fn (old_call))
6787 : {
6788 20 : CASE_CFN_COPYSIGN:
6789 20 : CASE_CFN_COPYSIGN_FN:
6790 20 : arg0 = gimple_call_arg (old_call, 0);
6791 20 : arg1 = gimple_call_arg (old_call, 1);
6792 : /* The first argument of copysign must be a constant,
6793 : otherwise there's nothing to do. */
6794 20 : if (TREE_CODE (arg0) == REAL_CST)
6795 : {
6796 20 : tree type = TREE_TYPE (arg0);
6797 20 : tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6798 : /* If we couldn't fold to a single constant, skip it.
6799 : That happens e.g. for inexact multiplication when
6800 : -frounding-math. */
6801 20 : if (mul == NULL_TREE)
6802 : break;
6803 : /* Instead of adjusting OLD_CALL, let's build a new
6804 : call to not leak the LHS and prevent keeping bogus
6805 : debug statements. DCE will clean up the old call. */
6806 16 : gcall *new_call;
6807 16 : if (gimple_call_internal_p (old_call))
6808 0 : new_call = gimple_build_call_internal
6809 0 : (IFN_COPYSIGN, 2, mul, arg1);
6810 : else
6811 16 : new_call = gimple_build_call
6812 16 : (gimple_call_fndecl (old_call), 2, mul, arg1);
6813 16 : tree lhs = make_ssa_name (type);
6814 16 : gimple_call_set_lhs (new_call, lhs);
6815 16 : gimple_set_location (new_call,
6816 : gimple_location (old_call));
6817 16 : insert_stmt_after (new_call, old_call);
6818 : /* We've used the constant, get rid of it. */
6819 16 : ops->pop ();
6820 16 : bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6821 : /* Handle the CST1 < 0 case by negating the result. */
6822 16 : if (cst1_neg)
6823 : {
6824 7 : tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6825 7 : gimple *negate_stmt
6826 7 : = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6827 7 : insert_stmt_after (negate_stmt, new_call);
6828 7 : oe->op = negrhs;
6829 : }
6830 : else
6831 9 : oe->op = lhs;
6832 16 : if (dump_file && (dump_flags & TDF_DETAILS))
6833 : {
6834 14 : fprintf (dump_file, "Optimizing copysign: ");
6835 14 : print_generic_expr (dump_file, cst);
6836 14 : fprintf (dump_file, " * COPYSIGN (");
6837 14 : print_generic_expr (dump_file, arg0);
6838 14 : fprintf (dump_file, ", ");
6839 14 : print_generic_expr (dump_file, arg1);
6840 23 : fprintf (dump_file, ") into %sCOPYSIGN (",
6841 : cst1_neg ? "-" : "");
6842 14 : print_generic_expr (dump_file, mul);
6843 14 : fprintf (dump_file, ", ");
6844 14 : print_generic_expr (dump_file, arg1);
6845 14 : fprintf (dump_file, "\n");
6846 : }
6847 16 : return;
6848 : }
6849 : break;
6850 : default:
6851 : break;
6852 : }
6853 : }
6854 : }
6855 : }
6856 : }
6857 :
6858 : /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  Afterwards remove_visited_stmt_chain is called on STMT's previous first operand.  STMT is dumped before and after the change when detailed dumping is enabled. */
6859 :
6860 : static void
6861 14427 : transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6862 : {
6863 14427 : tree rhs1;
6864 :
6865 14427 : if (dump_file && (dump_flags & TDF_DETAILS))
6866 : {
6867 28 : fprintf (dump_file, "Transforming ");
6868 28 : print_gimple_stmt (dump_file, stmt, 0);
6869 : }
6870 : /* Remember the old first operand before the rhs is overwritten. */
6871 14427 : rhs1 = gimple_assign_rhs1 (stmt);
6872 14427 : gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6873 14427 : update_stmt (stmt);
6874 14427 : remove_visited_stmt_chain (rhs1);
6875 :
6876 14427 : if (dump_file && (dump_flags & TDF_DETAILS))
6877 : {
6878 28 : fprintf (dump_file, " into ");
6879 28 : print_gimple_stmt (dump_file, stmt, 0);
6880 : }
6881 14427 : }
6882 :
6883 : /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  STMT is dumped before and after the change when detailed dumping is enabled. */
6884 :
6885 : static void
6886 190 : transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6887 : tree rhs1, tree rhs2)
6888 : {
6889 190 : if (dump_file && (dump_flags & TDF_DETAILS))
6890 : {
6891 0 : fprintf (dump_file, "Transforming ");
6892 0 : print_gimple_stmt (dump_file, stmt, 0);
6893 : }
6894 : /* NOTE(review): unlike transform_stmt_to_copy, the call to remove_visited_stmt_chain below receives the new RHS1 argument rather than STMT's previous first operand -- confirm this is intended. */
6895 190 : gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6896 190 : update_stmt (gsi_stmt (*gsi));
6897 190 : remove_visited_stmt_chain (rhs1);
6898 :
6899 190 : if (dump_file && (dump_flags & TDF_DETAILS))
6900 : {
6901 0 : fprintf (dump_file, " into ");
6902 0 : print_gimple_stmt (dump_file, stmt, 0);
6903 : }
6904 190 : }
6905 :
6906 : /* Rearranging OPS can expose more FMAs when the chain contains at least two multiplications.
6907 : Put non-mult ops and mult ops alternately at the end of the queue, which is
6908 : conducive to generating more FMA and reducing the loss of FMA when breaking
6909 : the chain.
6910 : E.g.
6911 : a * b + c * d + e generates:
6912 :
6913 : _4 = c_9(D) * d_10(D);
6914 : _12 = .FMA (a_7(D), b_8(D), _4);
6915 : _11 = e_6(D) + _12;
6916 :
6917 : Rearranging the ops to e + a * b + c * d generates:
6918 :
6919 : _4 = .FMA (c_7(D), d_8(D), _3);
6920 : _11 = .FMA (a_5(D), b_6(D), _4);
6921 :
6922 : Return the number of entries in OPS that are defined by a MULT_EXPR, or by a NEGATE_EXPR of a MULT_EXPR in the same basic block.  OPS is reordered only when that number is at least 2 and smaller than the length of OPS. */
6923 : static int
6924 16630 : rank_ops_for_fma (vec<operand_entry *> *ops)
6925 : {
6926 16630 : operand_entry *oe;
6927 16630 : unsigned int i;
6928 16630 : unsigned int ops_length = ops->length ();
6929 16630 : auto_vec<operand_entry *> ops_mult;
6930 16630 : auto_vec<operand_entry *> ops_others;
6931 : /* Partition OPS into multiply-defined entries (OPS_MULT) and everything else (OPS_OTHERS). */
6932 55510 : FOR_EACH_VEC_ELT (*ops, i, oe)
6933 : {
6934 38880 : if (TREE_CODE (oe->op) == SSA_NAME)
6935 : {
6936 38865 : gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6937 38865 : if (is_gimple_assign (def_stmt))
6938 : {
6939 27750 : if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6940 13286 : ops_mult.safe_push (oe);
6941 : /* A negate on the multiplication leads to FNMA. */
6942 14464 : else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6943 14464 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
6944 : {
6945 2777 : gimple *neg_def_stmt
6946 2777 : = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
6947 2777 : if (is_gimple_assign (neg_def_stmt)
6948 2774 : && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
6949 5546 : && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
6950 2728 : ops_mult.safe_push (oe);
6951 : else
6952 49 : ops_others.safe_push (oe);
6953 : }
6954 : else
6955 11687 : ops_others.safe_push (oe);
6956 : }
6957 : else
6958 11115 : ops_others.safe_push (oe);
6959 : }
6960 : else
6961 15 : ops_others.safe_push (oe);
6962 : }
6963 : /* 1. When ops_mult.length is at least 2, as in the following case,
6964 :
6965 : a * b + c * d + e.
6966 :
6967 : we need to rearrange the ops.
6968 :
6969 : Putting ops that are not defined by a mult in front can generate more FMAs.
6970 :
6971 : 2. If all ops are defined by a mult, we don't need to rearrange them. */
6972 16630 : unsigned mult_num = ops_mult.length ();
6973 16630 : if (mult_num >= 2 && mult_num != ops_length)
6974 : {
6975 : /* Put non-mult ops and mult ops alternately at the end of the
6976 : queue, which is conducive to generating more FMA and reducing the
6977 : loss of FMA when breaking the chain.  OPS is rebuilt from OPS_MULT; each entry popped from OPS_OTHERS is then inserted at OPINDEX, which starts at the end and steps back by one per insertion without going below 0. */
6978 5366 : ops->truncate (0);
6979 5366 : ops->splice (ops_mult);
6980 5366 : int j, opindex = ops->length ();
6981 5366 : int others_length = ops_others.length ();
6982 10737 : for (j = 0; j < others_length; j++)
6983 : {
6984 5371 : oe = ops_others.pop ();
6985 5371 : ops->quick_insert (opindex, oe);
6986 5371 : if (opindex > 0)
6987 5370 : opindex--;
6988 : }
6989 : }
6990 16630 : return mult_num;
6991 16630 : }
6992 : /* Reassociate expressions in basic block BB and its post-dominator as
6993 : children.
6994 :
6995 : Bubble up return status from maybe_optimize_range_tests. */
6996 :
6997 : static bool
6998 19421376 : reassociate_bb (basic_block bb)
6999 : {
7000 19421376 : gimple_stmt_iterator gsi;
7001 19421376 : gimple *stmt = last_nondebug_stmt (bb);
7002 19421376 : bool cfg_cleanup_needed = false;
7003 :
7004 19421376 : if (stmt && !gimple_visited_p (stmt))
7005 18801414 : cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
7006 :
7007 19421376 : bool do_prev = false;
7008 38842752 : for (gsi = gsi_last_bb (bb);
7009 190313374 : !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
7010 : {
7011 170891998 : do_prev = true;
7012 170891998 : stmt = gsi_stmt (gsi);
7013 :
7014 170891998 : if (is_gimple_assign (stmt)
7015 170891998 : && !stmt_could_throw_p (cfun, stmt))
7016 : {
7017 45095170 : tree lhs, rhs1, rhs2;
7018 45095170 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
7019 :
7020 : /* If this was part of an already processed statement,
7021 : we don't need to touch it again. */
7022 45095170 : if (gimple_visited_p (stmt))
7023 : {
7024 : /* This statement might have become dead because of previous
7025 : reassociations. */
7026 424752 : if (has_zero_uses (gimple_get_lhs (stmt)))
7027 : {
7028 138301 : reassoc_remove_stmt (&gsi);
7029 138301 : release_defs (stmt);
7030 : /* We might end up removing the last stmt above which
7031 : places the iterator to the end of the sequence.
7032 : Reset it to the last stmt in this case and make sure
7033 : we don't do gsi_prev in that case. */
7034 138301 : if (gsi_end_p (gsi))
7035 : {
7036 402 : gsi = gsi_last_bb (bb);
7037 402 : do_prev = false;
7038 : }
7039 : }
7040 424752 : continue;
7041 : }
7042 :
7043 : /* If this is not a gimple binary expression, there is
7044 : nothing for us to do with it. */
7045 44670418 : if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
7046 33055765 : continue;
7047 :
7048 11614653 : lhs = gimple_assign_lhs (stmt);
7049 11614653 : rhs1 = gimple_assign_rhs1 (stmt);
7050 11614653 : rhs2 = gimple_assign_rhs2 (stmt);
7051 :
7052 : /* For non-bit or min/max operations we can't associate
7053 : all types. Verify that here. */
7054 17052027 : if ((rhs_code != BIT_IOR_EXPR
7055 11614653 : && rhs_code != BIT_AND_EXPR
7056 10649001 : && rhs_code != BIT_XOR_EXPR
7057 10649001 : && rhs_code != MIN_EXPR
7058 10512481 : && rhs_code != MAX_EXPR
7059 10397522 : && !can_reassociate_type_p (TREE_TYPE (lhs)))
7060 6181825 : || !can_reassociate_op_p (rhs1)
7061 17793456 : || !can_reassociate_op_p (rhs2))
7062 5437374 : continue;
7063 :
7064 6177279 : if (associative_tree_code (rhs_code))
7065 : {
7066 4639660 : auto_vec<operand_entry *> ops;
7067 4639660 : tree powi_result = NULL_TREE;
7068 4639660 : bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
7069 :
7070 : /* There may be no immediate uses left by the time we
7071 : get here because we may have eliminated them all. */
7072 4639660 : if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
7073 45912 : continue;
7074 :
7075 4593748 : gimple_set_visited (stmt, true);
7076 4593748 : linearize_expr_tree (&ops, stmt, true, true);
7077 4593748 : ops.qsort (sort_by_operand_rank);
7078 4593748 : int orig_len = ops.length ();
7079 4593748 : optimize_ops_list (rhs_code, &ops);
7080 9187496 : if (undistribute_ops_list (rhs_code, &ops,
7081 : loop_containing_stmt (stmt)))
7082 : {
7083 264 : ops.qsort (sort_by_operand_rank);
7084 264 : optimize_ops_list (rhs_code, &ops);
7085 : }
7086 9187496 : if (undistribute_bitref_for_vector (rhs_code, &ops,
7087 : loop_containing_stmt (stmt)))
7088 : {
7089 40 : ops.qsort (sort_by_operand_rank);
7090 40 : optimize_ops_list (rhs_code, &ops);
7091 : }
7092 4593748 : if (rhs_code == PLUS_EXPR
7093 4593748 : && transform_add_to_multiply (&ops))
7094 73 : ops.qsort (sort_by_operand_rank);
7095 :
7096 4593748 : if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7097 : {
7098 951270 : if (is_vector)
7099 23928 : optimize_vec_cond_expr (rhs_code, &ops);
7100 : else
7101 927342 : optimize_range_tests (rhs_code, &ops, NULL);
7102 : }
7103 :
7104 4593748 : if (rhs_code == MULT_EXPR && !is_vector)
7105 : {
7106 1077976 : attempt_builtin_copysign (&ops);
7107 :
7108 1077976 : if (reassoc_insert_powi_p
7109 1077976 : && (flag_unsafe_math_optimizations
7110 444775 : || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7111 488043 : powi_result = attempt_builtin_powi (stmt, &ops);
7112 : }
7113 :
7114 4593748 : operand_entry *last;
7115 4593748 : bool negate_result = false;
7116 4593748 : if (ops.length () > 1
7117 4593748 : && rhs_code == MULT_EXPR)
7118 : {
7119 1100697 : last = ops.last ();
7120 1100697 : if ((integer_minus_onep (last->op)
7121 1100477 : || real_minus_onep (last->op))
7122 248 : && !HONOR_SNANS (TREE_TYPE (lhs))
7123 1100945 : && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7124 0 : || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7125 : {
7126 248 : ops.pop ();
7127 248 : negate_result = true;
7128 : }
7129 : }
7130 :
7131 4593748 : tree new_lhs = lhs;
7132 : /* If the operand vector is now empty, all operands were
7133 : consumed by the __builtin_powi optimization. */
7134 4593748 : if (ops.length () == 0)
7135 1404 : transform_stmt_to_copy (&gsi, stmt, powi_result);
7136 4592344 : else if (ops.length () == 1)
7137 : {
7138 13213 : tree last_op = ops.last ()->op;
7139 :
7140 : /* If the stmt that defines operand has to be inserted, insert it
7141 : before the use. */
7142 13213 : if (ops.last ()->stmt_to_insert)
7143 0 : insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7144 13213 : if (powi_result)
7145 190 : transform_stmt_to_multiply (&gsi, stmt, last_op,
7146 : powi_result);
7147 : else
7148 13023 : transform_stmt_to_copy (&gsi, stmt, last_op);
7149 : }
7150 : else
7151 : {
7152 4579131 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7153 4579131 : int ops_num = ops.length ();
7154 4579131 : int width = 0;
7155 4579131 : int mult_num = 0;
7156 :
7157 : /* For binary bit operations, if there are at least 3
7158 : operands and the last operand in OPS is a constant,
7159 : move it to the front. This helps ensure that we generate
7160 : (X & Y) & C rather than (X & C) & Y. The former will
7161 : often match a canonical bit test when we get to RTL. */
7162 4579131 : if (ops.length () > 2
7163 184092 : && (rhs_code == BIT_AND_EXPR
7164 : || rhs_code == BIT_IOR_EXPR
7165 163192 : || rhs_code == BIT_XOR_EXPR)
7166 4604047 : && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7167 1606 : std::swap (*ops[0], *ops[ops_num - 1]);
7168 :
7169 4579131 : optimization_type opt_type = bb_optimization_type (bb);
7170 :
7171 : /* If the target support FMA, rank_ops_for_fma will detect if
7172 : the chain has fmas and rearrange the ops if so. */
7173 4579131 : if (!reassoc_insert_powi_p
7174 2808077 : && direct_internal_fn_supported_p (IFN_FMA,
7175 2808077 : TREE_TYPE (lhs),
7176 : opt_type)
7177 4613420 : && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7178 : {
7179 16630 : mult_num = rank_ops_for_fma (&ops);
7180 : }
7181 :
7182 : /* Only rewrite the expression tree to parallel in the
7183 : last reassoc pass to avoid useless work back-and-forth
7184 : with initial linearization. */
7185 4579131 : bool has_fma = mult_num >= 2 && mult_num != ops_num;
7186 4579131 : if (!reassoc_insert_powi_p
7187 2808077 : && ops.length () > 3
7188 4595138 : && (width = get_reassociation_width (&ops, mult_num, lhs,
7189 : rhs_code, mode))
7190 : > 1)
7191 : {
7192 1815 : if (dump_file && (dump_flags & TDF_DETAILS))
7193 2 : fprintf (dump_file,
7194 : "Width = %d was chosen for reassociation\n",
7195 : width);
7196 1815 : rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7197 : width,
7198 : has_fma,
7199 : ops);
7200 : }
7201 : else
7202 : {
7203 : /* When there are three operands left, we want
7204 : to make sure the ones that get the double
7205 : binary op are chosen wisely. */
7206 4577316 : int len = ops.length ();
7207 4577316 : if (!reassoc_insert_powi_p
7208 2806262 : && len >= 3
7209 4705994 : && (!has_fma
7210 : /* width > 1 means ranking ops results in better
7211 : parallelism. Check current value to avoid
7212 : calling get_reassociation_width again. */
7213 5362 : || (width != 1
7214 5362 : && get_reassociation_width (
7215 : &ops, mult_num, lhs, rhs_code, mode)
7216 : > 1)))
7217 125559 : swap_ops_for_binary_stmt (ops, len - 3);
7218 :
7219 4577316 : new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7220 4577316 : powi_result != NULL
7221 4577316 : || negate_result,
7222 : len != orig_len);
7223 : }
7224 :
7225 : /* If we combined some repeated factors into a
7226 : __builtin_powi call, multiply that result by the
7227 : reassociated operands. */
7228 4579131 : if (powi_result)
7229 : {
7230 129 : gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7231 129 : tree type = TREE_TYPE (lhs);
7232 129 : tree target_ssa = make_temp_ssa_name (type, NULL,
7233 : "reassocpow");
7234 129 : gimple_set_lhs (lhs_stmt, target_ssa);
7235 129 : update_stmt (lhs_stmt);
7236 129 : if (lhs != new_lhs)
7237 : {
7238 129 : target_ssa = new_lhs;
7239 129 : new_lhs = lhs;
7240 : }
7241 129 : mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7242 : powi_result, target_ssa);
7243 129 : gimple_set_location (mul_stmt, gimple_location (stmt));
7244 129 : gimple_set_uid (mul_stmt, gimple_uid (stmt));
7245 129 : gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7246 : }
7247 : }
7248 :
7249 4593748 : if (negate_result)
7250 : {
7251 248 : stmt = SSA_NAME_DEF_STMT (lhs);
7252 248 : tree tmp = make_ssa_name (TREE_TYPE (lhs));
7253 248 : gimple_set_lhs (stmt, tmp);
7254 248 : if (lhs != new_lhs)
7255 238 : tmp = new_lhs;
7256 248 : gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7257 : tmp);
7258 248 : gimple_set_uid (neg_stmt, gimple_uid (stmt));
7259 248 : gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7260 248 : update_stmt (stmt);
7261 : }
7262 4639660 : }
7263 : }
7264 : }
7265 :
7266 19421376 : return cfg_cleanup_needed;
7267 : }
7268 :
7269 : /* Add jumps around shifts for range tests turned into bit tests.
7270 : For each SSA_NAME VAR queued in reassoc_branch_fixups (the queue is released at the end) we have code like:
7271 : VAR = ...; // final stmt of range comparison
7272 : // bit test here...;
7273 : OTHERVAR = ...; // final stmt of the bit test sequence
7274 : RES = VAR | OTHERVAR;
7275 : Turn the above into (<l1> being the block that ends with the new test of VAR):
7276 : VAR = ...;
7277 : if (VAR != 0)
7278 : goto <l3>;
7279 : else
7280 : goto <l2>;
7281 : <l2>:
7282 : // bit test here...;
7283 : OTHERVAR = ...;
7284 : <l3>:
7285 : # RES = PHI<1(l1), OTHERVAR(l2)>; */
7286 :
7287 : static void
7288 2088342 : branch_fixup (void)
7289 : {
7290 2088342 : tree var;
7291 2088342 : unsigned int i;
7292 : /* Each queued VAR must have exactly one use, a BIT_IOR_EXPR assignment in the block that also defines VAR; this is asserted. */
7293 2088707 : FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7294 : {
7295 365 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7296 365 : gimple *use_stmt;
7297 365 : use_operand_p use;
7298 365 : bool ok = single_imm_use (var, &use, &use_stmt);
7299 365 : gcc_assert (ok
7300 : && is_gimple_assign (use_stmt)
7301 : && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7302 : && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7303 : /* Split the block after DEF_STMT and again after USE_STMT: COND_BB keeps everything up to DEF_STMT, THEN_BB runs through USE_STMT and MERGE_BB gets the rest. */
7304 365 : basic_block cond_bb = gimple_bb (def_stmt);
7305 365 : basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7306 365 : basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7307 : /* Insert `if (VAR != 0)' right after DEF_STMT, carrying USE_STMT's location. */
7308 365 : gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7309 365 : gimple *g = gimple_build_cond (NE_EXPR, var,
7310 365 : build_zero_cst (TREE_TYPE (var)),
7311 : NULL_TREE, NULL_TREE);
7312 365 : location_t loc = gimple_location (use_stmt);
7313 365 : gimple_set_location (g, loc);
7314 365 : gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7315 : /* The new true edge goes straight to MERGE_BB with even probability; the existing COND_BB->THEN_BB edge becomes the false edge and gives up that share of probability, and THEN_BB's count is reduced by the true edge's count. */
7316 365 : edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7317 365 : etrue->probability = profile_probability::even ();
7318 365 : edge efalse = find_edge (cond_bb, then_bb);
7319 365 : efalse->flags = EDGE_FALSE_VALUE;
7320 365 : efalse->probability -= etrue->probability;
7321 365 : then_bb->count -= etrue->count ();
7322 : /* OTHERVAR is whichever operand of the BIT_IOR_EXPR is not VAR. The use is then replaced by a PHI in MERGE_BB that yields 1 over the true edge and OTHERVAR over THEN_BB's single successor edge. */
7323 365 : tree othervar = NULL_TREE;
7324 365 : if (gimple_assign_rhs1 (use_stmt) == var)
7325 254 : othervar = gimple_assign_rhs2 (use_stmt);
7326 111 : else if (gimple_assign_rhs2 (use_stmt) == var)
7327 : othervar = gimple_assign_rhs1 (use_stmt);
7328 : else
7329 0 : gcc_unreachable ();
7330 365 : tree lhs = gimple_assign_lhs (use_stmt);
7331 365 : gphi *phi = create_phi_node (lhs, merge_bb);
7332 365 : add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7333 365 : add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7334 365 : gsi = gsi_for_stmt (use_stmt);
7335 365 : gsi_remove (&gsi, true);
7336 : /* Patch up dominator and post-dominator information for the blocks involved. */
7337 365 : set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7338 365 : set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7339 : }
7340 2088342 : reassoc_branch_fixups.release ();
7341 2088342 : }
7342 :
7343 : void dump_ops_vector (FILE *file, vec<operand_entry *> ops); /* Prototypes for the two non-static dump helpers defined below. */
7344 : void debug_ops_vector (vec<operand_entry *> ops);
7345 :
7346 : /* Dump the operand entry vector OPS to FILE, one line per entry giving its index, its rank and its operand tree. */
7347 :
7348 : void
7349 0 : dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7350 : {
7351 0 : operand_entry *oe;
7352 0 : unsigned int i;
7353 : /* NOTE(review): I is unsigned but is printed with %d below. */
7354 0 : FOR_EACH_VEC_ELT (ops, i, oe)
7355 : {
7356 0 : fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7357 0 : print_generic_expr (file, oe->op);
7358 0 : fprintf (file, "\n");
7359 : }
7360 0 : }
7361 :
7362 : /* Dump the operand entry vector OPS to STDERR by way of dump_ops_vector. Marked DEBUG_FUNCTION; presumably meant to be called by hand from a debugger. */
7363 :
7364 : DEBUG_FUNCTION void
7365 0 : debug_ops_vector (vec<operand_entry *> ops)
7366 : {
7367 0 : dump_ops_vector (stderr, ops);
7368 0 : }
7369 :
7370 : /* Run both per-block phases of the pass over the current function and bubble up return status from reassociate_bb (true if any call returned true). */
7371 :
7372 : static bool
7373 2088342 : do_reassoc ()
7374 : {
7375 2088342 : bool cfg_cleanup_needed = false;
7376 2088342 : basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
7377 : /* Phase 1: break_up_subtract_bb on each block, walking the dominator tree down from the entry block. WORKLIST is an explicit stack of depth SP; a block is handled before its dominator sons are pushed. */
7378 2088342 : unsigned sp = 0;
7379 2088342 : for (auto son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
7380 4176684 : son; son = next_dom_son (CDI_DOMINATORS, son))
7381 2088342 : worklist[sp++] = son;
7382 21509782 : while (sp)
7383 : {
7384 19421440 : basic_block bb = worklist[--sp];
7385 19421440 : break_up_subtract_bb (bb);
7386 19421440 : for (auto son = first_dom_son (CDI_DOMINATORS, bb);
7387 36754538 : son; son = next_dom_son (CDI_DOMINATORS, son))
7388 17333098 : worklist[sp++] = son;
7389 : }
7390 : /* Phase 2: reassociate_bb on each block, walking the post-dominator tree down from the exit block and reusing the now-empty stack. */
7391 10843223 : for (auto son = first_dom_son (CDI_POST_DOMINATORS,
7392 2088342 : EXIT_BLOCK_PTR_FOR_FN (cfun));
7393 10843223 : son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7394 8754881 : worklist[sp++] = son;
7395 21509718 : while (sp)
7396 : {
7397 19421376 : basic_block bb = worklist[--sp];
7398 19421376 : cfg_cleanup_needed |= reassociate_bb (bb);
7399 19421376 : for (auto son = first_dom_son (CDI_POST_DOMINATORS, bb);
7400 30087871 : son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7401 10666495 : worklist[sp++] = son;
7402 : }
7403 :
7404 2088342 : free (worklist);
7405 2088342 : return cfg_cleanup_needed;
7406 : }
7407 :
7408 : /* Initialize the reassociation pass: set up loop information, reset the statistics and the operand entry id counter, build the bb_rank and operand_rank tables, compute post-dominators, clear plus_negates and call mark_ssa_maybe_undefs. */
7409 :
7410 : static void
7411 2088342 : init_reassoc (void)
7412 : {
7413 2088342 : int i;
7414 2088342 : int64_t rank = 2;
7415 2088342 : int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7416 :
7417 : /* Find the loops, so that we can prevent moving calculations in
7418 : them. */
7419 2088342 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7420 :
7421 2088342 : memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7422 :
7423 2088342 : next_operand_entry_id = 0;
7424 :
7425 : /* RPO (Reverse Post Order) will give us something where
7426 : deeper loops come later. BBS is filled by the call below and is used further down to index bb_rank, which is why bb_rank is sized by last_basic_block_for_fn. */
7427 2088342 : pre_and_rev_post_order_compute (NULL, bbs, false);
7428 2088342 : bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7429 2088342 : operand_rank = new hash_map<tree, int64_t>;
7430 :
7431 : /* Give each default definition a distinct rank. This includes
7432 : parameters and the static chain. Walk backwards over all
7433 : SSA names so that we get proper rank ordering according
7434 : to tree_swap_operands_p. */
7435 109066435 : for (i = num_ssa_names - 1; i > 0; --i)
7436 : {
7437 104889751 : tree name = ssa_name (i);
7438 179196391 : if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7439 6170314 : insert_operand_rank (name, ++rank);
7440 : }
7441 :
7442 : /* Set up rank for each BB: ranks keep counting up from the default definitions and are stored shifted left by 16 bits (presumably to leave room for ranks assigned within a block; TODO confirm). */
7443 21509718 : for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7444 19421376 : bb_rank[bbs[i]] = ++rank << 16;
7445 :
7446 2088342 : free (bbs);
7447 2088342 : calculate_dominance_info (CDI_POST_DOMINATORS);
7448 2088342 : plus_negates = vNULL;
7449 2088342 : mark_ssa_maybe_undefs ();
7450 2088342 : }
7451 :
7452 : /* Cleanup after the reassociation pass: hand the counters in reassociate_stats to statistics_counter_event (stats are printed if
7453 : requested), then release the data structures and analyses the pass used. */
7454 :
7455 : static void
7456 2088342 : fini_reassoc (void)
7457 : {
7458 2088342 : statistics_counter_event (cfun, "Linearized",
7459 : reassociate_stats.linearized);
7460 2088342 : statistics_counter_event (cfun, "Constants eliminated",
7461 : reassociate_stats.constants_eliminated);
7462 2088342 : statistics_counter_event (cfun, "Ops eliminated",
7463 : reassociate_stats.ops_eliminated);
7464 2088342 : statistics_counter_event (cfun, "Statements rewritten",
7465 : reassociate_stats.rewritten);
7466 2088342 : statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7467 : reassociate_stats.pows_encountered);
7468 2088342 : statistics_counter_event (cfun, "Built-in powi calls created",
7469 : reassociate_stats.pows_created);
7470 : /* Free the rank tables, clear the biased_names bitmap, release the operand entry pool and plus_negates, then drop post-dominance and loop information. */
7471 4176684 : delete operand_rank;
7472 2088342 : bitmap_clear (biased_names);
7473 2088342 : operand_entry_pool.release ();
7474 2088342 : free (bb_rank);
7475 2088342 : plus_negates.release ();
7476 2088342 : free_dominance_info (CDI_POST_DOMINATORS);
7477 2088342 : loop_optimizer_finalize ();
7478 2088342 : }
7479 :
7480 : /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7481 : insertion of __builtin_powi calls. BIAS_LOOP_CARRIED_PHI_RANKS_P is recorded in the file-scope variable reassoc_bias_loop_carried_phi_ranks_p.
7482 :
7483 : Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
7484 : optimization of a gimple conditional. Otherwise returns zero. */
7485 :
7486 : static unsigned int
7487 2088342 : execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7488 : {
7489 2088342 : reassoc_insert_powi_p = insert_powi_p;
7490 2088342 : reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7491 :
7492 2088342 : init_reassoc ();
7493 : /* Reassociate all blocks first, then repropagate negates and apply the queued branch fixups, and finally tear everything down. */
7494 2088342 : bool cfg_cleanup_needed;
7495 2088342 : cfg_cleanup_needed = do_reassoc ();
7496 2088342 : repropagate_negates ();
7497 2088342 : branch_fixup ();
7498 :
7499 2088342 : fini_reassoc ();
7500 2088342 : return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7501 : }
7502 :
7503 : namespace {
7504 : /* Pass descriptor for "reassoc": a GIMPLE pass that requires PROP_cfg and PROP_ssa, is timed under TV_TREE_REASSOC and requests TODO_update_ssa_only_virtuals when it finishes. */
7505 : const pass_data pass_data_reassoc =
7506 : {
7507 : GIMPLE_PASS, /* type */
7508 : "reassoc", /* name */
7509 : OPTGROUP_NONE, /* optinfo_flags */
7510 : TV_TREE_REASSOC, /* tv_id */
7511 : ( PROP_cfg | PROP_ssa ), /* properties_required */
7512 : 0, /* properties_provided */
7513 : 0, /* properties_destroyed */
7514 : 0, /* todo_flags_start */
7515 : TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7516 : };
7517 : /* The reassociation pass. Objects are created by make_pass_reassoc and clone; set_pass_param selects the variant to run. The constructor gives both flags defined values so that execute never reads an indeterminate member if set_pass_param was not called; the defaults keep bias_loop_carried_phi_ranks_p == !insert_powi_p, the relation set_pass_param establishes. */
7518 : class pass_reassoc : public gimple_opt_pass
7519 : {
7520 : public:
7521 577534 : pass_reassoc (gcc::context *ctxt)
7522 1155068 : : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false), bias_loop_carried_phi_ranks_p (true)
7523 : {}
7524 :
7525 : /* opt_pass methods. set_pass_param accepts only parameter 0: PARAM turns powi insertion on and loop-carried PHI rank biasing off, and vice versa. gate runs the pass whenever flag_tree_reassoc is nonzero. */
7526 288767 : opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7527 577534 : void set_pass_param (unsigned int n, bool param) final override
7528 : {
7529 577534 : gcc_assert (n == 0);
7530 577534 : insert_powi_p = param;
7531 577534 : bias_loop_carried_phi_ranks_p = !param;
7532 577534 : }
7533 2088650 : bool gate (function *) final override { return flag_tree_reassoc != 0; }
7534 2088342 : unsigned int execute (function *) final override
7535 : {
7536 2088342 : return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7537 : }
7538 :
7539 : private:
7540 : /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7541 : point 3a in the pass header comment. */
7542 : bool insert_powi_p;
7543 : bool bias_loop_carried_phi_ranks_p; /* Second argument to execute_reassoc; kept equal to !insert_powi_p. */
7544 : }; // class pass_reassoc
7545 :
7546 : } // anon namespace
7547 : /* Allocate and return a new pass_reassoc object for context CTXT. */
7548 : gimple_opt_pass *
7549 288767 : make_pass_reassoc (gcc::context *ctxt)
7550 : {
7551 288767 : return new pass_reassoc (ctxt);
7552 : }
|