Line data Source code
1 : /* Reassociation for trees.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 : Contributed by Daniel Berlin <dan@dberlin.org>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "target.h"
26 : #include "rtl.h"
27 : #include "tree.h"
28 : #include "gimple.h"
29 : #include "cfghooks.h"
30 : #include "alloc-pool.h"
31 : #include "tree-pass.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "ssa.h"
35 : #include "optabs-tree.h"
36 : #include "gimple-pretty-print.h"
37 : #include "diagnostic-core.h"
38 : #include "fold-const.h"
39 : #include "stor-layout.h"
40 : #include "cfganal.h"
41 : #include "gimple-iterator.h"
42 : #include "gimple-fold.h"
43 : #include "tree-eh.h"
44 : #include "gimplify-me.h"
45 : #include "tree-cfg.h"
46 : #include "tree-ssa-loop.h"
47 : #include "flags.h"
48 : #include "tree-ssa.h"
49 : #include "langhooks.h"
50 : #include "cfgloop.h"
51 : #include "builtins.h"
52 : #include "gimplify.h"
53 : #include "case-cfn-macros.h"
54 : #include "tree-ssa-reassoc.h"
55 : #include "tree-ssa-math-opts.h"
56 : #include "gimple-range.h"
57 : #include "internal-fn.h"
58 :
59 : /* This is a simple global reassociation pass. It is, in part, based
60 : on the LLVM pass of the same name (They do some things more/less
61 : than we do, in different orders, etc).
62 :
63 : It consists of five steps:
64 :
65 : 1. Breaking up subtract operations into addition + negate, where
66 : it would promote the reassociation of adds.
67 :
68 : 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 : becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 : During linearization, we place the operands of the binary
71 : expressions into a vector of operand_entry_*
72 :
73 : 3. Optimization of the operand lists, eliminating things like a +
74 : -a, a & a, etc.
75 :
76 : 3a. Combine repeated factors with the same occurrence counts
77 : into a __builtin_powi call that will later be optimized into
78 : an optimal number of multiplies.
79 :
80 : 4. Rewrite the expression trees we linearized and optimized so
81 : they are in proper rank order.
82 :
83 : 5. Repropagate negates, as nothing else will clean it up ATM.
84 :
85 : A bit of theory on #4, since nobody seems to write anything down
86 : about why it makes sense to do it the way they do it:
87 :
88 : We could do this much nicer theoretically, but don't (for reasons
89 : explained after how to do it theoretically nice :P).
90 :
91 : In order to promote the most redundancy elimination, you want
92 : binary expressions whose operands are the same rank (or
93 : preferably, the same value) exposed to the redundancy eliminator,
94 : for possible elimination.
95 :
96 : So the way to do this if we really cared, is to build the new op
97 : tree from the leaves to the roots, merging as you go, and putting the
98 : new op on the end of the worklist, until you are left with one
99 : thing on the worklist.
100 :
101 : IE if you have to rewrite the following set of operands (listed with
102 : rank in parentheses), with opcode PLUS_EXPR:
103 :
104 : a (1), b (1), c (1), d (2), e (2)
105 :
106 :
107 : We start with our merge worklist empty, and the ops list with all of
108 : those on it.
109 :
110 : You want to first merge all leaves of the same rank, as much as
111 : possible.
112 :
113 : So first build a binary op of
114 :
115 : mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 :
117 : Because there is no three operand form of PLUS_EXPR, c is not going to
118 : be exposed to redundancy elimination as a rank 1 operand.
119 :
120 : So you might as well throw it on the merge worklist (you could also
121 : consider it to now be a rank two operand, and merge it with d and e,
122 : but in this case, you then have evicted e from a binary op. So at
123 : least in this situation, you can't win.)
124 :
125 : Then build a binary op of d + e
126 : mergetmp2 = d + e
127 :
128 : and put mergetmp2 on the merge worklist.
129 :
130 : so merge worklist = {mergetmp, c, mergetmp2}
131 :
132 : Continue building binary ops of these operations until you have only
133 : one operation left on the worklist.
134 :
135 : So we have
136 :
137 : build binary op
138 : mergetmp3 = mergetmp + c
139 :
140 : worklist = {mergetmp2, mergetmp3}
141 :
142 : mergetmp4 = mergetmp2 + mergetmp3
143 :
144 : worklist = {mergetmp4}
145 :
146 : because we have one operation left, we can now just set the original
147 : statement equal to the result of that operation.
148 :
149 : This will at least expose a + b and d + e to redundancy elimination
150 : as binary operations.
151 :
152 : For extra points, you can reuse the old statements to build the
153 : mergetmps, since you shouldn't run out.
154 :
155 : So why don't we do this?
156 :
157 : Because it's expensive, and rarely will help. Most trees we are
158 : reassociating have 3 or less ops. If they have 2 ops, they already
159 : will be written into a nice single binary op. If you have 3 ops, a
160 : single simple check suffices to tell you whether the first two are of the
161 : same rank. If so, you know to order it
162 :
163 : mergetmp = op1 + op2
164 : newstmt = mergetmp + op3
165 :
166 : instead of
167 : mergetmp = op2 + op3
168 : newstmt = mergetmp + op1
169 :
170 : If all three are of the same rank, you can't expose them all in a
171 : single binary operator anyway, so the above is *still* the best you
172 : can do.
173 :
174 : Thus, this is what we do. When we have three ops left, we check to see
175 : what order to put them in, and call it a day. As a nod to vector sum
176 : reduction, we check if any of the ops are really a phi node that is a
177 : destructive update for the associating op, and keep the destructive
178 : update together for vector sum reduction recognition. */
179 :
180 : /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 : point 3a in the pass header comment. */
182 : static bool reassoc_insert_powi_p;
183 :
184 : /* Enable biasing ranks of loop accumulators. We don't want this before
185 : vectorization, since it interferes with reduction chains. */
186 : static bool reassoc_bias_loop_carried_phi_ranks_p;
187 :
188 : /* Statistics */
189 : static struct
190 : {
191 : int linearized;
192 : int constants_eliminated;
193 : int ops_eliminated;
194 : int rewritten;
195 : int pows_encountered;
196 : int pows_created;
197 : } reassociate_stats;
198 :
199 :
200 : static object_allocator<operand_entry> operand_entry_pool
201 : ("operand entry pool");
202 :
203 : /* This is used to assign a unique ID to each struct operand_entry
204 : so that qsort results are identical on different hosts. */
205 : static unsigned int next_operand_entry_id;
206 :
207 : /* Starting rank number for a given basic block, so that we can rank
208 : operations using unmovable instructions in that BB based on the bb
209 : depth. */
210 : static int64_t *bb_rank;
211 :
212 : /* Operand->rank hashtable. */
213 : static hash_map<tree, int64_t> *operand_rank;
214 :
215 : /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 : biased. */
217 : static auto_bitmap biased_names;
218 :
219 : /* Vector of SSA_NAMEs on which after reassociate_bb is done with
220 : all basic blocks the CFG should be adjusted - basic blocks
221 : split right after that SSA_NAME's definition statement and before
222 : the only use, which must be a bit ior. */
223 : static vec<tree> reassoc_branch_fixups;
224 :
225 : /* Forward decls. */
226 : static int64_t get_rank (tree);
227 : static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 :
229 : /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 : possibly added by gsi_remove. */
231 :
static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  /* Nothing to fix up when debug bind stmts cannot exist, or when
     removing a PHI.  */
  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  /* Remember the position just before the removed statement so we can
     walk over any debug stmts gsi_remove leaves in its place.  */
  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  /* Any stmts between the old predecessor and GSI must be debug stmts
     added by gsi_remove with uid 0; give them the removed statement's
     uid so uid-based ordering in this pass stays consistent.  */
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
258 :
259 : /* Bias amount for loop-carried phis. We want this to be larger than
260 : the depth of any reassociation tree we can see, but not larger than
261 : the rank difference between two blocks. */
262 : #define PHI_LOOP_BIAS (1 << 15)
263 :
264 : /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 : operands to the STMT's left-hand side. The goal is to preserve bias in code
266 : like this:
267 :
268 : x_1 = phi(x_0, x_2)
269 : a = x_1 | 1
270 : b = a ^ 2
271 : .MEM = b
272 : c = b + d
273 : x_2 = c + e
274 :
275 : That is, we need to preserve bias along single-use chains originating from
276 : loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 : uses, because only they participate in rank propagation. */
278 : static bool
279 6733685 : propagate_bias_p (gimple *stmt)
280 : {
281 6733685 : use_operand_p use;
282 6733685 : imm_use_iterator use_iter;
283 6733685 : gimple *single_use_stmt = NULL;
284 :
285 6733685 : if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 : return false;
287 :
288 17046454 : FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 : {
290 7560770 : gimple *current_use_stmt = USE_STMT (use);
291 :
292 7560770 : if (is_gimple_assign (current_use_stmt)
293 7560770 : && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 : {
295 5811846 : if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 702584 : return false;
297 : single_use_stmt = current_use_stmt;
298 : }
299 702584 : }
300 :
301 4391550 : if (single_use_stmt == NULL)
302 : return false;
303 :
304 4391186 : if (gimple_bb (stmt)->loop_father
305 4391186 : != gimple_bb (single_use_stmt)->loop_father)
306 : return false;
307 :
308 : return true;
309 : }
310 :
311 : /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 : an innermost loop, and the phi has only a single use which is inside
313 : the loop, then the rank is the block rank of the loop latch plus an
314 : extra bias for the loop-carried dependence. This causes expressions
315 : calculated into an accumulator variable to be independent for each
316 : iteration of the loop. If STMT is some other phi, the rank is the
317 : block rank of its containing block. */
318 : static int64_t
319 1426094 : phi_rank (gimple *stmt)
320 : {
321 1426094 : basic_block bb = gimple_bb (stmt);
322 1426094 : class loop *father = bb->loop_father;
323 1426094 : tree res;
324 1426094 : unsigned i;
325 1426094 : use_operand_p use;
326 1426094 : gimple *use_stmt;
327 :
328 1426094 : if (!reassoc_bias_loop_carried_phi_ranks_p)
329 549574 : return bb_rank[bb->index];
330 :
331 : /* We only care about real loops (those with a latch). */
332 876520 : if (!father->latch)
333 1 : return bb_rank[bb->index];
334 :
335 : /* Interesting phis must be in headers of innermost loops. */
336 876519 : if (bb != father->header
337 690193 : || father->inner)
338 340092 : return bb_rank[bb->index];
339 :
340 : /* Ignore virtual SSA_NAMEs. */
341 536427 : res = gimple_phi_result (stmt);
342 1072854 : if (virtual_operand_p (res))
343 0 : return bb_rank[bb->index];
344 :
345 : /* The phi definition must have a single use, and that use must be
346 : within the loop. Otherwise this isn't an accumulator pattern. */
347 536427 : if (!single_imm_use (res, &use, &use_stmt)
348 536427 : || gimple_bb (use_stmt)->loop_father != father)
349 467278 : return bb_rank[bb->index];
350 :
351 : /* Look for phi arguments from within the loop. If found, bias this phi. */
352 78876 : for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 : {
354 78668 : tree arg = gimple_phi_arg_def (stmt, i);
355 78668 : if (TREE_CODE (arg) == SSA_NAME
356 78668 : && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 : {
358 73828 : gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 73828 : if (gimple_bb (def_stmt)->loop_father == father)
360 68941 : return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
361 : }
362 : }
363 :
364 : /* Must be an uninteresting phi. */
365 208 : return bb_rank[bb->index];
366 : }
367 :
368 : /* Return the maximum of RANK and the rank that should be propagated
369 : from expression OP. For most operands, this is just the rank of OP.
370 : For loop-carried phis, the value is zero to avoid undoing the bias
371 : in favor of the phi. */
372 : static int64_t
373 7339914 : propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 : {
375 7339914 : int64_t op_rank;
376 :
377 7339914 : op_rank = get_rank (op);
378 :
379 : /* Check whether op is biased after the get_rank () call, since it might have
380 : updated biased_names. */
381 7339914 : if (TREE_CODE (op) == SSA_NAME
382 7339914 : && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 : {
384 46277 : if (maybe_biased_p == NULL)
385 : return rank;
386 32176 : *maybe_biased_p = true;
387 : }
388 :
389 7325813 : return MAX (rank, op_rank);
390 : }
391 :
392 : /* Look up the operand rank structure for expression E. */
393 :
394 : static inline int64_t
395 13556718 : find_operand_rank (tree e)
396 : {
397 13556718 : int64_t *slot = operand_rank->get (e);
398 13556718 : return slot ? *slot : -1;
399 : }
400 :
401 : /* Insert {E,RANK} into the operand rank hashtable. */
402 :
403 : static inline void
404 14566743 : insert_operand_rank (tree e, int64_t rank)
405 : {
406 14566743 : gcc_assert (rank > 0);
407 14566743 : bool existed = operand_rank->put (e, rank);
408 14566743 : gcc_assert (!existed);
409 14566743 : }
410 :
411 : /* Given an expression E, return the rank of the expression. */
412 :
static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  rank = phi_rank (stmt);
	  /* phi_rank only returns something other than the bb rank when
	     it applied the loop-carried bias; remember such names.  */
	  if (rank != bb_rank[gimple_bb (stmt)->index])
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}

      else if (!is_gimple_assign (stmt))
	rank = bb_rank[gimple_bb (stmt)->index];

      else
	{
	  bool biased_p = false;
	  bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

	  /* Otherwise, find the maximum rank for the operands.  As an
	     exception, remove the bias from loop-carried phis when propagating
	     the rank so that dependent operations are not also biased.  */
	  /* Simply walk over all SSA uses - this takes advantage of the
	     fact that non-SSA operands are is_gimple_min_invariant and
	     thus have rank 0.  */
	  rank = 0;
	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	    rank = propagate_rank (rank, op, maybe_biased_p);

	  rank += 1;
	  if (biased_p)
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %" PRId64 "\n", rank);
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
511 :
512 :
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
/* Parenthesized so the macros expand safely inside larger expressions
   (e.g. multiplication or shifts would otherwise bind into the `1 << N'
   body).  Values are unchanged.  */
#define INTEGER_CONST_TYPE (1 << 4)
#define FLOAT_ONE_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
519 :
520 : /* Classify an invariant tree into integer, float, or other, so that
521 : we can sort them to be near other constants of the same type. */
522 : static inline int
523 322638 : constant_type (tree t)
524 : {
525 322638 : if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
526 : return INTEGER_CONST_TYPE;
527 8998 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528 : {
529 : /* Sort -1.0 and 1.0 constants last, while in some cases
530 : const_binop can't optimize some inexact operations, multiplication
531 : by -1.0 or 1.0 can be always merged with others. */
532 6588 : if (real_onep (t) || real_minus_onep (t))
533 816 : return FLOAT_ONE_CONST_TYPE;
534 : return FLOAT_CONST_TYPE;
535 : }
536 : else
537 : return OTHER_CONST_TYPE;
538 : }
539 :
540 : /* qsort comparison function to sort operand entries PA and PB by rank
541 : so that the sorted array is ordered by rank in decreasing order. */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  /* Primary key: rank, decreasing.  */
  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  /* Non-constant, non-SSA operands sort after SSA names.  */
  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      /* Same block (or same bb_rank): order by dominance of the
	 defining statements when one dominates the other.  */
      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      /* Dominance tie: fall back to the SSA version number.  */
      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  /* Equal SSA versions: the unique entry ID keeps the sort stable.  */
  return oeb->id > oea->id ? 1 : -1;
}
609 :
610 : /* Add an operand entry to *OPS for the tree operand OP. */
611 :
612 : static void
613 9589483 : add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 : {
615 9589483 : operand_entry *oe = operand_entry_pool.allocate ();
616 :
617 9589483 : oe->op = op;
618 9589483 : oe->rank = get_rank (op);
619 9589483 : oe->id = next_operand_entry_id++;
620 9589483 : oe->count = 1;
621 9589483 : oe->stmt_to_insert = stmt_to_insert;
622 9589483 : ops->safe_push (oe);
623 9589483 : }
624 :
625 : /* Add an operand entry to *OPS for the tree operand OP with repeat
626 : count REPEAT. */
627 :
628 : static void
629 18 : add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
630 : HOST_WIDE_INT repeat)
631 : {
632 18 : operand_entry *oe = operand_entry_pool.allocate ();
633 :
634 18 : oe->op = op;
635 18 : oe->rank = get_rank (op);
636 18 : oe->id = next_operand_entry_id++;
637 18 : oe->count = repeat;
638 18 : oe->stmt_to_insert = NULL;
639 18 : ops->safe_push (oe);
640 :
641 18 : reassociate_stats.pows_encountered++;
642 18 : }
643 :
644 : /* Returns true if we can associate the SSA def OP. */
645 :
646 : static bool
647 31603444 : can_reassociate_op_p (tree op)
648 : {
649 31603444 : if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
650 : return false;
651 : /* Uninitialized variables can't participate in reassociation. */
652 31602630 : if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
653 : return false;
654 : /* Make sure asm goto outputs do not participate in reassociation since
655 : we have no way to find an insertion place after asm goto. */
656 31597919 : if (TREE_CODE (op) == SSA_NAME
657 23327267 : && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
658 31619772 : && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
659 70 : return false;
660 : return true;
661 : }
662 :
663 : /* Returns true if we can reassociate operations of TYPE.
664 : That is for integral or non-saturating fixed-point types, and for
665 : floating point type when associative-math is enabled. */
666 :
667 : static bool
668 57100288 : can_reassociate_type_p (tree type)
669 : {
670 57100288 : if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
671 35122195 : || NON_SAT_FIXED_POINT_TYPE_P (type)
672 92222483 : || (flag_associative_math && FLOAT_TYPE_P (type)))
673 22361250 : return true;
674 : return false;
675 : }
676 :
677 : /* Return true if STMT is reassociable operation containing a binary
678 : operation with tree code CODE, and is inside LOOP. */
679 :
680 : static bool
681 7711939 : is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
682 : {
683 7711939 : basic_block bb = gimple_bb (stmt);
684 :
685 7711939 : if (gimple_bb (stmt) == NULL)
686 : return false;
687 :
688 7521799 : if (!flow_bb_inside_loop_p (loop, bb))
689 : return false;
690 :
691 7315962 : if (is_gimple_assign (stmt)
692 5817444 : && gimple_assign_rhs_code (stmt) == code
693 8160882 : && has_single_use (gimple_assign_lhs (stmt)))
694 : {
695 622871 : tree rhs1 = gimple_assign_rhs1 (stmt);
696 622871 : tree rhs2 = gimple_assign_rhs2 (stmt);
697 622871 : if (!can_reassociate_op_p (rhs1)
698 622871 : || (rhs2 && !can_reassociate_op_p (rhs2)))
699 : return false;
700 : return true;
701 : }
702 :
703 : return false;
704 : }
705 :
706 :
707 : /* Return true if STMT is a nop-conversion. */
708 :
709 : static bool
710 7684678 : gimple_nop_conversion_p (gimple *stmt)
711 : {
712 7684678 : if (gassign *ass = dyn_cast <gassign *> (stmt))
713 : {
714 9088753 : if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
715 6759464 : && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
716 1476725 : TREE_TYPE (gimple_assign_rhs1 (ass))))
717 : return true;
718 : }
719 : return false;
720 : }
721 :
722 : /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
723 : operand of the negate operation. Otherwise, return NULL. */
724 :
725 : static tree
726 7594096 : get_unary_op (tree name, enum tree_code opcode)
727 : {
728 7594096 : gimple *stmt = SSA_NAME_DEF_STMT (name);
729 :
730 : /* Look through nop conversions (sign changes). */
731 7594096 : if (gimple_nop_conversion_p (stmt)
732 7594096 : && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
733 884361 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
734 :
735 7594096 : if (!is_gimple_assign (stmt))
736 : return NULL_TREE;
737 :
738 4825817 : if (gimple_assign_rhs_code (stmt) == opcode)
739 130194 : return gimple_assign_rhs1 (stmt);
740 : return NULL_TREE;
741 : }
742 :
743 : /* Return true if OP1 and OP2 have the same value if casted to either type. */
744 :
745 : static bool
746 46418 : ops_equal_values_p (tree op1, tree op2)
747 : {
748 46418 : if (op1 == op2)
749 : return true;
750 :
751 46225 : tree orig_op1 = op1;
752 46225 : if (TREE_CODE (op1) == SSA_NAME)
753 : {
754 46225 : gimple *stmt = SSA_NAME_DEF_STMT (op1);
755 46225 : if (gimple_nop_conversion_p (stmt))
756 : {
757 17991 : op1 = gimple_assign_rhs1 (stmt);
758 17991 : if (op1 == op2)
759 : return true;
760 : }
761 : }
762 :
763 44357 : if (TREE_CODE (op2) == SSA_NAME)
764 : {
765 44357 : gimple *stmt = SSA_NAME_DEF_STMT (op2);
766 44357 : if (gimple_nop_conversion_p (stmt))
767 : {
768 16490 : op2 = gimple_assign_rhs1 (stmt);
769 16490 : if (op1 == op2
770 16490 : || orig_op1 == op2)
771 : return true;
772 : }
773 : }
774 :
775 : return false;
776 : }
777 :
778 :
779 : /* If CURR and LAST are a pair of ops that OPCODE allows us to
780 : eliminate through equivalences, do so, remove them from OPS, and
781 : return true. Otherwise, return false. */
782 :
static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  /* These operations are idempotent: drop the duplicate.  */
	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  /* x ^ x cancels: both operands go away.  */
	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      /* These were the only two operands; the whole expression
		 folds to zero and we are done.  */
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      /* Remove both entries; after the first removal the second
		 occupies index i-1 as well.  */
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
852 :
/* NEGATE_EXPR results seen as PLUS_EXPR operands, collected by
   eliminate_plus_minus_pair for later inspection in
   repropagate_negates.  */
static vec<tree> plus_negates;
854 :
855 : /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
856 : expression, look in OPS for a corresponding positive operation to cancel
857 : it out. If we find one, remove the other from OPS, replace
858 : OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
859 : return false. */
860 :
static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  /* See whether CURR->OP is -x or ~x (possibly behind a nop
     conversion).  */
  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  /* x + -x -> 0: drop both entries and push a zero in their
	     place.  */
	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  /* x + ~x -> -1: drop both entries and push an all-ones
	     constant in their place.  */
	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
939 :
940 : /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
941 : bitwise not expression, look in OPS for a corresponding operand to
942 : cancel it out. If we find one, remove the other from OPS, replace
943 : OPS[CURRINDEX] with 0, and return true. Otherwise, return
944 : false. */
945 :
static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  /* Only bitwise AND/IOR chains over an SSA name can contain a ~X
     that cancels an X.  */
  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  /* X & ~X is 0 and X | ~X is all-ones; either constant absorbs
	     the entire chain, so collapse OPS down to just it.  */
	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
1004 :
1005 : /* Use constant value that may be present in OPS to try to eliminate
1006 : operands. Note that this function is only really used when we've
1007 : eliminated ops for other reasons, or merged constants. Across
1008 : single statements, fold already does all of this, plus more. There
1009 : is little point in duplicating logic, so I've only included the
1010 : identities that I could ever construct testcases to trigger. */
1011 :
static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  /* Constants sort last in the operand vector (rank 0), so only the
     last entry needs to be inspected.  */
  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  /* x & 0 -> 0: the zero absorbs all other operands.  */
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x & -1 -> x: an all-ones mask is the identity for AND.  */
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  /* x | -1 -> -1: all-ones absorbs all other operands.  */
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x | 0 -> x: zero is the identity for IOR.  */
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  /* x * 0 -> 0; for floats only when neither NaNs nor signed
	     zeros could make the product differ from 0.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x * 1 -> x; for floats only when signaling NaNs need not
	     be preserved (x * 1 would quiet an sNaN).  */
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  /* x [^+-] 0 -> x; the float +/- cases are guarded by
	     fold_real_zero_addition_p which accounts for rounding
	     modes and signed zeros.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, 0, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
1133 :
1134 :
/* Forward declaration; the definition appears later in this file.  */
static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);
1137 :
/* Structure for tracking and counting operands.  */
struct oecount {
  /* Number of times the (OP, OECODE) pair has been seen.  */
  unsigned int cnt;
  /* Unique insertion id, used to stabilize the qsort of oecounts.  */
  unsigned int id;
  /* The operation code (MULT_EXPR or RDIV_EXPR here).  */
  enum tree_code oecode;
  /* The operand of that operation.  */
  tree op;
};
1145 :
1146 :
/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  Slots hold indices into CVEC biased
   by 42, so the values 0 and 1 stay free for the empty and deleted
   markers that int_hash <int, 0, 1> reserves.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};
1158 :
1159 : /* Hash function for oecount. */
1160 :
1161 : inline hashval_t
1162 161299 : oecount_hasher::hash (int p)
1163 : {
1164 161299 : const oecount *c = &cvec[p - 42];
1165 161299 : return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1166 : }
1167 :
1168 : /* Comparison function for oecount. */
1169 :
1170 : inline bool
1171 86522 : oecount_hasher::equal (int p1, int p2)
1172 : {
1173 86522 : const oecount *c1 = &cvec[p1 - 42];
1174 86522 : const oecount *c2 = &cvec[p2 - 42];
1175 86522 : return c1->oecode == c2->oecode && c1->op == c2->op;
1176 : }
1177 :
1178 : /* Comparison function for qsort sorting oecount elements by count. */
1179 :
1180 : static int
1181 621793 : oecount_cmp (const void *p1, const void *p2)
1182 : {
1183 621793 : const oecount *c1 = (const oecount *)p1;
1184 621793 : const oecount *c2 = (const oecount *)p2;
1185 621793 : if (c1->cnt != c2->cnt)
1186 13451 : return c1->cnt > c2->cnt ? 1 : -1;
1187 : else
1188 : /* If counts are identical, use unique IDs to stabilize qsort. */
1189 897743 : return c1->id > c2->id ? 1 : -1;
1190 : }
1191 :
1192 : /* Return TRUE iff STMT represents a builtin call that raises OP
1193 : to some exponent. */
1194 :
static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      /* It is a power of OP only if the base argument is OP itself.  */
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
1211 :
1212 : /* Given STMT which is a __builtin_pow* call, decrement its exponent
1213 : in place and return the result. Assumes that stmt_is_power_of_op
1214 : was previously called for STMT and returned TRUE. */
1215 :
static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      /* pow's exponent is a REAL_CST: round-trip it through an
	 integer, subtract one, and store the new real constant.  */
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      /* powi's exponent is already an INTEGER_CST.  */
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      /* Callers must have checked stmt_is_power_of_op first.  */
      gcc_unreachable ();
    }
}
1243 :
1244 : /* Replace SSA defined by STMT and replace all its uses with new
1245 : SSA. Also return the new SSA. */
1246 :
static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  /* Give STMT a fresh LHS; all existing uses of the old LHS are then
     redirected below.  */
  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      /* Debug uses must keep seeing the old value, i.e.
		 NEW_LHS OPCODE OP; materialize that once as a debug
		 temporary bound right after STMT and reuse it for
		 every debug use.  */
	      new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}
1285 :
1286 : /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1287 : uses with new SSAs. Also do this for the stmt that defines DEF
1288 : if *DEF is not OP. */
1289 :
1290 : static void
1291 258 : make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1292 : vec<gimple *> &stmts_to_fix)
1293 : {
1294 258 : unsigned i;
1295 258 : gimple *stmt;
1296 :
1297 258 : if (*def != op
1298 258 : && TREE_CODE (*def) == SSA_NAME
1299 258 : && (stmt = SSA_NAME_DEF_STMT (*def))
1300 516 : && gimple_code (stmt) != GIMPLE_NOP)
1301 258 : *def = make_new_ssa_for_def (stmt, opcode, op);
1302 :
1303 325 : FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1304 67 : make_new_ssa_for_def (stmt, opcode, op);
1305 258 : }
1306 :
1307 : /* Find the single immediate use of STMT's LHS, and replace it
1308 : with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1309 : replace *DEF with OP as well. */
1310 :
static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  /* Only non-SSA replacements (constants) require an explicit operand
     re-scan of the use statement here.  */
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  /* STMT is now dead: unlink its virtual def and remove it.  */
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
1336 :
1337 : /* Walks the linear chain with result *DEF searching for an operation
1338 : with operand OP and code OPCODE removing that from the chain. *DEF
1339 : is updated if there is only one operand but no operation left. */
1340 :
static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we dont use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  /* pow(OP, n) counts as n factors of OP: peel one off by
	     decrementing the exponent in place.  */
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      /* -OP contributes the factors OP and -1; removing OP
		 leaves -1 behind.  */
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      /* Removing the factor -1 from -X turns the negate into
		 a plain copy of X.  */
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  /* Every statement left on the chain now computes a different value
     than before, so each one needs a fresh LHS (PR72835).  */
  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
1451 :
1452 : /* Returns true if statement S1 dominates statement S2. Like
1453 : stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1454 :
static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      /* Equal UIDs: statements inserted by this pass reuse the UID of
	 a neighbor, so resolve the tie by scanning forward from S1
	 through the run of equal-UID statements looking for S2.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  /* Different blocks: defer to the dominator tree.  */
  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
1505 :
1506 : /* Insert STMT after INSERT_POINT. */
1507 :
static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    /* A PHI is not a real insertion point; insert at the start of its
       block instead (shared tail below).  */
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      /* Common case: insert directly after INSERT_POINT, reusing its
	 UID so relative ordering queries keep working.  */
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else if (gimple_code (insert_point) == GIMPLE_ASM
	   && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
    /* We have no idea where to insert - it depends on where the
       uses will be placed.  */
    gcc_unreachable ();
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      /* Empty block: borrow the UID of the block's last statement, or
	 1 if the block is completely empty.  */
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
1546 :
1547 : /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1548 : the result. Places the statement after the definition of either
1549 : OP1 or OP2. Returns the new statement. */
1550 :
static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      /* Neither operand has a real defining statement (constants or
	 default defs); insert at the start of the function.  */
      gsi = gsi_start_nondebug_after_labels_bb
	      (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (!gsi_end_p (gsi)
	  && is_gimple_call (gsi_stmt (gsi))
	  && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
	{
	  /* Don't add statements before a returns_twice call at the start
	     of a function.  */
	  split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	}
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      /* Insert after whichever operand definition comes later, so both
	 operands are available at the insertion point.  */
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	   || (op2def && !gimple_nop_p (op2def)
	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
1608 :
1609 : /* Perform un-distribution of divisions and multiplications.
1610 : A * X + B * X is transformed into (A + B) * X and A / X + B / X
1611 : to (A + B) / X for real X.
1612 :
1613 : The algorithm is organized as follows.
1614 :
1615 : - First we walk the addition chain *OPS looking for summands that
1616 : are defined by a multiplication or a real division. This results
1617 : in the candidates bitmap with relevant indices into *OPS.
1618 :
1619 : - Second we build the chains of multiplications or divisions for
1620 : these candidates, counting the number of occurrences of (operand, code)
1621 : pairs in all of the candidates chains.
1622 :
1623 : - Third we sort the (operand, code) pairs by number of occurrence and
1624 : process them starting with the pair with the most uses.
1625 :
1626 : * For each such pair we walk the candidates again to build a
1627 : second candidate bitmap noting all multiplication/division chains
1628 : that have at least one occurrence of (operand, code).
1629 :
1630 : * We build an alternate addition chain only covering these
1631 : candidates with one (operand, code) operation removed from their
1632 : multiplication/division chain.
1633 :
1634 : * The first candidate gets replaced by the alternate addition chain
1635 : multiplied/divided by the operand.
1636 :
1637 : * All candidate chains get disabled for further processing and
1638 : processing of (operand, code) pairs continues.
1639 :
1640 : The alternate addition chains built are re-processed by the main
1641 : reassociation algorithm which allows optimizing a * x * y + b * y * x
1642 : to (a + b ) * x * y in one invocation of the reassociation pass. */
1643 :
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, class loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  /* Undistribution only applies to addition chains of length > 1.  */
  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      /* A candidate summand must be an SSA name defined by a
	 reassociable multiplication or real division in LOOP.  */
      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      /* Count each (operand, code) pair over all candidate chains.
	 ctable slots store cvec indices biased by 42 (see
	 oecount_hasher); a duplicate pair bumps the existing
	 entry's count instead of keeping the pushed copy.  */
      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    {
	      *slot = idx;
	    }
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, " %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      /* cvec is sorted ascending by count, so the most frequent pair
	 is at the back; counts below 2 cannot be undistributed.  */
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      /* Strip the common factor from OE2's chain, accumulate
		 the remainder into OE1's sum and neutralize OE2 with
		 a zero summand.  */
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}
1857 :
1858 : /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1859 : first: element index for each relevant BIT_FIELD_REF.
1860 : second: the index of vec ops* for each relevant BIT_FIELD_REF. */
typedef std::pair<unsigned, unsigned> v_info_elem;
struct v_info {
  /* Type of the vector the BIT_FIELD_REFs are taken from.  */
  tree vec_type;
  /* All (element index, ops index) pairs collected for this vector.  */
  auto_vec<v_info_elem, 32> vec;
};
typedef v_info *v_info_ptr;
1867 :
1868 : /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1869 : static int
1870 10605 : sort_by_mach_mode (const void *p_i, const void *p_j)
1871 : {
1872 10605 : const tree tr1 = *((const tree *) p_i);
1873 10605 : const tree tr2 = *((const tree *) p_j);
1874 10605 : unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1875 10605 : unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1876 10605 : if (mode1 > mode2)
1877 : return 1;
1878 10565 : else if (mode1 < mode2)
1879 : return -1;
1880 10516 : if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1881 : return -1;
1882 5069 : else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1883 5069 : return 1;
1884 : return 0;
1885 : }
1886 :
1887 : /* Cleanup hash map for VECTOR information. */
1888 : static void
1889 4424171 : cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1890 : {
1891 4431128 : for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1892 4438085 : it != info_map.end (); ++it)
1893 : {
1894 6957 : v_info_ptr info = (*it).second;
1895 6957 : delete info;
1896 6957 : (*it).second = NULL;
1897 : }
1898 4424171 : }
1899 :
1900 : /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1901 : V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1902 : is transformed to
1903 : Vs = (V1 + V2 + ... + Vn)
1904 : Vs[0] + Vs[1] + ... + Vs[k]
1905 :
1906 : The basic steps are listed below:
1907 :
1908 : 1) Check the addition chain *OPS by looking those summands coming from
1909 : VECTOR bit_field_ref on VECTOR type. Put the information into
1910 : v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1911 :
1912 : 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1913 : continuous, they can cover the whole VECTOR perfectly without any holes.
1914 : Obtain one VECTOR list which contain candidates to be transformed.
1915 :
1916 : 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1917 : candidates with same mode, build the addition statements for them and
1918 : generate BIT_FIELD_REFs accordingly.
1919 :
1920 : TODO:
1921 : The current implementation requires the whole VECTORs should be fully
1922 : covered, but it can be extended to support partial, checking adjacent
1923 : but not fill the whole, it may need some cost model to define the
1924 : boundary to do or not.
1925 : */
1926 : static bool
1927 4603047 : undistribute_bitref_for_vector (enum tree_code opcode,
1928 : vec<operand_entry *> *ops, struct loop *loop)
1929 : {
1930 4603047 : if (ops->length () <= 1)
1931 : return false;
1932 :
1933 4599672 : if (opcode != PLUS_EXPR
1934 4599672 : && opcode != MULT_EXPR
1935 : && opcode != BIT_XOR_EXPR
1936 1194435 : && opcode != BIT_IOR_EXPR
1937 816677 : && opcode != BIT_AND_EXPR)
1938 : return false;
1939 :
1940 4424171 : hash_map<tree, v_info_ptr> v_info_map;
1941 4424171 : operand_entry *oe1;
1942 4424171 : unsigned i;
1943 :
1944 : /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1945 : information into map. */
1946 13537056 : FOR_EACH_VEC_ELT (*ops, i, oe1)
1947 : {
1948 9112885 : enum tree_code dcode;
1949 9112885 : gimple *oe1def;
1950 :
1951 9112885 : if (TREE_CODE (oe1->op) != SSA_NAME)
1952 3199159 : continue;
1953 5913726 : oe1def = SSA_NAME_DEF_STMT (oe1->op);
1954 5913726 : if (!is_gimple_assign (oe1def))
1955 1502241 : continue;
1956 4411485 : dcode = gimple_assign_rhs_code (oe1def);
1957 4411485 : if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1958 4354865 : continue;
1959 :
1960 56620 : tree rhs = gimple_assign_rhs1 (oe1def);
1961 56620 : tree vec = TREE_OPERAND (rhs, 0);
1962 56620 : tree vec_type = TREE_TYPE (vec);
1963 :
1964 56620 : if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1965 36509 : continue;
1966 :
1967 : /* Ignore it if target machine can't support this VECTOR type. */
1968 20111 : if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1969 5426 : continue;
1970 :
1971 : /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1972 14685 : if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1973 : continue;
1974 :
1975 14685 : if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1976 14685 : || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1977 5440 : continue;
1978 :
1979 : /* The type of BIT_FIELD_REF might not be equal to the element type of
1980 : the vector. We want to use a vector type with element type the
1981 : same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1982 9245 : if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1983 : {
1984 1383 : machine_mode simd_mode;
1985 1383 : unsigned HOST_WIDE_INT size, nunits;
1986 1383 : unsigned HOST_WIDE_INT elem_size
1987 1383 : = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1988 2766 : if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1989 9104094 : continue;
1990 1383 : if (size <= elem_size || (size % elem_size) != 0)
1991 0 : continue;
1992 1383 : nunits = size / elem_size;
1993 1383 : if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1994 1383 : nunits).exists (&simd_mode))
1995 0 : continue;
1996 1383 : vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1997 :
1998 : /* Ignore it if target machine can't support this VECTOR type. */
1999 1383 : if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
2000 0 : continue;
2001 :
2002 : /* Check const vector type, constrain BIT_FIELD_REF offset and
2003 : size. */
2004 1383 : if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2005 : continue;
2006 :
2007 2766 : if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2008 2766 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2009 0 : continue;
2010 : }
2011 :
2012 9245 : tree elem_type = TREE_TYPE (vec_type);
2013 9245 : unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2014 9245 : if (maybe_ne (bit_field_size (rhs), elem_size))
2015 0 : continue;
2016 :
2017 9245 : unsigned idx;
2018 9245 : if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2019 0 : continue;
2020 :
2021 : /* Ignore it if target machine can't support this type of VECTOR
2022 : operation. */
2023 9245 : optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2024 9245 : if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2025 454 : continue;
2026 :
2027 8791 : bool existed;
2028 8791 : v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2029 8791 : if (!existed)
2030 : {
2031 6957 : info = new v_info;
2032 6957 : info->vec_type = vec_type;
2033 : }
2034 1834 : else if (!types_compatible_p (vec_type, info->vec_type))
2035 0 : continue;
2036 8791 : info->vec.safe_push (std::make_pair (idx, i));
2037 : }
2038 :
2039 : /* At least two VECTOR to combine. */
2040 4424171 : if (v_info_map.elements () <= 1)
2041 : {
2042 4423903 : cleanup_vinfo_map (v_info_map);
2043 4423903 : return false;
2044 : }
2045 :
2046 : /* Verify all VECTOR candidates by checking two conditions:
2047 : 1) sorted offsets are adjacent, no holes.
2048 : 2) can fill the whole VECTOR perfectly.
2049 : And add the valid candidates to a vector for further handling. */
2050 268 : auto_vec<tree> valid_vecs (v_info_map.elements ());
2051 1242 : for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2052 2216 : it != v_info_map.end (); ++it)
2053 : {
2054 974 : tree cand_vec = (*it).first;
2055 974 : v_info_ptr cand_info = (*it).second;
2056 974 : unsigned int num_elems
2057 974 : = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2058 1948 : if (cand_info->vec.length () != num_elems)
2059 600 : continue;
2060 374 : sbitmap holes = sbitmap_alloc (num_elems);
2061 374 : bitmap_ones (holes);
2062 374 : bool valid = true;
2063 374 : v_info_elem *curr;
2064 2204 : FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2065 : {
2066 1456 : if (!bitmap_bit_p (holes, curr->first))
2067 : {
2068 : valid = false;
2069 : break;
2070 : }
2071 : else
2072 1456 : bitmap_clear_bit (holes, curr->first);
2073 : }
2074 374 : if (valid && bitmap_empty_p (holes))
2075 374 : valid_vecs.quick_push (cand_vec);
2076 374 : sbitmap_free (holes);
2077 : }
2078 :
2079 : /* At least two VECTOR to combine. */
2080 268 : if (valid_vecs.length () <= 1)
2081 : {
2082 228 : cleanup_vinfo_map (v_info_map);
2083 228 : return false;
2084 : }
2085 :
2086 40 : valid_vecs.qsort (sort_by_mach_mode);
2087 : /* Go through all candidates by machine mode order, query the mode_to_total
2088 : to get the total number for each mode and skip the single one. */
2089 86 : for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2090 : {
2091 46 : tree tvec = valid_vecs[i];
2092 46 : enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2093 :
2094 : /* Skip modes with only a single candidate. */
2095 46 : if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2096 4 : continue;
2097 :
2098 42 : unsigned int idx, j;
2099 42 : gimple *sum = NULL;
2100 42 : tree sum_vec = tvec;
2101 42 : v_info_ptr info_ptr = *(v_info_map.get (tvec));
2102 42 : v_info_elem *elem;
2103 42 : tree vec_type = info_ptr->vec_type;
2104 :
2105 : /* Build the sum for all candidates with same mode. */
2106 325 : do
2107 : {
2108 975 : sum = build_and_add_sum (vec_type, sum_vec,
2109 325 : valid_vecs[i + 1], opcode);
2110 : /* Update the operands only after build_and_add_sum,
2111 : so that we don't have to repeat the placement algorithm
2112 : of build_and_add_sum. */
2113 325 : if (sum_vec == tvec
2114 325 : && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2115 : {
2116 18 : gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2117 18 : tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2118 18 : tree lhs = make_ssa_name (vec_type);
2119 18 : gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2120 18 : gimple_set_uid (g, gimple_uid (sum));
2121 18 : gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2122 18 : gimple_assign_set_rhs1 (sum, lhs);
2123 18 : update_stmt (sum);
2124 : }
2125 325 : if (!useless_type_conversion_p (vec_type,
2126 325 : TREE_TYPE (valid_vecs[i + 1])))
2127 : {
2128 270 : gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2129 810 : tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2130 270 : valid_vecs[i + 1]);
2131 270 : tree lhs = make_ssa_name (vec_type);
2132 270 : gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2133 270 : gimple_set_uid (g, gimple_uid (sum));
2134 270 : gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2135 270 : gimple_assign_set_rhs2 (sum, lhs);
2136 270 : update_stmt (sum);
2137 : }
2138 325 : sum_vec = gimple_get_lhs (sum);
2139 325 : info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2140 325 : gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2141 : /* Update those related ops of current candidate VECTOR. */
2142 1575 : FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2143 : {
2144 1250 : idx = elem->second;
2145 1250 : gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2146 : /* Set this then op definition will get DCEd later. */
2147 1250 : gimple_set_visited (def, true);
2148 1250 : if (opcode == PLUS_EXPR
2149 1250 : || opcode == BIT_XOR_EXPR
2150 100 : || opcode == BIT_IOR_EXPR)
2151 1190 : (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2152 60 : else if (opcode == MULT_EXPR)
2153 24 : (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2154 : else
2155 : {
2156 36 : gcc_assert (opcode == BIT_AND_EXPR);
2157 36 : (*ops)[idx]->op
2158 36 : = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2159 : }
2160 1250 : (*ops)[idx]->rank = 0;
2161 : }
2162 325 : if (dump_file && (dump_flags & TDF_DETAILS))
2163 : {
2164 0 : fprintf (dump_file, "Generating addition -> ");
2165 0 : print_gimple_stmt (dump_file, sum, 0);
2166 : }
2167 325 : i++;
2168 : }
2169 325 : while ((i < valid_vecs.length () - 1)
2170 367 : && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2171 :
2172 : /* Referring to first valid VECTOR with this mode, generate the
2173 : BIT_FIELD_REF statements accordingly. */
2174 42 : info_ptr = *(v_info_map.get (tvec));
2175 42 : gcc_assert (sum);
2176 42 : tree elem_type = TREE_TYPE (vec_type);
2177 232 : FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2178 : {
2179 148 : idx = elem->second;
2180 148 : tree dst = make_ssa_name (elem_type);
2181 148 : tree pos = bitsize_int (elem->first
2182 : * tree_to_uhwi (TYPE_SIZE (elem_type)));
2183 148 : tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2184 148 : TYPE_SIZE (elem_type), pos);
2185 148 : gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2186 148 : insert_stmt_after (gs, sum);
2187 148 : gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2188 : /* Set this then op definition will get DCEd later. */
2189 148 : gimple_set_visited (def, true);
2190 148 : (*ops)[idx]->op = gimple_assign_lhs (gs);
2191 148 : (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2192 148 : if (dump_file && (dump_flags & TDF_DETAILS))
2193 : {
2194 0 : fprintf (dump_file, "Generating bit_field_ref -> ");
2195 0 : print_gimple_stmt (dump_file, gs, 0);
2196 : }
2197 : }
2198 : }
2199 :
2200 40 : if (dump_file && (dump_flags & TDF_DETAILS))
2201 0 : fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n");
2202 :
2203 40 : cleanup_vinfo_map (v_info_map);
2204 :
2205 40 : return true;
2206 4424439 : }
2207 :
2208 : /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2209 : expression, examine the other OPS to see if any of them are comparisons
2210 : of the same values, which we may be able to combine or eliminate.
2211 : For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2212 :
2213 : static bool
2214 9474143 : eliminate_redundant_comparison (enum tree_code opcode,
2215 : vec<operand_entry *> *ops,
2216 : unsigned int currindex,
2217 : operand_entry *curr)
2218 : {
2219 9474143 : tree op1, op2;
2220 9474143 : enum tree_code lcode, rcode;
2221 9474143 : gimple *def1, *def2;
2222 9474143 : int i;
2223 9474143 : operand_entry *oe;
2224 :
 : /* Only bitwise OR/AND chains can merge comparisons. */
2225 9474143 : if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2226 : return false;
2227 :
2228 : /* Check that CURR is a comparison. */
2229 1991289 : if (TREE_CODE (curr->op) != SSA_NAME)
2230 : return false;
2231 1496786 : def1 = SSA_NAME_DEF_STMT (curr->op)
2232 1496786 : if (!is_gimple_assign (def1))
2233 : return false;
2234 1286458 : lcode = gimple_assign_rhs_code (def1);
2235 1286458 : if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2236 : return false;
2237 510442 : op1 = gimple_assign_rhs1 (def1);
2238 510442 : op2 = gimple_assign_rhs2 (def1);
2239 :
2240 : /* Now look for a similar comparison in the remaining OPS. */
2241 1070560 : for (i = currindex + 1; ops->iterate (i, &oe); i++)
2242 : {
2243 560324 : tree t;
2244 :
2245 560324 : if (TREE_CODE (oe->op) != SSA_NAME)
2246 37 : continue;
2247 560287 : def2 = SSA_NAME_DEF_STMT (oe->op);
2248 560287 : if (!is_gimple_assign (def2))
2249 7449 : continue;
2250 552838 : rcode = gimple_assign_rhs_code (def2);
2251 552838 : if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2252 6537 : continue;
2253 :
2254 : /* If we got here, we have a match. See if we can combine the
2255 : two comparisons. */
2256 546301 : tree type = TREE_TYPE (gimple_assign_lhs (def1));
2257 546301 : if (opcode == BIT_IOR_EXPR)
2258 418008 : t = maybe_fold_or_comparisons (type,
2259 : lcode, op1, op2,
2260 : rcode, gimple_assign_rhs1 (def2),
2261 : gimple_assign_rhs2 (def2));
2262 : else
2263 128293 : t = maybe_fold_and_comparisons (type,
2264 : lcode, op1, op2,
2265 : rcode, gimple_assign_rhs1 (def2),
2266 : gimple_assign_rhs2 (def2));
2267 546301 : if (!t)
2268 546075 : continue;
2269 :
2270 : /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2271 : always give us a boolean_type_node value back. If the original
2272 : BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2273 : we need to convert. */
2274 226 : if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2275 : {
2276 2 : if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2277 0 : continue;
2278 2 : t = fold_convert (TREE_TYPE (curr->op), t);
2279 : }
2280 :
2281 226 : if (TREE_CODE (t) != INTEGER_CST
2282 226 : && !operand_equal_p (t, curr->op, 0))
2283 : {
2284 220 : enum tree_code subcode;
2285 220 : tree newop1, newop2;
2286 220 : if (!COMPARISON_CLASS_P (t))
2287 20 : continue;
2288 220 : extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2289 220 : STRIP_USELESS_TYPE_CONVERSION (newop1);
2290 220 : STRIP_USELESS_TYPE_CONVERSION (newop2);
2291 220 : if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2292 0 : continue;
 : /* If the folded comparison is identical to CURR's own, reuse
 : curr->op rather than emitting a new statement. */
2293 220 : if (lcode == TREE_CODE (t)
2294 99 : && operand_equal_p (op1, newop1, 0)
2295 319 : && operand_equal_p (op2, newop2, 0))
2296 59 : t = curr->op;
 : /* SSA names occurring in abnormal PHIs must not gain new uses. */
2297 181 : else if ((TREE_CODE (newop1) == SSA_NAME
2298 161 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2299 302 : || (TREE_CODE (newop2) == SSA_NAME
2300 102 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2301 20 : continue;
2302 : }
2303 :
2304 206 : if (dump_file && (dump_flags & TDF_DETAILS))
2305 : {
2306 6 : fprintf (dump_file, "Equivalence: ");
2307 6 : print_generic_expr (dump_file, curr->op);
2308 6 : fprintf (dump_file, " %s ", op_symbol_code (opcode));
2309 6 : print_generic_expr (dump_file, oe->op);
2310 6 : fprintf (dump_file, " -> ");
2311 6 : print_generic_expr (dump_file, t);
2312 6 : fprintf (dump_file, "\n");
2313 : }
2314 :
2315 : /* Now we can delete oe, as it has been subsumed by the new combined
2316 : expression t. */
2317 206 : ops->ordered_remove (i);
2318 206 : reassociate_stats.ops_eliminated ++;
2319 :
2320 : /* If t is the same as curr->op, we're done. Otherwise we must
2321 : replace curr->op with t. Special case is if we got a constant
2322 : back, in which case we add it to the end instead of in place of
2323 : the current entry. */
2324 206 : if (TREE_CODE (t) == INTEGER_CST)
2325 : {
2326 6 : ops->ordered_remove (currindex);
2327 6 : add_to_ops_vec (ops, t);
2328 : }
2329 200 : else if (!operand_equal_p (t, curr->op, 0))
2330 : {
2331 141 : gimple *sum;
2332 141 : enum tree_code subcode;
2333 141 : tree newop1;
2334 141 : tree newop2;
2335 141 : gcc_assert (COMPARISON_CLASS_P (t));
2336 141 : extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2337 141 : STRIP_USELESS_TYPE_CONVERSION (newop1);
2338 141 : STRIP_USELESS_TYPE_CONVERSION (newop2);
2339 141 : gcc_checking_assert (is_gimple_val (newop1)
2340 : && is_gimple_val (newop2));
 : /* Materialize the combined comparison as a new statement. */
2341 141 : sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2342 141 : curr->op = gimple_get_lhs (sum);
2343 : }
2344 : return true;
2345 : }
2346 :
2347 : return false;
2348 : }
2349 :
2350 :
2351 : /* Transform repeated addition of same values into multiply with
2352 : constant. */
2353 : static bool
2354 2312173 : transform_add_to_multiply (vec<operand_entry *> *ops)
2355 : {
2356 2312173 : operand_entry *oe;
2357 2312173 : tree op = NULL_TREE;
2358 2312173 : int j;
2359 2312173 : int i, start = -1, end = 0, count = 0;
2360 2312173 : auto_vec<std::pair <int, int> > indxs;
2361 2312173 : bool changed = false;
2362 :
 : /* Restrict to integral types, or floats when unsafe-math
 : optimizations allow x + x + x to become 3 * x. */
2363 2312173 : if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2364 69692 : && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2365 31444 : || !flag_unsafe_math_optimizations))
2366 : return false;
2367 :
2368 : /* Look for repeated operands. */
2369 6997375 : FOR_EACH_VEC_ELT (*ops, i, oe)
2370 : {
2371 4723509 : if (start == -1)
2372 : {
2373 2273866 : count = 1;
2374 2273866 : op = oe->op;
2375 2273866 : start = i;
2376 : }
2377 2449643 : else if (operand_equal_p (oe->op, op, 0))
2378 : {
2379 155 : count++;
2380 155 : end = i;
2381 : }
2382 : else
2383 : {
 : /* Run of equal operands ended; record it if longer than one. */
2384 2449488 : if (count > 1)
2385 47 : indxs.safe_push (std::make_pair (start, end));
2386 2449488 : count = 1;
2387 2449488 : op = oe->op;
2388 2449488 : start = i;
2389 : }
2390 : }
2391 :
 : /* Record a run that extends to the end of the vector. */
2392 2273866 : if (count > 1)
2393 38 : indxs.safe_push (std::make_pair (start, end));
2394 :
 : /* Process runs back to front so earlier indices stay valid while
 : entries are removed. */
2395 2274024 : for (j = indxs.length () - 1; j >= 0; --j)
2396 : {
2397 : /* Convert repeated operand addition to multiplication. */
2398 85 : start = indxs[j].first;
2399 85 : end = indxs[j].second;
2400 85 : op = (*ops)[start]->op;
2401 85 : count = end - start + 1;
2402 325 : for (i = end; i >= start; --i)
2403 240 : ops->unordered_remove (i);
2404 85 : tree tmp = make_ssa_name (TREE_TYPE (op));
2405 85 : tree cst = build_int_cst (integer_type_node, count);
2406 85 : gassign *mul_stmt
2407 85 : = gimple_build_assign (tmp, MULT_EXPR,
2408 85 : op, fold_convert (TREE_TYPE (op), cst));
 : /* Mark the new multiply visited (reassoc pass bookkeeping). */
2409 85 : gimple_set_visited (mul_stmt, true);
2410 85 : add_to_ops_vec (ops, tmp, mul_stmt);
2411 85 : changed = true;
2412 : }
2413 :
2414 : return changed;
2415 2312173 : }
2416 :
2417 :
2418 : /* Perform various identities and other optimizations on the list of
2419 : operand entries, stored in OPS. The tree code for the binary
2420 : operation between all the operands is OPCODE. */
2421 :
2422 : static void
2423 4603345 : optimize_ops_list (enum tree_code opcode,
2424 : vec<operand_entry *> *ops)
2425 : {
2426 4621210 : unsigned int length = ops->length ();
2427 4621210 : unsigned int i;
2428 4621210 : operand_entry *oe;
2429 9240463 : operand_entry *oelast = NULL;
2430 9240463 : bool iterate = false;
2431 :
2432 4621210 : if (length == 1)
2433 4603345 : return;
2434 :
2435 4619253 : oelast = ops->last ();
2436 :
2437 : /* If the last two are constants, pop the constants off, merge them
2438 : and try the next two. */
2439 4619253 : if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2440 : {
2441 3345343 : operand_entry *oelm1 = (*ops)[length - 2];
2442 :
2443 3345343 : if (oelm1->rank == 0
2444 14281 : && is_gimple_min_invariant (oelm1->op)
2445 3359624 : && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2446 14281 : TREE_TYPE (oelast->op)))
2447 : {
2448 14281 : tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2449 : oelm1->op, oelast->op);
2450 :
2451 14281 : if (folded && is_gimple_min_invariant (folded))
2452 : {
2453 14263 : if (dump_file && (dump_flags & TDF_DETAILS))
2454 0 : fprintf (dump_file, "Merging constants\n");
2455 :
2456 14263 : ops->pop ();
2457 14263 : ops->pop ();
2458 :
2459 14263 : add_to_ops_vec (ops, folded);
2460 14263 : reassociate_stats.constants_eliminated++;
2461 :
 : /* Recurse: the merged constant may combine further. */
2462 14263 : optimize_ops_list (opcode, ops);
2463 14263 : return;
2464 : }
2465 : }
2466 : }
2467 :
2468 4604990 : eliminate_using_constants (opcode, ops);
2469 4604990 : oelast = NULL;
2470 :
 : /* Pairwise eliminations; i only advances when the current entry
 : survives, since eliminations shift later entries down. */
2471 14082657 : for (i = 0; ops->iterate (i, &oe);)
2472 : {
2473 9477669 : bool done = false;
2474 :
2475 9477669 : if (eliminate_not_pairs (opcode, ops, i, oe))
2476 2 : return;
2477 9477668 : if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2478 9477517 : || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2479 18951811 : || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2480 : {
2481 3731 : if (done)
2482 : return;
2483 3730 : iterate = true;
2484 3730 : oelast = NULL;
2485 3730 : continue;
2486 : }
2487 9473937 : oelast = oe;
2488 9473937 : i++;
2489 : }
2490 :
 : /* Something was eliminated: rescan for newly exposed opportunities. */
2491 4604988 : if (iterate)
2492 : optimize_ops_list (opcode, ops);
2493 : }
2494 :
2495 : /* The following functions are subroutines to optimize_range_tests and allow
2496 : it to try to change a logical combination of comparisons into a range
2497 : test.
2498 :
2499 : For example, both
2500 : X == 2 || X == 5 || X == 3 || X == 4
2501 : and
2502 : X >= 2 && X <= 5
2503 : are converted to
2504 : (unsigned) (X - 2) <= 3
2505 :
2506 : For more information see comments above fold_range_test in fold-const.cc,
2507 : this implementation is for GIMPLE. */
2508 :
2509 :
2510 :
2511 : /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2512 :
2513 : void
2514 141 : dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2515 : {
2516 141 : if (!skip_exp)
2517 59 : print_generic_expr (file, r->exp);
2518 251 : fprintf (file, " %c[", r->in_p ? '+' : '-');
2519 141 : print_generic_expr (file, r->low);
2520 141 : fputs (", ", file);
2521 141 : print_generic_expr (file, r->high);
2522 141 : fputc (']', file);
2523 141 : }
2524 :
2525 : /* Dump the range entry R to STDERR. */
2526 :
2527 : DEBUG_FUNCTION void
2528 0 : debug_range_entry (struct range_entry *r)
2529 : {
 : /* Pass skip_exp == false so the expression itself is printed too. */
2530 0 : dump_range_entry (stderr, r, false);
2531 0 : fputc ('\n', stderr);
2532 0 : }
2533 :
2534 : /* This is similar to make_range in fold-const.cc, but on top of
2535 : GIMPLE instead of trees. If EXP is non-NULL, it should be
2536 : an SSA_NAME and STMT argument is ignored, otherwise STMT
2537 : argument should be a GIMPLE_COND. */
2538 :
2539 : void
2540 5558333 : init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2541 : {
2542 5558333 : int in_p;
2543 5558333 : tree low, high;
2544 5558333 : bool is_bool, strict_overflow_p;
2545 :
 : /* Start from an empty entry; r->exp stays NULL unless a usable
 : boolean-valued expression is found below. */
2546 5558333 : r->exp = NULL_TREE;
2547 5558333 : r->in_p = false;
2548 5558333 : r->strict_overflow_p = false;
2549 5558333 : r->low = NULL_TREE;
2550 5558333 : r->high = NULL_TREE;
 : /* Only integral SSA_NAMEs (or a GIMPLE_COND via STMT) qualify. */
2551 5558333 : if (exp != NULL_TREE
2552 5558333 : && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2553 793509 : return;
2554 :
2555 : /* Start with simply saying "EXP != 0" and then look at the code of EXP
2556 : and see if we can refine the range. Some of the cases below may not
2557 : happen, but it doesn't seem worth worrying about this. We "continue"
2558 : the outer loop when we've changed something; otherwise we "break"
2559 : the switch, which will "break" the while. */
2560 5068489 : low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2561 5068489 : high = low;
2562 5068489 : in_p = 0;
2563 5068489 : strict_overflow_p = false;
2564 5068489 : is_bool = false;
2565 5068489 : if (exp == NULL_TREE)
2566 : is_bool = true;
2567 1551328 : else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2568 : {
2569 642911 : if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2570 : is_bool = true;
2571 : else
 : /* Signed 1-bit precision is not handled; give up. */
2572 : return;
2573 : }
2574 908417 : else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2575 0 : is_bool = true;
2576 :
 : /* Walk backwards through the SSA def chain, refining [low, high]. */
2577 8029760 : while (1)
2578 : {
2579 8029760 : enum tree_code code;
2580 8029760 : tree arg0, arg1, exp_type;
2581 8029760 : tree nexp;
2582 8029760 : location_t loc;
2583 :
2584 8029760 : if (exp != NULL_TREE)
2585 : {
2586 4512599 : if (TREE_CODE (exp) != SSA_NAME
2587 4512599 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2588 : break;
2589 :
2590 4512599 : stmt = SSA_NAME_DEF_STMT (exp);
2591 4512599 : if (!is_gimple_assign (stmt))
2592 : break;
2593 :
2594 2764332 : code = gimple_assign_rhs_code (stmt);
2595 2764332 : arg0 = gimple_assign_rhs1 (stmt);
2596 2764332 : arg1 = gimple_assign_rhs2 (stmt);
2597 2764332 : exp_type = TREE_TYPE (exp);
2598 : }
2599 : else
2600 : {
 : /* No EXP: take operands from the GIMPLE_COND in STMT. */
2601 3517161 : code = gimple_cond_code (stmt);
2602 3517161 : arg0 = gimple_cond_lhs (stmt);
2603 3517161 : arg1 = gimple_cond_rhs (stmt);
2604 3517161 : exp_type = boolean_type_node;
2605 : }
2606 :
2607 6281493 : if (TREE_CODE (arg0) != SSA_NAME
2608 5004432 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)
2609 11285459 : || ssa_name_maybe_undef_p (arg0))
2610 : break;
2611 4997309 : loc = gimple_location (stmt);
2612 4997309 : switch (code)
2613 : {
2614 33123 : case BIT_NOT_EXPR:
2615 33123 : if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2616 : /* Ensure the range is either +[-,0], +[0,0],
2617 : -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2618 : -[1,1]. If it is e.g. +[-,-] or -[-,-]
2619 : or similar expression of unconditional true or
2620 : false, it should not be negated. */
2621 33123 : && ((high && integer_zerop (high))
2622 0 : || (low && integer_onep (low))))
2623 : {
2624 5883 : in_p = !in_p;
2625 5883 : exp = arg0;
2626 5883 : continue;
2627 : }
2628 : break;
2629 2435 : case SSA_NAME:
2630 2435 : exp = arg0;
2631 2435 : continue;
2632 231451 : CASE_CONVERT:
2633 231451 : if (is_bool)
2634 : {
2635 124008 : if ((TYPE_PRECISION (exp_type) == 1
2636 117782 : || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2637 124008 : && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2638 : return;
2639 : }
2640 107443 : else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2641 : {
2642 4194 : if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2643 : is_bool = true;
2644 : else
2645 : return;
2646 : }
2647 103249 : else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2648 125529 : is_bool = true;
2649 228778 : goto do_default;
2650 : case EQ_EXPR:
2651 : case NE_EXPR:
2652 : case LT_EXPR:
2653 : case LE_EXPR:
2654 : case GE_EXPR:
2655 : case GT_EXPR:
2656 : is_bool = true;
2657 : /* FALLTHRU */
2658 652459 : default:
2659 652459 : if (!is_bool)
2660 : return;
2661 351499 : do_default:
 : /* make_range_step refines [low, high] for this statement and
 : returns the next tree to examine, or NULL_TREE. */
2662 4658118 : nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2663 : &low, &high, &in_p,
2664 : &strict_overflow_p);
2665 4658118 : if (nexp != NULL_TREE)
2666 : {
2667 2952985 : exp = nexp;
2668 2952985 : gcc_assert (TREE_CODE (exp) == SSA_NAME);
2669 2952985 : continue;
2670 : }
2671 : break;
2672 : }
2673 : break;
2674 : }
 : /* Commit the refined range only if a boolean value was proven. */
2675 4764824 : if (is_bool)
2676 : {
2677 4161561 : r->exp = exp;
2678 4161561 : r->in_p = in_p;
2679 4161561 : r->low = low;
2680 4161561 : r->high = high;
2681 4161561 : r->strict_overflow_p = strict_overflow_p;
2682 : }
2683 : }
2684 :
2685 : /* Comparison function for qsort. Sort entries
2686 : without SSA_NAME exp first, then with SSA_NAMEs sorted
2687 : by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2688 : by increasing ->low and if ->low is the same, by increasing
2689 : ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2690 : maximum. */
2691 :
2692 : static int
2693 6283645 : range_entry_cmp (const void *a, const void *b)
2694 : {
2695 6283645 : const struct range_entry *p = (const struct range_entry *) a;
2696 6283645 : const struct range_entry *q = (const struct range_entry *) b;
2697 :
2698 6283645 : if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2699 : {
2700 2813981 : if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2701 : {
2702 : /* Group range_entries for the same SSA_NAME together. */
2703 2745214 : if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2704 : return -1;
2705 1177217 : else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2706 : return 1;
2707 : /* If ->low is different, NULL low goes first, then by
2708 : ascending low. */
2709 156247 : if (p->low != NULL_TREE)
2710 : {
2711 139145 : if (q->low != NULL_TREE)
2712 : {
 : /* fold_binary may fail to decide; fall through then. */
2713 131544 : tree tem = fold_binary (LT_EXPR, boolean_type_node,
2714 : p->low, q->low);
2715 131544 : if (tem && integer_onep (tem))
2716 : return -1;
2717 63555 : tem = fold_binary (GT_EXPR, boolean_type_node,
2718 : p->low, q->low);
2719 63555 : if (tem && integer_onep (tem))
2720 : return 1;
2721 : }
2722 : else
2723 : return 1;
2724 : }
2725 17102 : else if (q->low != NULL_TREE)
2726 : return -1;
2727 : /* If ->high is different, NULL high goes last, before that by
2728 : ascending high. */
2729 28837 : if (p->high != NULL_TREE)
2730 : {
2731 28703 : if (q->high != NULL_TREE)
2732 : {
2733 28408 : tree tem = fold_binary (LT_EXPR, boolean_type_node,
2734 : p->high, q->high);
2735 28408 : if (tem && integer_onep (tem))
2736 : return -1;
2737 8509 : tem = fold_binary (GT_EXPR, boolean_type_node,
2738 : p->high, q->high);
2739 8509 : if (tem && integer_onep (tem))
2740 : return 1;
2741 : }
2742 : else
2743 : return -1;
2744 : }
2745 134 : else if (q->high != NULL_TREE)
2746 : return 1;
2747 : /* If both ranges are the same, sort below by ascending idx. */
2748 : }
2749 : else
2750 : return 1;
2751 : }
2752 3469664 : else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2753 : return -1;
2754 :
 : /* Final tie-break on idx makes the order deterministic; the assert
 : documents that distinct entries never share an idx. */
2755 3382015 : if (p->idx < q->idx)
2756 : return -1;
2757 : else
2758 : {
2759 1706128 : gcc_checking_assert (p->idx > q->idx);
2760 : return 1;
2761 : }
2762 : }
2763 :
2764 : /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2765 : insert needed statements BEFORE or after GSI. */
2766 :
2767 : static tree
2768 24533 : force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2769 : {
2770 24533 : enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2771 24533 : tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2772 24533 : if (TREE_CODE (ret) != SSA_NAME)
2773 : {
2774 37 : gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2775 37 : if (before)
2776 37 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
2777 : else
2778 0 : gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2779 37 : ret = gimple_assign_lhs (g);
2780 : }
2781 24533 : return ret;
2782 : }
2783 :
2784 : /* Helper routine of optimize_range_test.
2785 : [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2786 : RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2787 : OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2788 : is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2789 : an array of COUNT pointers to other ranges. Return
2790 : true if the range merge has been successful.
2791 : If OPCODE is ERROR_MARK, this is called from within
2792 : maybe_optimize_range_tests and is performing inter-bb range optimization.
2793 : In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2794 : oe->rank. */
2795 :
2796 : static bool
2797 24533 : update_range_test (struct range_entry *range, struct range_entry *otherrange,
2798 : struct range_entry **otherrangep,
2799 : unsigned int count, enum tree_code opcode,
2800 : vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2801 : bool in_p, tree low, tree high, bool strict_overflow_p)
2802 : {
2803 24533 : unsigned int idx = range->idx;
2804 24533 : struct range_entry *swap_with = NULL;
2805 24533 : basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2806 24533 : if (opcode == ERROR_MARK)
2807 : {
2808 : /* For inter-bb range test optimization, pick from the range tests
2809 : the one which is tested in the earliest condition (one dominating
2810 : the others), because otherwise there could be some UB (e.g. signed
2811 : overflow) in following bbs that we'd expose which wasn't there in
2812 : the original program. See PR104196. */
2813 14092 : basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2814 14092 : basic_block range_bb = orig_range_bb;
2815 31090 : for (unsigned int i = 0; i < count; i++)
2816 : {
2817 16998 : struct range_entry *this_range;
2818 16998 : if (otherrange)
2819 10391 : this_range = otherrange + i;
2820 : else
2821 6607 : this_range = otherrangep[i];
2822 16998 : operand_entry *oe = (*ops)[this_range->idx];
2823 16998 : basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2824 16998 : if (range_bb != this_bb
2825 16998 : && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2826 : {
2827 6831 : swap_with = this_range;
2828 6831 : range_bb = this_bb;
2829 6831 : idx = this_range->idx;
2830 : }
2831 : }
2832 : /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2833 : only defined in later blocks. In this case we can't move the
2834 : merged comparison earlier, so instead check if there are any stmts
2835 : that might trigger signed integer overflow in between and rewrite
2836 : them. But only after we check if the optimization is possible. */
2837 14092 : if (seq && swap_with)
2838 : {
2839 3399 : rewrite_bb_first = range_bb;
2840 3399 : rewrite_bb_last = orig_range_bb;
2841 3399 : idx = range->idx;
2842 3399 : swap_with = NULL;
2843 : }
2844 : }
2845 24533 : operand_entry *oe = (*ops)[idx];
2846 24533 : tree op = oe->op;
2847 24533 : gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2848 12091 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2849 24533 : location_t loc = gimple_location (stmt);
2850 24533 : tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2851 24533 : tree tem = build_range_check (loc, optype, unshare_expr (exp),
2852 : in_p, low, high);
2853 24533 : enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2854 24533 : gimple_stmt_iterator gsi;
2855 24533 : unsigned int i, uid;
2856 :
2857 24533 : if (tem == NULL_TREE)
2858 : return false;
2859 :
2860 : /* If op is default def SSA_NAME, there is no place to insert the
2861 : new comparison. Give up, unless we can use OP itself as the
2862 : range test. */
2863 36975 : if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2864 : {
2865 0 : if (op == range->exp
2866 0 : && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2867 0 : || TREE_CODE (optype) == BOOLEAN_TYPE)
2868 0 : && (op == tem
2869 0 : || (TREE_CODE (tem) == EQ_EXPR
2870 0 : && TREE_OPERAND (tem, 0) == op
2871 0 : && integer_onep (TREE_OPERAND (tem, 1))))
2872 0 : && opcode != BIT_IOR_EXPR
2873 0 : && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2874 : {
2875 : stmt = NULL;
2876 : tem = op;
2877 : }
2878 : else
2879 0 : return false;
2880 : }
2881 :
2882 24533 : if (swap_with)
2883 1851 : std::swap (range->idx, swap_with->idx);
2884 :
2885 24533 : if (strict_overflow_p && issue_strict_overflow_warning (wc))
2886 0 : warning_at (loc, OPT_Wstrict_overflow,
2887 : "assuming signed overflow does not occur "
2888 : "when simplifying range test");
2889 :
2890 24533 : if (dump_file && (dump_flags & TDF_DETAILS))
2891 : {
2892 39 : struct range_entry *r;
2893 39 : fprintf (dump_file, "Optimizing range tests ");
2894 39 : dump_range_entry (dump_file, range, false);
2895 180 : for (i = 0; i < count; i++)
2896 : {
2897 102 : if (otherrange)
2898 82 : r = otherrange + i;
2899 : else
2900 20 : r = otherrangep[i];
2901 102 : if (r->exp
2902 102 : && r->exp != range->exp
2903 20 : && TREE_CODE (r->exp) == SSA_NAME)
2904 : {
2905 20 : fprintf (dump_file, " and ");
2906 20 : dump_range_entry (dump_file, r, false);
2907 : }
2908 : else
2909 : {
2910 82 : fprintf (dump_file, " and");
2911 82 : dump_range_entry (dump_file, r, true);
2912 : }
2913 : }
2914 39 : fprintf (dump_file, "\n into ");
2915 39 : print_generic_expr (dump_file, tem);
2916 39 : fprintf (dump_file, "\n");
2917 : }
2918 :
2919 : /* In inter-bb range optimization mode, if we have a seq, we can't
2920 : move the merged comparison to the earliest bb from the comparisons
2921 : being replaced, so instead rewrite stmts that could trigger signed
2922 : integer overflow. */
2923 6324 : for (basic_block bb = rewrite_bb_last;
2924 30857 : bb != rewrite_bb_first; bb = single_pred (bb))
2925 12648 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2926 24981 : !gsi_end_p (gsi); gsi_next (&gsi))
2927 : {
2928 18657 : gimple *stmt = gsi_stmt (gsi);
2929 18657 : if (gimple_needing_rewrite_undefined (stmt))
2930 : {
2931 57 : gimple_stmt_iterator gsip = gsi;
2932 57 : gimple_stmt_iterator gsin = gsi;
2933 57 : gsi_prev (&gsip);
2934 57 : gsi_next (&gsin);
2935 57 : rewrite_to_defined_unconditional (&gsi);
2936 57 : unsigned uid = gimple_uid (stmt);
2937 57 : if (gsi_end_p (gsip))
2938 31 : gsip = gsi_after_labels (bb);
2939 : else
2940 26 : gsi_next (&gsip);
2941 222 : for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2942 165 : gsi_next (&gsip))
2943 165 : gimple_set_uid (gsi_stmt (gsip), uid);
2944 : }
2945 : }
2946 :
2947 24533 : if (opcode == BIT_IOR_EXPR
2948 18561 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2949 17136 : tem = invert_truthvalue_loc (loc, tem);
2950 :
2951 24533 : tem = fold_convert_loc (loc, optype, tem);
2952 24533 : if (stmt)
2953 : {
2954 24533 : gsi = gsi_for_stmt (stmt);
2955 24533 : uid = gimple_uid (stmt);
2956 : }
2957 : else
2958 : {
2959 0 : gsi = gsi_none ();
2960 0 : uid = 0;
2961 : }
2962 24533 : if (stmt == NULL)
2963 0 : gcc_checking_assert (tem == op);
2964 : /* In rare cases range->exp can be equal to lhs of stmt.
2965 : In that case we have to insert after the stmt rather then before
2966 : it. If stmt is a PHI, insert it at the start of the basic block. */
2967 24533 : else if (op != range->exp)
2968 : {
2969 24533 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2970 24533 : tem = force_into_ssa_name (&gsi, tem, true);
2971 24533 : gsi_prev (&gsi);
2972 : }
2973 0 : else if (gimple_code (stmt) != GIMPLE_PHI)
2974 : {
2975 0 : gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2976 0 : tem = force_into_ssa_name (&gsi, tem, false);
2977 : }
2978 : else
2979 : {
2980 0 : gsi = gsi_after_labels (gimple_bb (stmt));
2981 0 : if (!gsi_end_p (gsi))
2982 0 : uid = gimple_uid (gsi_stmt (gsi));
2983 : else
2984 : {
2985 0 : gsi = gsi_start_bb (gimple_bb (stmt));
2986 0 : uid = 1;
2987 0 : while (!gsi_end_p (gsi))
2988 : {
2989 0 : uid = gimple_uid (gsi_stmt (gsi));
2990 0 : gsi_next (&gsi);
2991 : }
2992 : }
2993 0 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2994 0 : tem = force_into_ssa_name (&gsi, tem, true);
2995 0 : if (gsi_end_p (gsi))
2996 0 : gsi = gsi_last_bb (gimple_bb (stmt));
2997 : else
2998 24533 : gsi_prev (&gsi);
2999 : }
3000 152785 : for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3001 84897 : if (gimple_uid (gsi_stmt (gsi)))
3002 : break;
3003 : else
3004 64126 : gimple_set_uid (gsi_stmt (gsi), uid);
3005 :
3006 24533 : oe->op = tem;
3007 24533 : range->exp = exp;
3008 24533 : range->low = low;
3009 24533 : range->high = high;
3010 24533 : range->in_p = in_p;
3011 24533 : range->strict_overflow_p = false;
3012 :
3013 53004 : for (i = 0; i < count; i++)
3014 : {
3015 28471 : if (otherrange)
3016 18057 : range = otherrange + i;
3017 : else
3018 10414 : range = otherrangep[i];
3019 28471 : oe = (*ops)[range->idx];
3020 : /* Now change all the other range test immediate uses, so that
3021 : those tests will be optimized away. */
3022 28471 : if (opcode == ERROR_MARK)
3023 : {
3024 16998 : if (oe->op)
3025 2133 : oe->op = build_int_cst (TREE_TYPE (oe->op),
3026 2133 : oe->rank == BIT_IOR_EXPR ? 0 : 1);
3027 : else
3028 14865 : oe->op = (oe->rank == BIT_IOR_EXPR
3029 14865 : ? boolean_false_node : boolean_true_node);
3030 : }
3031 : else
3032 11473 : oe->op = error_mark_node;
3033 28471 : range->exp = NULL_TREE;
3034 28471 : range->low = NULL_TREE;
3035 28471 : range->high = NULL_TREE;
3036 : }
3037 : return true;
3038 : }
3039 :
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */

static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
			  tree lowi, tree lowj, tree highi, tree highj,
			  vec<operand_entry *> *ops,
			  struct range_entry *rangei,
			  struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (!integer_pow2p (lowxor))
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  exp = rangei->exp;
  scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
  int prec = GET_MODE_PRECISION (mode);
  /* If TYPE doesn't span the full precision of its mode (or has
     narrowed min/max values), do the masking in a full-precision
     integer type instead, converting all the constants over.  */
  if (TYPE_PRECISION (type) < prec
      || (wi::to_wide (TYPE_MIN_VALUE (type))
	  != wi::min_value (prec, TYPE_SIGN (type)))
      || (wi::to_wide (TYPE_MAX_VALUE (type))
	  != wi::max_value (prec, TYPE_SIGN (type))))
    {
      type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
      exp = fold_convert (type, exp);
      lowxor = fold_convert (type, lowxor);
      lowi = fold_convert (type, lowi);
      highi = fold_convert (type, highi);
    }
  /* Mask off the single differing bit from EXP and both bounds, then
     try to install (EXP & ~LOWXOR) in [LOWI & ~LOWXOR, HIGHI & ~LOWXOR]
     as the merged range test.  */
  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
			 NULL, rangei->in_p, lowj, highj,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
3095 :
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
			   tree lowi, tree lowj, tree highi, tree highj,
			   vec<operand_entry *> *ops,
			   struct range_entry *rangei,
			   struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (!integer_pow2p (tem1))
    return false;

  /* Perform the arithmetic in an unsigned type of full mode precision
     so the subtraction below can't trigger signed overflow.  */
  scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
  int prec = GET_MODE_PRECISION (mode);
  if (TYPE_PRECISION (type) < prec
      || (wi::to_wide (TYPE_MIN_VALUE (type))
	  != wi::min_value (prec, TYPE_SIGN (type)))
      || (wi::to_wide (TYPE_MAX_VALUE (type))
	  != wi::max_value (prec, TYPE_SIGN (type))))
    type = build_nonstandard_integer_type (prec, 1);
  else
    type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  /* Build ((EXP - LOWI) & ~(LOWJ - LOWI)) and try to install it in
     the merged range [0, HIGHI - LOWI].  */
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_build2 (MINUS_EXPR, type,
		      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
			 NULL, rangei->in_p, lowj, tem2,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
3153 :
3154 : /* It does some common checks for function optimize_range_tests_xor and
3155 : optimize_range_tests_diff.
3156 : If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3157 : Else it calls optimize_range_tests_diff. */
3158 :
3159 : static bool
3160 2127275 : optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3161 : bool optimize_xor, vec<operand_entry *> *ops,
3162 : struct range_entry *ranges)
3163 : {
3164 2127275 : int i, j;
3165 2127275 : bool any_changes = false;
3166 3716265 : for (i = first; i < length; i++)
3167 : {
3168 1588990 : tree lowi, highi, lowj, highj, type, tem;
3169 :
3170 1588990 : if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3171 980368 : continue;
3172 608622 : type = TREE_TYPE (ranges[i].exp);
3173 608622 : if (!INTEGRAL_TYPE_P (type))
3174 50292 : continue;
3175 558330 : lowi = ranges[i].low;
3176 558330 : if (lowi == NULL_TREE)
3177 35228 : lowi = TYPE_MIN_VALUE (type);
3178 558330 : highi = ranges[i].high;
3179 558330 : if (highi == NULL_TREE)
3180 6842 : continue;
3181 925172 : for (j = i + 1; j < length && j < i + 64; j++)
3182 : {
3183 378333 : bool changes;
3184 378333 : if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3185 337328 : continue;
3186 41005 : lowj = ranges[j].low;
3187 41005 : if (lowj == NULL_TREE)
3188 0 : continue;
3189 41005 : highj = ranges[j].high;
3190 41005 : if (highj == NULL_TREE)
3191 144 : highj = TYPE_MAX_VALUE (type);
3192 : /* Check lowj > highi. */
3193 41005 : tem = fold_binary (GT_EXPR, boolean_type_node,
3194 : lowj, highi);
3195 41005 : if (tem == NULL_TREE || !integer_onep (tem))
3196 0 : continue;
3197 41005 : if (optimize_xor)
3198 23046 : changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3199 : highi, highj, ops,
3200 : ranges + i, ranges + j);
3201 : else
3202 17959 : changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3203 : highi, highj, ops,
3204 : ranges + i, ranges + j);
3205 41005 : if (changes)
3206 : {
3207 : any_changes = true;
3208 : break;
3209 : }
3210 : }
3211 : }
3212 2127275 : return any_changes;
3213 : }
3214 :
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */

static tree
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
		       wide_int *mask, tree *totallowp)
{
  /* The range must be a valid non-negative constant width narrower
     than PREC (the word size in bits).  */
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)
    return NULL_TREE;

  /* MAX is the number of consecutive values covered by [LOW, HIGH];
     start with a mask of MAX low bits.  */
  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
    {
      /* EXP is X & CST.  If CST clears exactly one bit (below
	 PREC - MAX), the range also covers the values with that bit
	 set: widen the mask accordingly and look through the AND.  */
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
	  && wi::ltu_p (msk, prec - max))
	{
	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
	  max += msk.to_uhwi ();
	  exp = TREE_OPERAND (exp, 0);
	  /* Additionally look through (Y + CST2) & CST with a zero LOW:
	     the effective low bound becomes -CST2.  */
	  if (integer_zerop (low)
	      && TREE_CODE (exp) == PLUS_EXPR
	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
	    {
	      tree ret = TREE_OPERAND (exp, 0);
	      STRIP_NOPS (ret);
	      widest_int bias
		= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
				     TYPE_PRECISION (TREE_TYPE (low))));
	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
	      if (totallowp)
		{
		  /* First range: report the discovered low bound.  */
		  *totallowp = tbias;
		  return ret;
		}
	      else if (!tree_int_cst_lt (totallow, tbias))
		return NULL_TREE;
	      /* Otherwise shift the mask so it is relative to
		 TOTALLOW, provided it still fits in PREC bits.  */
	      bias = wi::to_widest (tbias);
	      bias -= wi::to_widest (totallow);
	      if (bias >= 0 && bias < prec - max)
		{
		  *mask = wi::lshift (*mask, bias);
		  return ret;
		}
	    }
	}
    }
  if (totallowp)
    return exp;
  if (!tree_int_cst_lt (totallow, low))
    return exp;
  /* Rebase the mask on TOTALLOW; LOW - TOTALLOW must fit within the
     remaining PREC - MAX bits.  */
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
    return NULL_TREE;

  *mask = wi::lshift (*mask, wi::to_widest (tem));
  return exp;
}
3285 :
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been by earlier optimizations optimized into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As all the 43 through 82 range is less than 64 numbers,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */

static bool
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
				  vec<operand_entry *> *ops,
				  struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;

  for (i = first; i < length - 1; i++)
    {
      tree lowi, highi, lowj, highj, type;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
	continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
	continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
	continue;
      wide_int mask;
      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
					highi, &mask, &lowi);
      if (exp == NULL_TREE)
	continue;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      candidates.truncate (0);
      /* Collect up to 63 following ranges that test the same underlying
	 expression (possibly through BIT_AND_EXPR / PLUS_EXPR forms)
	 and whose masks fit in the same word.  */
      int end = MIN (i + 64, length);
      for (j = i + 1; j < end; j++)
	{
	  tree exp2;
	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
	    continue;
	  if (ranges[j].exp == exp)
	    ;
	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
	    {
	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
	      if (exp2 == exp)
		;
	      else if (TREE_CODE (exp2) == PLUS_EXPR)
		{
		  exp2 = TREE_OPERAND (exp2, 0);
		  STRIP_NOPS (exp2);
		  if (exp2 != exp)
		    continue;
		}
	      else
		continue;
	    }
	  else
	    continue;
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	    continue;
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (TREE_TYPE (lowj));
	  wide_int mask2;
	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
					highj, &mask2, NULL);
	  if (exp2 != exp)
	    continue;
	  mask |= mask2;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	  candidates.safe_push (&ranges[j]);
	}

      /* If every possible relative value of the expression is a valid shift
	 amount, then we can merge the entry test in the bit test.  In this
	 case, if we would need otherwise 2 or more comparisons, then use
	 the bit test; in the other cases, the threshold is 3 comparisons.  */
      bool entry_test_needed;
      int_range_max r;
      if (TREE_CODE (exp) == SSA_NAME
	  && get_range_query (cfun)->range_of_expr (r, exp)
	  && !r.undefined_p ()
	  && !r.varying_p ()
	  && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
	{
	  /* Rebase LOWI (and shift the mask accordingly) to the known
	     minimum value of EXP, so no entry test is needed.  */
	  wide_int min = r.lower_bound ();
	  wide_int ilowi = wi::to_wide (lowi);
	  if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
	    {
	      lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
	      mask = wi::lshift (mask, ilowi - min);
	    }
	  else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
	    {
	      lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
	      mask = wi::lrshift (mask, min - ilowi);
	    }
	  entry_test_needed = false;
	}
      else
	entry_test_needed = true;
      if (candidates.length () >= (entry_test_needed ? 2 : 1))
	{
	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
					wi::to_widest (lowi)
					+ prec - 1 - wi::clz (mask));
	  operand_entry *oe = (*ops)[ranges[i].idx];
	  tree op = oe->op;
	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
			    : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
						    (cfun, oe->id));
	  location_t loc = gimple_location (stmt);
	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;

	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if maximum value is small enough.
	     We can avoid then subtraction of the minimum value, but the
	     mask constant could be perhaps more expensive.  */
	  if (compare_tree_int (lowi, 0) > 0
	      && compare_tree_int (high, prec) < 0
	      && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
	    {
	      /* Compare RTX costs of (x - m) & mask vs x & (mask << m)
		 on a scratch register.  */
	      int cost_diff;
	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
	      rtx reg = gen_raw_REG (word_mode, 10000);
	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
	      cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
						      GEN_INT (-m)),
					word_mode, speed_p);
	      rtx r = immed_wide_int_const (mask, word_mode);
	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      if (cost_diff > 0)
		{
		  mask = wi::lshift (mask, m);
		  lowi = build_zero_cst (TREE_TYPE (lowi));
		}
	    }

	  tree tem;
	  if (entry_test_needed)
	    {
	      tem = build_range_check (loc, optype, unshare_expr (exp),
				       false, lowi, high);
	      if (tem == NULL_TREE || is_gimple_val (tem))
		continue;
	    }
	  else
	    tem = NULL_TREE;
	  /* Build ((1 << (EXP - LOWI)) & MASK) == 0 in word-sized
	     unsigned arithmetic.  */
	  tree etype = unsigned_type_for (TREE_TYPE (exp));
	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
				 fold_convert_loc (loc, etype, exp),
				 fold_convert_loc (loc, etype, lowi));
	  exp = fold_convert_loc (loc, integer_type_node, exp);
	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
				 build_int_cst (word_type, 1), exp);
	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
				 wide_int_to_tree (word_type, mask));
	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
				 build_zero_cst (word_type));
	  if (is_gimple_val (exp))
	    continue;

	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit a code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     branch_fixup.  */
	  gimple_seq seq = NULL;
	  if (tem)
	    {
	      tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
	      gcc_assert (TREE_CODE (tem) == SSA_NAME);
	      gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
	    }
	  gimple_seq seq2;
	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
	  gimple_seq_add_seq_without_update (&seq, seq2);
	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
	  if (tem)
	    {
	      gimple *g = gimple_build_assign (make_ssa_name (optype),
					       BIT_IOR_EXPR, tem, exp);
	      gimple_set_location (g, loc);
	      gimple_seq_add_stmt_without_update (&seq, g);
	      exp = gimple_assign_lhs (g);
	    }
	  tree val = build_zero_cst (optype);
	  if (update_range_test (&ranges[i], NULL, candidates.address (),
				 candidates.length (), opcode, ops, exp,
				 seq, false, val, val, strict_overflow_p))
	    {
	      any_changes = true;
	      if (tem)
		reassoc_branch_fixups.safe_push (tem);
	    }
	  else
	    gimple_seq_discard (seq);
	}
    }
  return any_changes;
}
3503 :
3504 : /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3505 : and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3506 : Also, handle x < C && y < C && z < C where C is power of two as
3507 : (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3508 : as (x | y | z) < 0. */
3509 :
3510 : static bool
3511 1063646 : optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3512 : vec<operand_entry *> *ops,
3513 : struct range_entry *ranges)
3514 : {
3515 1063646 : int i;
3516 1063646 : unsigned int b;
3517 1063646 : bool any_changes = false;
3518 1063646 : auto_vec<int, 128> buckets;
3519 1063646 : auto_vec<int, 32> chains;
3520 1063646 : auto_vec<struct range_entry *, 32> candidates;
3521 :
3522 1858148 : for (i = first; i < length; i++)
3523 : {
3524 794502 : int idx;
3525 :
3526 1149753 : if (ranges[i].exp == NULL_TREE
3527 774983 : || TREE_CODE (ranges[i].exp) != SSA_NAME
3528 770445 : || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3529 1233753 : || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3530 355251 : continue;
3531 :
3532 439251 : if (ranges[i].low != NULL_TREE
3533 412551 : && ranges[i].high != NULL_TREE
3534 357373 : && ranges[i].in_p
3535 641234 : && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3536 : {
3537 173934 : idx = !integer_zerop (ranges[i].low);
3538 173934 : if (idx && !integer_all_onesp (ranges[i].low))
3539 95898 : continue;
3540 : }
3541 265317 : else if (ranges[i].high != NULL_TREE
3542 210104 : && TREE_CODE (ranges[i].high) == INTEGER_CST
3543 210104 : && ranges[i].in_p)
3544 : {
3545 37135 : wide_int w = wi::to_wide (ranges[i].high);
3546 37135 : int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3547 37135 : int l = wi::clz (w);
3548 37135 : idx = 2;
3549 100996 : if (l <= 0
3550 37135 : || l >= prec
3551 68197 : || w != wi::mask (prec - l, false, prec))
3552 26726 : continue;
3553 10409 : if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3554 6457 : && ranges[i].low == NULL_TREE)
3555 10409 : || (ranges[i].low
3556 8496 : && integer_zerop (ranges[i].low))))
3557 3531 : continue;
3558 37135 : }
3559 432193 : else if (ranges[i].high == NULL_TREE
3560 55213 : && ranges[i].low != NULL_TREE
3561 : /* Perform this optimization only in the last
3562 : reassoc pass, as it interferes with the reassociation
3563 : itself or could also with VRP etc. which might not
3564 : be able to virtually undo the optimization. */
3565 55178 : && !reassoc_insert_powi_p
3566 27846 : && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3567 255548 : && integer_zerop (ranges[i].low))
3568 : idx = 3;
3569 : else
3570 204011 : continue;
3571 :
3572 109085 : b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3573 109085 : if (buckets.length () <= b)
3574 94610 : buckets.safe_grow_cleared (b + 1, true);
3575 109085 : if (chains.length () <= (unsigned) i)
3576 109085 : chains.safe_grow (i + 1, true);
3577 109085 : chains[i] = buckets[b];
3578 109085 : buckets[b] = i + 1;
3579 : }
3580 :
3581 17094596 : FOR_EACH_VEC_ELT (buckets, b, i)
3582 16030950 : if (i && chains[i - 1])
3583 : {
3584 6640 : int j, k = i;
3585 6640 : if ((b % 4) == 2)
3586 : {
3587 : /* When ranges[X - 1].high + 1 is a power of two,
3588 : we need to process the same bucket up to
3589 : precision - 1 times, each time split the entries
3590 : with the same high bound into one chain and the
3591 : rest into another one to be processed later. */
3592 : int this_prev = i;
3593 : int other_prev = 0;
3594 146 : for (j = chains[i - 1]; j; j = chains[j - 1])
3595 : {
3596 79 : if (tree_int_cst_equal (ranges[i - 1].high,
3597 79 : ranges[j - 1].high))
3598 : {
3599 70 : chains[this_prev - 1] = j;
3600 70 : this_prev = j;
3601 : }
3602 9 : else if (other_prev == 0)
3603 : {
3604 7 : buckets[b] = j;
3605 7 : other_prev = j;
3606 : }
3607 : else
3608 : {
3609 2 : chains[other_prev - 1] = j;
3610 2 : other_prev = j;
3611 : }
3612 : }
3613 67 : chains[this_prev - 1] = 0;
3614 67 : if (other_prev)
3615 7 : chains[other_prev - 1] = 0;
3616 67 : if (chains[i - 1] == 0)
3617 : {
3618 5 : if (other_prev)
3619 5 : b--;
3620 5 : continue;
3621 : }
3622 : }
3623 16112 : for (j = chains[i - 1]; j; j = chains[j - 1])
3624 : {
3625 9477 : gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3626 9477 : gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3627 9477 : if (reassoc_stmt_dominates_stmt_p (gk, gj))
3628 2561 : k = j;
3629 : }
3630 6635 : tree type1 = TREE_TYPE (ranges[k - 1].exp);
3631 6635 : tree type2 = NULL_TREE;
3632 6635 : bool strict_overflow_p = false;
3633 6635 : candidates.truncate (0);
3634 6635 : if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3635 607 : type1 = pointer_sized_int_node;
3636 22747 : for (j = i; j; j = chains[j - 1])
3637 : {
3638 16112 : tree type = TREE_TYPE (ranges[j - 1].exp);
3639 16112 : strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3640 16112 : if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3641 1229 : type = pointer_sized_int_node;
3642 16112 : if ((b % 4) == 3)
3643 : {
3644 : /* For the signed < 0 cases, the types should be
3645 : really compatible (all signed with the same precision,
3646 : instead put ranges that have different in_p from
3647 : k first. */
3648 3613 : if (!useless_type_conversion_p (type1, type))
3649 0 : continue;
3650 3613 : if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3651 1009 : candidates.safe_push (&ranges[j - 1]);
3652 3613 : type2 = type1;
3653 3613 : continue;
3654 : }
3655 12499 : if (j == k
3656 12499 : || useless_type_conversion_p (type1, type))
3657 : ;
3658 423 : else if (type2 == NULL_TREE
3659 423 : || useless_type_conversion_p (type2, type))
3660 : {
3661 423 : if (type2 == NULL_TREE)
3662 415 : type2 = type;
3663 423 : candidates.safe_push (&ranges[j - 1]);
3664 : }
3665 : }
3666 6635 : unsigned l = candidates.length ();
3667 22747 : for (j = i; j; j = chains[j - 1])
3668 : {
3669 16112 : tree type = TREE_TYPE (ranges[j - 1].exp);
3670 16112 : if (j == k)
3671 6635 : continue;
3672 9477 : if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3673 622 : type = pointer_sized_int_node;
3674 9477 : if ((b % 4) == 3)
3675 : {
3676 1997 : if (!useless_type_conversion_p (type1, type))
3677 0 : continue;
3678 1997 : if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3679 988 : candidates.safe_push (&ranges[j - 1]);
3680 1997 : continue;
3681 : }
3682 7480 : if (useless_type_conversion_p (type1, type))
3683 : ;
3684 846 : else if (type2 == NULL_TREE
3685 423 : || useless_type_conversion_p (type2, type))
3686 423 : continue;
3687 7057 : candidates.safe_push (&ranges[j - 1]);
3688 : }
3689 6635 : gimple_seq seq = NULL;
3690 6635 : tree op = NULL_TREE;
3691 6635 : unsigned int id;
3692 6635 : struct range_entry *r;
3693 6635 : candidates.safe_push (&ranges[k - 1]);
3694 22747 : FOR_EACH_VEC_ELT (candidates, id, r)
3695 : {
3696 16112 : gimple *g;
3697 16112 : enum tree_code code;
3698 16112 : if (id == 0)
3699 : {
3700 6635 : op = r->exp;
3701 6635 : continue;
3702 : }
3703 9477 : if (id == l
3704 8053 : || POINTER_TYPE_P (TREE_TYPE (op))
3705 17029 : || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3706 : {
3707 1930 : code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3708 1930 : tree type3 = id >= l ? type1 : pointer_sized_int_node;
3709 1930 : if (code == BIT_NOT_EXPR
3710 1930 : && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3711 : {
3712 0 : g = gimple_build_assign (make_ssa_name (type3),
3713 : NOP_EXPR, op);
3714 0 : gimple_seq_add_stmt_without_update (&seq, g);
3715 0 : op = gimple_assign_lhs (g);
3716 : }
3717 1930 : g = gimple_build_assign (make_ssa_name (type3), code, op);
3718 1930 : gimple_seq_add_stmt_without_update (&seq, g);
3719 1930 : op = gimple_assign_lhs (g);
3720 : }
3721 9477 : tree type = TREE_TYPE (r->exp);
3722 9477 : tree exp = r->exp;
3723 9477 : if (POINTER_TYPE_P (type)
3724 8833 : || TREE_CODE (type) == OFFSET_TYPE
3725 18305 : || (id >= l && !useless_type_conversion_p (type1, type)))
3726 : {
3727 649 : tree type3 = id >= l ? type1 : pointer_sized_int_node;
3728 649 : g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3729 649 : gimple_seq_add_stmt_without_update (&seq, g);
3730 649 : exp = gimple_assign_lhs (g);
3731 : }
3732 9477 : if ((b % 4) == 3)
3733 3271 : code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3734 : else
3735 7480 : code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3736 18954 : g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3737 : code, op, exp);
3738 9477 : gimple_seq_add_stmt_without_update (&seq, g);
3739 9477 : op = gimple_assign_lhs (g);
3740 : }
3741 6635 : type1 = TREE_TYPE (ranges[k - 1].exp);
3742 6635 : if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3743 : {
3744 607 : gimple *g
3745 607 : = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3746 607 : gimple_seq_add_stmt_without_update (&seq, g);
3747 607 : op = gimple_assign_lhs (g);
3748 : }
3749 6635 : candidates.pop ();
3750 6635 : if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3751 : candidates.length (), opcode, ops, op,
3752 6635 : seq, ranges[k - 1].in_p, ranges[k - 1].low,
3753 : ranges[k - 1].high, strict_overflow_p))
3754 : any_changes = true;
3755 : else
3756 0 : gimple_seq_discard (seq);
3757 6697 : if ((b % 4) == 2 && buckets[b] != i)
3758 : /* There is more work to do for this bucket. */
3759 2 : b--;
3760 : }
3761 :
3762 1063646 : return any_changes;
3763 1063646 : }
3764 :
3765 : /* Attempt to optimize for signed a and b where b is known to be >= 0:
3766 : a >= 0 && a < b into (unsigned) a < (unsigned) b
3767 : a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3768 :
3769 : static bool
3770 1063646 : optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3771 : vec<operand_entry *> *ops,
3772 : struct range_entry *ranges,
3773 : basic_block first_bb)
3774 : {
3775 1063646 : int i;
3776 1063646 : bool any_changes = false;
3777 1063646 : hash_map<tree, int> *map = NULL;
3778 :
3779 1858148 : for (i = first; i < length; i++)
3780 : {
3781 794502 : if (ranges[i].exp == NULL_TREE
3782 775508 : || TREE_CODE (ranges[i].exp) != SSA_NAME
3783 770970 : || !ranges[i].in_p)
3784 321402 : continue;
3785 :
3786 473100 : tree type = TREE_TYPE (ranges[i].exp);
3787 900204 : if (!INTEGRAL_TYPE_P (type)
3788 464670 : || TYPE_UNSIGNED (type)
3789 176089 : || ranges[i].low == NULL_TREE
3790 167003 : || !integer_zerop (ranges[i].low)
3791 546836 : || ranges[i].high != NULL_TREE)
3792 427104 : continue;
3793 : /* EXP >= 0 here. */
3794 45996 : if (map == NULL)
3795 44408 : map = new hash_map <tree, int>;
3796 45996 : map->put (ranges[i].exp, i);
3797 : }
3798 :
3799 1063646 : if (map == NULL)
3800 : return false;
3801 :
3802 136872 : for (i = 0; i < length; i++)
3803 : {
3804 92464 : bool in_p = ranges[i].in_p;
3805 92464 : if (ranges[i].low == NULL_TREE
3806 91752 : || ranges[i].high == NULL_TREE)
3807 91939 : continue;
3808 43962 : if (!integer_zerop (ranges[i].low)
3809 43962 : || !integer_zerop (ranges[i].high))
3810 : {
3811 8784 : if (ranges[i].exp
3812 4392 : && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3813 0 : && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3814 0 : && integer_onep (ranges[i].low)
3815 4392 : && integer_onep (ranges[i].high))
3816 0 : in_p = !in_p;
3817 : else
3818 4392 : continue;
3819 : }
3820 :
3821 39570 : gimple *stmt;
3822 39570 : tree_code ccode;
3823 39570 : tree rhs1, rhs2;
3824 39570 : if (ranges[i].exp)
3825 : {
3826 38561 : if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3827 5 : continue;
3828 38556 : stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3829 38556 : if (!is_gimple_assign (stmt))
3830 862 : continue;
3831 37694 : ccode = gimple_assign_rhs_code (stmt);
3832 37694 : rhs1 = gimple_assign_rhs1 (stmt);
3833 37694 : rhs2 = gimple_assign_rhs2 (stmt);
3834 : }
3835 : else
3836 : {
3837 1009 : operand_entry *oe = (*ops)[ranges[i].idx];
3838 1009 : stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3839 1009 : if (gimple_code (stmt) != GIMPLE_COND)
3840 0 : continue;
3841 1009 : ccode = gimple_cond_code (stmt);
3842 1009 : rhs1 = gimple_cond_lhs (stmt);
3843 1009 : rhs2 = gimple_cond_rhs (stmt);
3844 : }
3845 :
3846 38703 : if (TREE_CODE (rhs1) != SSA_NAME
3847 38177 : || rhs2 == NULL_TREE
3848 38097 : || TREE_CODE (rhs2) != SSA_NAME)
3849 687 : continue;
3850 :
3851 38016 : switch (ccode)
3852 : {
3853 36904 : case GT_EXPR:
3854 36904 : case GE_EXPR:
3855 36904 : case LT_EXPR:
3856 36904 : case LE_EXPR:
3857 36904 : break;
3858 1112 : default:
3859 1112 : continue;
3860 : }
3861 36904 : if (in_p)
3862 801 : ccode = invert_tree_comparison (ccode, false);
3863 36904 : switch (ccode)
3864 : {
3865 15032 : case GT_EXPR:
3866 15032 : case GE_EXPR:
3867 15032 : std::swap (rhs1, rhs2);
3868 15032 : ccode = swap_tree_comparison (ccode);
3869 15032 : break;
3870 : case LT_EXPR:
3871 : case LE_EXPR:
3872 : break;
3873 0 : default:
3874 0 : gcc_unreachable ();
3875 : }
3876 :
3877 36904 : int *idx = map->get (rhs1);
3878 36904 : if (idx == NULL)
3879 930 : continue;
3880 :
3881 : /* maybe_optimize_range_tests allows statements without side-effects
3882 : in the basic blocks as long as they are consumed in the same bb.
3883 : Make sure rhs2's def stmt is not among them, otherwise we can't
3884 : use safely get_nonzero_bits on it. E.g. in:
3885 : # RANGE [-83, 1] NONZERO 173
3886 : # k_32 = PHI <k_47(13), k_12(9)>
3887 : ...
3888 : if (k_32 >= 0)
3889 : goto <bb 5>; [26.46%]
3890 : else
3891 : goto <bb 9>; [73.54%]
3892 :
3893 : <bb 5> [local count: 140323371]:
3894 : # RANGE [0, 1] NONZERO 1
3895 : _5 = (int) k_32;
3896 : # RANGE [0, 4] NONZERO 4
3897 : _21 = _5 << 2;
3898 : # RANGE [0, 4] NONZERO 4
3899 : iftmp.0_44 = (char) _21;
3900 : if (k_32 < iftmp.0_44)
3901 : goto <bb 6>; [84.48%]
3902 : else
3903 : goto <bb 9>; [15.52%]
3904 : the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3905 : k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3906 : to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3907 : those stmts even for negative k_32 and the value ranges would be no
3908 : longer guaranteed and so the optimization would be invalid. */
3909 35974 : while (opcode == ERROR_MARK)
3910 : {
3911 605 : gimple *g = SSA_NAME_DEF_STMT (rhs2);
3912 605 : basic_block bb2 = gimple_bb (g);
3913 605 : if (bb2
3914 605 : && bb2 != first_bb
3915 605 : && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3916 : {
3917 : /* As an exception, handle a few common cases. */
3918 512 : if (gimple_assign_cast_p (g)
3919 512 : && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3920 : {
3921 40 : tree op0 = gimple_assign_rhs1 (g);
3922 40 : if (TYPE_UNSIGNED (TREE_TYPE (op0))
3923 40 : && (TYPE_PRECISION (TREE_TYPE (rhs2))
3924 10 : > TYPE_PRECISION (TREE_TYPE (op0))))
3925 : /* Zero-extension is always ok. */
3926 : break;
3927 30 : else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3928 30 : == TYPE_PRECISION (TREE_TYPE (op0))
3929 30 : && TREE_CODE (op0) == SSA_NAME)
3930 : {
3931 : /* Cast from signed to unsigned or vice versa. Retry
3932 : with the op0 as new rhs2. */
3933 0 : rhs2 = op0;
3934 0 : continue;
3935 : }
3936 : }
3937 472 : else if (is_gimple_assign (g)
3938 472 : && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3939 0 : && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3940 944 : && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3941 : /* Masking with INTEGER_CST with MSB clear is always ok
3942 : too. */
3943 : break;
3944 : rhs2 = NULL_TREE;
3945 : }
3946 : break;
3947 : }
3948 35472 : if (rhs2 == NULL_TREE)
3949 502 : continue;
3950 :
3951 35997 : wide_int nz = get_nonzero_bits (rhs2);
3952 35472 : if (wi::neg_p (nz))
3953 34947 : continue;
3954 :
3955 : /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3956 : and RHS2 is known to be RHS2 >= 0. */
3957 525 : tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3958 :
3959 525 : enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3960 525 : if ((ranges[*idx].strict_overflow_p
3961 525 : || ranges[i].strict_overflow_p)
3962 0 : && issue_strict_overflow_warning (wc))
3963 0 : warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3964 : "assuming signed overflow does not occur "
3965 : "when simplifying range test");
3966 :
3967 525 : if (dump_file && (dump_flags & TDF_DETAILS))
3968 : {
3969 7 : struct range_entry *r = &ranges[*idx];
3970 7 : fprintf (dump_file, "Optimizing range test ");
3971 7 : print_generic_expr (dump_file, r->exp);
3972 7 : fprintf (dump_file, " +[");
3973 7 : print_generic_expr (dump_file, r->low);
3974 7 : fprintf (dump_file, ", ");
3975 7 : print_generic_expr (dump_file, r->high);
3976 7 : fprintf (dump_file, "] and comparison ");
3977 7 : print_generic_expr (dump_file, rhs1);
3978 7 : fprintf (dump_file, " %s ", op_symbol_code (ccode));
3979 7 : print_generic_expr (dump_file, rhs2);
3980 7 : fprintf (dump_file, "\n into (");
3981 7 : print_generic_expr (dump_file, utype);
3982 7 : fprintf (dump_file, ") ");
3983 7 : print_generic_expr (dump_file, rhs1);
3984 7 : fprintf (dump_file, " %s (", op_symbol_code (ccode));
3985 7 : print_generic_expr (dump_file, utype);
3986 7 : fprintf (dump_file, ") ");
3987 7 : print_generic_expr (dump_file, rhs2);
3988 7 : fprintf (dump_file, "\n");
3989 : }
3990 :
3991 525 : operand_entry *oe = (*ops)[ranges[i].idx];
3992 525 : ranges[i].in_p = 0;
3993 525 : if (opcode == BIT_IOR_EXPR
3994 486 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3995 : {
3996 40 : ranges[i].in_p = 1;
3997 40 : ccode = invert_tree_comparison (ccode, false);
3998 : }
3999 :
4000 525 : unsigned int uid = gimple_uid (stmt);
4001 525 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4002 525 : gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4003 525 : gimple_set_uid (g, uid);
4004 525 : rhs1 = gimple_assign_lhs (g);
4005 525 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4006 525 : if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4007 : {
4008 525 : g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4009 525 : gimple_set_uid (g, uid);
4010 525 : rhs2 = gimple_assign_lhs (g);
4011 525 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4012 : }
4013 525 : if (tree_swap_operands_p (rhs1, rhs2))
4014 : {
4015 478 : std::swap (rhs1, rhs2);
4016 478 : ccode = swap_tree_comparison (ccode);
4017 : }
4018 525 : if (gimple_code (stmt) == GIMPLE_COND)
4019 : {
4020 8 : gcond *c = as_a <gcond *> (stmt);
4021 8 : gimple_cond_set_code (c, ccode);
4022 8 : gimple_cond_set_lhs (c, rhs1);
4023 8 : gimple_cond_set_rhs (c, rhs2);
4024 8 : update_stmt (stmt);
4025 : }
4026 : else
4027 : {
4028 517 : tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4029 517 : if (!INTEGRAL_TYPE_P (ctype)
4030 517 : || (TREE_CODE (ctype) != BOOLEAN_TYPE
4031 3 : && TYPE_PRECISION (ctype) != 1))
4032 3 : ctype = boolean_type_node;
4033 517 : g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4034 517 : gimple_set_uid (g, uid);
4035 517 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4036 517 : if (oe->op && ctype != TREE_TYPE (oe->op))
4037 : {
4038 3 : g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4039 : NOP_EXPR, gimple_assign_lhs (g));
4040 3 : gimple_set_uid (g, uid);
4041 3 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4042 : }
4043 517 : ranges[i].exp = gimple_assign_lhs (g);
4044 517 : oe->op = ranges[i].exp;
4045 517 : ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4046 517 : ranges[i].high = ranges[i].low;
4047 : }
4048 525 : ranges[i].strict_overflow_p = false;
4049 525 : oe = (*ops)[ranges[*idx].idx];
4050 : /* Now change all the other range test immediate uses, so that
4051 : those tests will be optimized away. */
4052 525 : if (opcode == ERROR_MARK)
4053 : {
4054 11 : if (oe->op)
4055 3 : oe->op = build_int_cst (TREE_TYPE (oe->op),
4056 3 : oe->rank == BIT_IOR_EXPR ? 0 : 1);
4057 : else
4058 8 : oe->op = (oe->rank == BIT_IOR_EXPR
4059 8 : ? boolean_false_node : boolean_true_node);
4060 : }
4061 : else
4062 514 : oe->op = error_mark_node;
4063 525 : ranges[*idx].exp = NULL_TREE;
4064 525 : ranges[*idx].low = NULL_TREE;
4065 525 : ranges[*idx].high = NULL_TREE;
4066 525 : any_changes = true;
4067 : }
4068 :
4069 44408 : delete map;
4070 44408 : return any_changes;
4071 : }
4072 :
4073 : /* Optimize range tests, similarly how fold_range_test optimizes
4074 : it on trees. The tree code for the binary
4075 : operation between all the operands is OPCODE.
4076 : If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4077 : maybe_optimize_range_tests for inter-bb range optimization.
4078 : In that case if oe->op is NULL, oe->id is bb->index whose
4079 : GIMPLE_COND is && or ||ed into the test, and oe->rank says
4080 : the actual opcode.
4081 : FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
4082 :
4083 : static bool
4084 1063802 : optimize_range_tests (enum tree_code opcode,
4085 : vec<operand_entry *> *ops, basic_block first_bb)
4086 : {
4087 1063802 : unsigned int length = ops->length (), i, j, first;
4088 1063802 : operand_entry *oe;
4089 1063802 : struct range_entry *ranges;
4090 2127448 : bool any_changes = false;
4091 :
4092 1063802 : if (length == 1)
4093 : return false;
4094 :
     : /* Build a range_entry per operand; in the inter-bb case (oe->op
     : NULL) the entry is initialized from bb oe->id's GIMPLE_COND. */
4095 1063646 : ranges = XNEWVEC (struct range_entry, length);
4096 4381905 : for (i = 0; i < length; i++)
4097 : {
4098 2254613 : oe = (*ops)[i];
4099 2254613 : ranges[i].idx = i;
4100 2254613 : init_range_entry (ranges + i, oe->op,
4101 2254613 : oe->op
4102 : ? NULL
4103 270675 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4104 : /* For | invert it now, we will invert it again before emitting
4105 : the optimized expression. */
4106 2254613 : if (opcode == BIT_IOR_EXPR
4107 1579204 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4108 870494 : ranges[i].in_p = !ranges[i].in_p;
4109 : }
4110 :
     : /* Sort so tests on the same exp are adjacent; FIRST becomes the
     : index of the first entry whose exp is an SSA_NAME. */
4111 1063646 : qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4112 3587403 : for (i = 0; i < length; i++)
4113 1824841 : if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4114 : break;
4115 :
4116 : /* Try to merge ranges. */
4117 1844740 : for (first = i; i < length; i++)
4118 : {
4119 781094 : tree low = ranges[i].low;
4120 781094 : tree high = ranges[i].high;
4121 781094 : int in_p = ranges[i].in_p;
4122 781094 : bool strict_overflow_p = ranges[i].strict_overflow_p;
4123 781094 : int update_fail_count = 0;
4124 :
4125 794502 : for (j = i + 1; j < length; j++)
4126 : {
4127 429772 : if (ranges[i].exp != ranges[j].exp)
4128 : break;
4129 34554 : if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4130 34554 : ranges[j].in_p, ranges[j].low, ranges[j].high))
4131 : break;
4132 13408 : strict_overflow_p |= ranges[j].strict_overflow_p;
4133 : }
4134 :
4135 781094 : if (j == i + 1)
4136 768271 : continue;
4137 :
4138 12823 : if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4139 : opcode, ops, ranges[i].exp, NULL, in_p,
4140 : low, high, strict_overflow_p))
4141 : {
4142 12823 : i = j - 1;
4143 12823 : any_changes = true;
4144 : }
4145 : /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4146 : while update_range_test would fail. */
4147 : else if (update_fail_count == 64)
4148 : i = j - 1;
4149 : else
4150 12823 : ++update_fail_count;
4151 : }
4152 :
     : /* Run the special-case optimizations over the sorted ranges. */
4153 1063646 : any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4154 : ops, ranges);
4155 :
4156 1063646 : if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4157 1063629 : any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4158 : ops, ranges);
4159 1063646 : if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4160 1063646 : any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4161 : ops, ranges);
4162 1063646 : any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4163 : ranges, first_bb);
4164 1063646 : any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4165 : ops, ranges);
4166 :
     : /* Compact OPS, dropping operands the subroutines marked as deleted
     : with error_mark_node (inter-bb callers compact separately). */
4167 1063646 : if (any_changes && opcode != ERROR_MARK)
4168 : {
4169 : j = 0;
4170 35303 : FOR_EACH_VEC_ELT (*ops, i, oe)
4171 : {
4172 24576 : if (oe->op == error_mark_node)
4173 11987 : continue;
4174 12589 : else if (i != j)
4175 5159 : (*ops)[j] = oe;
4176 12589 : j++;
4177 : }
4178 10727 : ops->truncate (j);
4179 : }
4180 :
4181 1063646 : XDELETEVEC (ranges);
4182 1063646 : return any_changes;
4183 : }
4184 :
4185 : /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4186 : the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4187 : otherwise the comparison code. TYPE is a return value that is set
4188 : to type of comparison. */
4189 :
4190 : static tree_code
4191 49619 : ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4192 : tree *lhs, tree *rhs, gassign **vcond)
4193 : {
     : /* VAR must be an SSA_NAME defined by a VEC_COND_EXPR assignment. */
4194 49619 : if (TREE_CODE (var) != SSA_NAME)
4195 : return ERROR_MARK;
4196 :
4197 44464 : gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4198 31104 : if (stmt == NULL)
4199 : return ERROR_MARK;
4200 31104 : if (vcond)
4201 31104 : *vcond = stmt;
4202 :
4203 : /* ??? If we start creating more COND_EXPR, we could perform
4204 : this same optimization with them. For now, simplify. */
4205 41504 : if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4206 : return ERROR_MARK;
4207 :
     : /* The VEC_COND_EXPR condition must itself be an SSA_NAME defined
     : by a comparison assignment. */
4208 1240 : tree cond = gimple_assign_rhs1 (stmt);
4209 1240 : tree_code cmp = TREE_CODE (cond);
4210 1240 : if (cmp != SSA_NAME)
4211 : return ERROR_MARK;
4212 :
4213 50815 : gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4214 1214 : if (assign == NULL
4215 1214 : || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4216 : return ERROR_MARK;
4217 :
4218 1104 : cmp = gimple_assign_rhs_code (assign);
4219 1104 : if (lhs)
4220 1104 : *lhs = gimple_assign_rhs1 (assign);
4221 1104 : if (rhs)
4222 2208 : *rhs = gimple_assign_rhs2 (assign);
4223 :
4224 : /* ??? For now, allow only canonical true and false result vectors.
4225 : We could expand this to other constants should the need arise,
4226 : but at the moment we don't create them. */
4227 1104 : tree t = gimple_assign_rhs2 (stmt);
4228 1104 : tree f = gimple_assign_rhs3 (stmt);
4229 1104 : bool inv;
4230 1104 : if (integer_all_onesp (t))
4231 : inv = false;
4232 1068 : else if (integer_all_onesp (f))
4233 : {
     : /* Arms are swapped (-1 in the false arm): report the inverted
     : comparison and note it via *RETI. */
4234 1 : cmp = invert_tree_comparison (cmp, false);
4235 1 : inv = true;
4236 : }
4237 : else
4238 : return ERROR_MARK;
4239 37 : if (!integer_zerop (f))
4240 : return ERROR_MARK;
4241 :
4242 : /* Success! */
4243 18 : if (rets)
4244 18 : *rets = assign;
4245 18 : if (reti)
4246 18 : *reti = inv;
4247 18 : if (type)
4248 18 : *type = TREE_TYPE (cond);
4249 : return cmp;
4250 : }
4251 :
4252 : /* Optimize the condition of VEC_COND_EXPRs which have been combined
4253 : with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4254 :
4255 : static bool
4256 23257 : optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4257 : {
4258 23257 : unsigned int length = ops->length (), i, j;
4259 23257 : bool any_changes = false;
4260 :
4261 23257 : if (length == 1)
4262 : return false;
4263 :
     : /* For each pair of operands that are VEC_COND_EXPRs with canonical
     : -1/0 arms, try to fold the two comparisons into one. */
4264 72855 : for (i = 0; i < length; ++i)
4265 : {
4266 49601 : tree elt0 = (*ops)[i]->op;
4267 :
4268 49601 : gassign *stmt0, *vcond0;
4269 49601 : bool invert;
4270 49601 : tree type, lhs0, rhs0;
4271 49601 : tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4272 : &rhs0, &vcond0);
4273 49601 : if (cmp0 == ERROR_MARK)
4274 49583 : continue;
4275 :
4276 36 : for (j = i + 1; j < length; ++j)
4277 : {
4278 18 : tree &elt1 = (*ops)[j]->op;
4279 :
4280 18 : gassign *stmt1, *vcond1;
4281 18 : tree lhs1, rhs1;
4282 18 : tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4283 : &rhs1, &vcond1);
4284 18 : if (cmp1 == ERROR_MARK)
4285 18 : continue;
4286 :
4287 0 : tree comb;
4288 0 : if (opcode == BIT_AND_EXPR)
4289 0 : comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4290 : cmp1, lhs1, rhs1);
4291 0 : else if (opcode == BIT_IOR_EXPR)
4292 0 : comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4293 : cmp1, lhs1, rhs1);
4294 : else
4295 0 : gcc_unreachable ();
4296 0 : if (comb == NULL)
4297 0 : continue;
4298 :
4299 : /* Success! */
4300 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4301 : {
4302 0 : fprintf (dump_file, "Transforming ");
4303 0 : print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4304 0 : fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4305 0 : print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4306 0 : fprintf (dump_file, " into ");
4307 0 : print_generic_expr (dump_file, comb);
4308 0 : fputc ('\n', dump_file);
4309 : }
4310 :
     : /* Gimplify the combined condition before VCOND0 and install it
     : as the new VEC_COND_EXPR condition, swapping the arms back
     : if ovce_extract_ops reported an inverted comparison. */
4311 0 : gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4312 0 : tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4313 : true, GSI_SAME_STMT);
4314 0 : if (invert)
4315 0 : swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4316 : gimple_assign_rhs3_ptr (vcond0));
4317 0 : gimple_assign_set_rhs1 (vcond0, exp);
4318 0 : update_stmt (vcond0);
4319 :
     : /* The second operand has been merged into the first. */
4320 0 : elt1 = error_mark_node;
4321 0 : any_changes = true;
4322 : }
4323 : }
4324 :
     : /* Compact OPS, dropping the merged (error_mark_node) operands. */
4325 23254 : if (any_changes)
4326 : {
4327 : operand_entry *oe;
4328 : j = 0;
4329 0 : FOR_EACH_VEC_ELT (*ops, i, oe)
4330 : {
4331 0 : if (oe->op == error_mark_node)
4332 0 : continue;
4333 0 : else if (i != j)
4334 0 : (*ops)[j] = oe;
4335 0 : j++;
4336 : }
4337 0 : ops->truncate (j);
4338 : }
4339 :
4340 : return any_changes;
4341 : }
4342 :
4343 : /* Return true if STMT is a cast like:
4344 : <bb N>:
4345 : ...
4346 : _123 = (int) _234;
4347 :
4348 : <bb M>:
4349 : # _345 = PHI <_123(N), 1(...), 1(...)>
4350 : where _234 has bool type, _123 has single use and
4351 : bb N has a single successor M. This is commonly used in
4352 : the last block of a range test.
4353 :
4354 : Also Return true if STMT is tcc_compare like:
4355 : <bb N>:
4356 : ...
4357 : _234 = a_2(D) == 2;
4358 :
4359 : <bb M>:
4360 : # _345 = PHI <_234(N), 1(...), 1(...)>
4361 : _346 = (int) _345;
4362 : where _234 has booltype, single use and
4363 : bb N has a single successor M. This is commonly used in
4364 : the last block of a range test. */
4365 :
4366 : static bool
4367 15483344 : final_range_test_p (gimple *stmt)
4368 : {
4369 15483344 : basic_block bb, rhs_bb, lhs_bb;
4370 15483344 : edge e;
4371 15483344 : tree lhs, rhs;
4372 15483344 : use_operand_p use_p;
4373 15483344 : gimple *use_stmt;
4374 :
     : /* STMT must be either a cast or a comparison assignment. */
4375 15483344 : if (!gimple_assign_cast_p (stmt)
4376 15483344 : && (!is_gimple_assign (stmt)
4377 4903425 : || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4378 : != tcc_comparison)))
4379 : return false;
     : /* Its bb must fall through on a single non-complex edge. */
4380 568199 : bb = gimple_bb (stmt);
4381 15848510 : if (!single_succ_p (bb))
4382 : return false;
4383 567916 : e = single_succ_edge (bb);
4384 567916 : if (e->flags & EDGE_COMPLEX)
4385 : return false;
4386 :
     : /* Cast case: must be a bool-to-integral cast of an SSA_NAME.
     : Comparison case: the result must be boolean. */
4387 567916 : lhs = gimple_assign_lhs (stmt);
4388 567916 : rhs = gimple_assign_rhs1 (stmt);
4389 567916 : if (gimple_assign_cast_p (stmt)
4390 567916 : && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4391 396942 : || TREE_CODE (rhs) != SSA_NAME
4392 379899 : || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4393 : return false;
4394 :
4395 210344 : if (!gimple_assign_cast_p (stmt)
4396 210344 : && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4397 : return false;
4398 :
4399 : /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4400 210310 : if (!single_imm_use (lhs, &use_p, &use_stmt))
4401 : return false;
4402 :
4403 204395 : if (gimple_code (use_stmt) != GIMPLE_PHI
4404 204395 : || gimple_bb (use_stmt) != e->dest)
4405 : return false;
4406 :
4407 : /* And that the rhs is defined in the same loop. */
4408 202793 : if (gimple_assign_cast_p (stmt))
4409 : {
4410 71919 : if (TREE_CODE (rhs) != SSA_NAME
4411 71919 : || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4412 143832 : || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4413 43 : return false;
4414 : }
4415 : else
4416 : {
4417 130874 : if (TREE_CODE (lhs) != SSA_NAME
4418 130874 : || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4419 261748 : || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4420 0 : return false;
4421 : }
4422 :
4423 : return true;
4424 : }
4425 :
4426 : /* Return true if BB is suitable basic block for inter-bb range test
4427 : optimization. If BACKWARD is true, BB should be the only predecessor
4428 : of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4429 : or compared with to find a common basic block to which all conditions
4430 : branch to if true resp. false. If BACKWARD is false, TEST_BB should
4431 : be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4432 : TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4433 : block points to an empty block that falls through into *OTHER_BB and
4434 : the phi args match that path. */
4435 :
4436 : static bool
4437 11455661 : suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4438 : bool *test_swapped_p, bool backward)
4439 : {
4440 11455661 : edge_iterator ei, ei2;
4441 11455661 : edge e, e2;
4442 11455661 : gimple *stmt;
4443 11455661 : gphi_iterator gsi;
4444 11455661 : bool other_edge_seen = false;
4445 11455661 : bool is_cond;
4446 :
4447 11455661 : if (test_bb == bb)
4448 : return false;
4449 : /* Check last stmt first. */
4450 11455661 : stmt = last_nondebug_stmt (bb);
4451 11455661 : if (stmt == NULL
4452 10592416 : || (gimple_code (stmt) != GIMPLE_COND
4453 528971 : && (backward || !final_range_test_p (stmt)))
4454 10103425 : || gimple_visited_p (stmt)
4455 10042203 : || stmt_could_throw_p (cfun, stmt)
4456 21497738 : || *other_bb == bb)
4457 1413587 : return false;
4458 10042074 : is_cond = gimple_code (stmt) == GIMPLE_COND;
4459 10042074 : if (is_cond)
4460 : {
4461 : /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4462 : goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4463 : to *OTHER_BB (if not set yet, try to find it out). */
4464 19081129 : if (EDGE_COUNT (bb->succs) != 2)
4465 : return false;
4466 19734455 : FOR_EACH_EDGE (e, ei, bb->succs)
4467 : {
4468 16294885 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4469 : return false;
4470 16294885 : if (e->dest == test_bb)
4471 : {
4472 5428810 : if (backward)
4473 5426380 : continue;
4474 : else
4475 : return false;
4476 : }
4477 10866075 : if (e->dest == bb)
4478 : return false;
4479 10712808 : if (*other_bb == NULL)
4480 : {
4481 26271999 : FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4482 17514666 : if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4483 : return false;
4484 17514666 : else if (e->dest == e2->dest)
4485 2390199 : *other_bb = e->dest;
4486 8757333 : if (*other_bb == NULL)
4487 : return false;
4488 : }
4489 4345674 : if (e->dest == *other_bb)
4490 : other_edge_seen = true;
4491 946353 : else if (backward)
4492 : return false;
4493 : }
4494 3439570 : if (*other_bb == NULL || !other_edge_seen)
4495 : return false;
4496 : }
4497 39854 : else if (single_succ (bb) != *other_bb)
4498 : return false;
4499 :
4500 : /* Now check all PHIs of *OTHER_BB. */
4501 3438431 : e = find_edge (bb, *other_bb);
4502 3438431 : e2 = find_edge (test_bb, *other_bb);
4503 3445259 : retry:;
4504 5053213 : for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4505 : {
4506 2676461 : gphi *phi = gsi.phi ();
4507 : /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4508 : corresponding to BB and TEST_BB predecessor must be the same. */
4509 2676461 : if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4510 2676461 : gimple_phi_arg_def (phi, e2->dest_idx), 0))
4511 : {
4512 : /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4513 : one of the PHIs should have the lhs of the last stmt in
4514 : that block as PHI arg and that PHI should have 0 or 1
4515 : corresponding to it in all other range test basic blocks
4516 : considered. */
4517 1137196 : if (!is_cond)
4518 : {
4519 41730 : if (gimple_phi_arg_def (phi, e->dest_idx)
4520 41730 : == gimple_assign_lhs (stmt)
4521 41730 : && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4522 18150 : || integer_onep (gimple_phi_arg_def (phi,
4523 18150 : e2->dest_idx))))
4524 37199 : continue;
4525 : }
4526 : else
4527 : {
4528 1095466 : gimple *test_last = last_nondebug_stmt (test_bb);
4529 1095466 : if (gimple_code (test_last) == GIMPLE_COND)
4530 : {
4531 1059488 : if (backward ? e2->src != test_bb : e->src != bb)
4532 : return false;
4533 :
4534 : /* For last_bb, handle also:
4535 : if (x_3(D) == 3)
4536 : goto <bb 6>; [34.00%]
4537 : else
4538 : goto <bb 7>; [66.00%]
4539 :
4540 : <bb 6> [local count: 79512730]:
4541 :
4542 : <bb 7> [local count: 1073741824]:
4543 : # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4544 : where bb 7 is *OTHER_BB, but the PHI values from the
4545 : earlier bbs match the path through the empty bb
4546 : in between. */
4547 1054602 : edge e3;
4548 1054602 : if (backward)
4549 1400073 : e3 = EDGE_SUCC (test_bb,
4550 : e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4551 : else
4552 21978 : e3 = EDGE_SUCC (bb,
4553 : e == EDGE_SUCC (bb, 0) ? 1 : 0);
4554 1054602 : if (empty_block_p (e3->dest)
4555 36019 : && single_succ_p (e3->dest)
4556 36019 : && single_succ (e3->dest) == *other_bb
4557 1091398 : && single_pred_p (e3->dest)
4558 1089207 : && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4559 : {
4560 6828 : if (backward)
4561 6165 : e2 = single_succ_edge (e3->dest);
4562 : else
4563 663 : e = single_succ_edge (e3->dest);
4564 6828 : if (test_swapped_p)
4565 292 : *test_swapped_p = true;
4566 6828 : goto retry;
4567 : }
4568 : }
4569 35978 : else if (gimple_phi_arg_def (phi, e2->dest_idx)
4570 35978 : == gimple_assign_lhs (test_last)
4571 68520 : && (integer_zerop (gimple_phi_arg_def (phi,
4572 32542 : e->dest_idx))
4573 14960 : || integer_onep (gimple_phi_arg_def (phi,
4574 14960 : e->dest_idx))))
4575 31490 : continue;
4576 : }
4577 :
4578 1056793 : return false;
4579 : }
4580 : }
4581 : return true;
4582 : }
4583 :
4584 : /* Return true if BB doesn't have side-effects that would disallow
4585 : range test optimization, all SSA_NAMEs set in the bb are consumed
4586 : in the bb and there are no PHIs. */
4587 :
4588 : bool
4589 5302046 : no_side_effect_bb (basic_block bb)
4590 : {
4591 5302046 : gimple_stmt_iterator gsi;
4592 5302046 : gimple *last;
4593 :
4594 5302046 : if (!gimple_seq_empty_p (phi_nodes (bb)))
4595 : return false;
4596 4145846 : last = last_nondebug_stmt (bb);
4597 14021722 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4598 : {
4599 9875876 : gimple *stmt = gsi_stmt (gsi);
4600 9875876 : tree lhs;
4601 9875876 : imm_use_iterator imm_iter;
4602 9875876 : use_operand_p use_p;
4603 :
4604 9875876 : if (is_gimple_debug (stmt))
4605 4074107 : continue;
4606 5801769 : if (gimple_has_side_effects (stmt))
4607 4145846 : return false;
4608 4899567 : if (stmt == last)
4609 : return true;
4610 3928038 : if (!is_gimple_assign (stmt))
4611 : return false;
4612 3275656 : lhs = gimple_assign_lhs (stmt);
4613 3275656 : if (TREE_CODE (lhs) != SSA_NAME)
4614 : return false;
4615 3038369 : if (gimple_assign_rhs_could_trap_p (stmt))
4616 : return false;
4617 6154876 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4618 : {
4619 2379763 : gimple *use_stmt = USE_STMT (use_p);
4620 2379763 : if (is_gimple_debug (use_stmt))
4621 160003 : continue;
4622 2219760 : if (gimple_bb (use_stmt) != bb)
4623 463267 : return false;
4624 2119190 : }
4625 : }
4626 : return false;
4627 : }
4628 :
4629 : /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4630 : return true and fill in *OPS recursively. */
4631 :
4632 : static bool
4633 103474 : get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4634 : class loop *loop)
4635 : {
4636 103474 : gimple *stmt = SSA_NAME_DEF_STMT (var);
4637 103474 : tree rhs[2];
4638 103474 : int i;
4639 :
4640 103474 : if (!is_reassociable_op (stmt, code, loop))
4641 : return false;
4642 :
4643 23944 : rhs[0] = gimple_assign_rhs1 (stmt);
4644 23944 : rhs[1] = gimple_assign_rhs2 (stmt);
4645 23944 : gimple_set_visited (stmt, true);
4646 71832 : for (i = 0; i < 2; i++)
4647 47888 : if (TREE_CODE (rhs[i]) == SSA_NAME
4648 47888 : && !get_ops (rhs[i], code, ops, loop)
4649 87191 : && has_single_use (rhs[i]))
4650 : {
4651 38605 : operand_entry *oe = operand_entry_pool.allocate ();
4652 :
4653 38605 : oe->op = rhs[i];
4654 38605 : oe->rank = code;
4655 38605 : oe->id = 0;
4656 38605 : oe->count = 1;
4657 38605 : oe->stmt_to_insert = NULL;
4658 38605 : ops->safe_push (oe);
4659 : }
4660 : return true;
4661 : }
4662 :
4663 : /* Find the ops that were added by get_ops starting from VAR, see if
4664 : they were changed during update_range_test and if yes, create new
4665 : stmts. */
4666 :
4667 : static tree
4668 10253 : update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4669 : unsigned int *pidx, class loop *loop)
4670 : {
4671 10253 : gimple *stmt = SSA_NAME_DEF_STMT (var);
4672 10253 : tree rhs[4];
4673 10253 : int i;
4674 :
4675 10253 : if (!is_reassociable_op (stmt, code, loop))
4676 : return NULL;
4677 :
4678 3394 : rhs[0] = gimple_assign_rhs1 (stmt);
4679 3394 : rhs[1] = gimple_assign_rhs2 (stmt);
4680 3394 : rhs[2] = rhs[0];
4681 3394 : rhs[3] = rhs[1];
4682 10182 : for (i = 0; i < 2; i++)
4683 6788 : if (TREE_CODE (rhs[i]) == SSA_NAME)
4684 : {
4685 6788 : rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4686 6788 : if (rhs[2 + i] == NULL_TREE)
4687 : {
4688 6512 : if (has_single_use (rhs[i]))
4689 6486 : rhs[2 + i] = ops[(*pidx)++]->op;
4690 : else
4691 26 : rhs[2 + i] = rhs[i];
4692 : }
4693 : }
4694 3394 : if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4695 3069 : && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4696 : {
4697 3069 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4698 3069 : var = make_ssa_name (TREE_TYPE (var));
4699 3069 : gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4700 : rhs[2], rhs[3]);
4701 3069 : gimple_set_uid (g, gimple_uid (stmt));
4702 3069 : gimple_set_visited (g, true);
4703 3069 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4704 3069 : gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4705 3069 : if (fold_stmt_inplace (&gsi2))
4706 2018 : update_stmt (g);
4707 : }
4708 : return var;
4709 : }
4710 :
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  /* The SSA_NAME (or NULL_TREE) that was passed to get_ops for this
     basic block's range tests.  */
  tree op;
  /* Half-open range [first_idx, last_idx) of the entries this basic
     block contributed to the ops vector.  */
  unsigned int first_idx, last_idx;
};
4719 :
/* Inter-bb range test optimization.

   Returns TRUE if a gimple conditional is optimized to a true/false,
   otherwise return FALSE.

   This indicates to the caller that it should run a CFG cleanup pass
   once reassociation is completed.  */

static bool
maybe_optimize_range_tests (gimple *stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry *> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;
  bool cfg_cleanup_needed = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
	return cfg_cleanup_needed;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return cfg_cleanup_needed;

  if (stmt_could_throw_p (cfun, stmt))
    return cfg_cleanup_needed;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
	break;
      if (!no_side_effect_bb (first_bb))
	break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
	return cfg_cleanup_needed;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	  return cfg_cleanup_needed;
	else if (single_pred_p (e->dest))
	  {
	    stmt = last_nondebug_stmt (e->dest);
	    if (stmt
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
	      {
		if (suitable_cond_bb (first_bb, e->dest, &other_bb,
				      NULL, true))
		  break;
		else
		  other_bb = NULL;
	      }
	    else if (stmt
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
	      {
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
		  other_bb = NULL;
	      }
	  }
      if (other_bb == NULL)
	return cfg_cleanup_needed;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
	  break;
      if (e == NULL)
	break;
      if (!single_pred_p (e->dest))
	break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
	break;
      if (!no_side_effect_bb (e->dest))
	break;
      last_bb = e->dest;
    }
  if (first_bb == last_bb)
    return cfg_cleanup_needed;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_nondebug_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	{
	  use_operand_p use_p;
	  gimple *phi;
	  edge e2;
	  unsigned int d;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);

	  /* stmt is
	     _123 = (int) _234;
	     OR
	     _234 = a_2(D) == 2;

	     followed by:
	     <bb M>:
	     # _345 = PHI <_123(N), 1(...), 1(...)>

	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  d = e2->dest_idx;
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	  else
	    {
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;
	    }

	  /* If _234 SSA_NAME_DEF_STMT is
	     _234 = _567 | _789;
	     (or &, corresponding to 1/0 in the phi arguments,
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (TREE_CODE (rhs) == SSA_NAME
	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		  != tcc_comparison)
	      && !get_ops (rhs, code, &ops,
			   loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	    {
	      /* Otherwise, push the _234 range test itself.  */
	      operand_entry *oe = operand_entry_pool.allocate ();

	      oe->op = rhs;
	      oe->rank = code;
	      oe->id = 0;
	      oe->count = 1;
	      oe->stmt_to_insert = NULL;
	      ops.safe_push (oe);
	      bb_ent.last_idx++;
	      bb_ent.op = rhs;
	    }
	  else if (is_gimple_assign (stmt)
		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		       == tcc_comparison)
		   && !get_ops (lhs, code, &ops,
				loop_containing_stmt (stmt))
		   && has_single_use (lhs))
	    {
	      /* The last stmt is itself a comparison; push its lhs.
		 NOTE(review): unlike every other operand_entry
		 allocation site in this file, oe->stmt_to_insert is
		 not initialized here -- confirm pool reuse cannot
		 leave a stale pointer behind.  */
	      operand_entry *oe = operand_entry_pool.allocate ();
	      oe->op = lhs;
	      oe->rank = code;
	      oe->id = 0;
	      oe->count = 1;
	      ops.safe_push (oe);
	      bb_ent.last_idx++;
	      bb_ent.op = lhs;
	    }
	  else
	    {
	      bb_ent.last_idx = ops.length ();
	      bb_ent.op = rhs;
	    }
	  bbinfo.safe_push (bb_ent);
	  for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
	    ops[i]->id = bb->index;
	  continue;
	}
      else if (bb == last_bb)
	{
	  /* For last_bb, handle also:
	     if (x_3(D) == 3)
	       goto <bb 6>; [34.00%]
	     else
	       goto <bb 7>; [66.00%]

	     <bb 6> [local count: 79512730]:

	     <bb 7> [local count: 1073741824]:
	     # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
	     where bb 7 is OTHER_BB, but the PHI values from the
	     earlier bbs match the path through the empty bb
	     in between.  */
	  bool test_swapped_p = false;
	  bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
				      &other_bb, &test_swapped_p, true);
	  gcc_assert (ok);
	  /* suitable_cond_bb may have redirected E through the empty
	     intermediate bb; recompute the edge to OTHER_BB.  */
	  if (test_swapped_p)
	    e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
	}
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && ((code != EQ_EXPR && code != NE_EXPR)
	      || rhs != boolean_false_node
	      /* Either push into ops the individual bitwise
		 or resp. and operands, depending on which
		 edge is other_bb.  */
	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				 ^ (code == EQ_EXPR))
			   ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			   loop_containing_stmt (stmt))))
	{
	  /* Or push the GIMPLE_COND stmt itself.  */
	  operand_entry *oe = operand_entry_pool.allocate ();

	  oe->op = NULL;
	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
	  /* oe->op = NULL signs that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number, at which's end the GIMPLE_COND
	     is.  */
	  oe->id = bb->index;
	  oe->count = 1;
	  oe->stmt_to_insert = NULL;
	  ops.safe_push (oe);
	  bb_ent.op = NULL;
	  bb_ent.last_idx++;
	}
      else if (ops.length () > bb_ent.first_idx)
	{
	  bb_ent.op = lhs;
	  bb_ent.last_idx = ops.length ();
	}
      bbinfo.safe_push (bb_ent);
      for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
	ops[i]->id = bb->index;
      if (bb == first_bb)
	break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
  if (any_changes)
    {
      unsigned int idx, max_idx = 0;
      /* update_ops relies on has_single_use predicates returning the
	 same values as it did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other side, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	    {
	      tree new_op;

	      max_idx = idx;
	      stmt = last_nondebug_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   (enum tree_code)
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		{
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
		}
	      if (bbinfo[idx].op != new_op)
		{
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;

		  /* Replace uses of the old test value; a cast or
		     comparison feeding the final PHI is remembered
		     and rebuilt below.  */
		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		      continue;
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if ((is_gimple_assign (use_stmt)
			      && (TREE_CODE_CLASS
				  (gimple_assign_rhs_code (use_stmt))
				  == tcc_comparison)))
		      cast_or_tcc_cmp_stmt = use_stmt;
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_or_tcc_cmp_stmt = use_stmt;
		    else
		      gcc_unreachable ();

		  if (cast_or_tcc_cmp_stmt)
		    {
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
		      enum tree_code rhs_code
			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
			  ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
			  : CONVERT_EXPR;
		      gassign *g;
		      if (is_gimple_min_invariant (new_op))
			{
			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
			  g = gimple_build_assign (new_lhs, new_op);
			}
		      else
			g = gimple_build_assign (new_lhs, rhs_code, new_op);
		      gimple_stmt_iterator gsi
			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			  continue;
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);
			else
			  gcc_unreachable ();
		    }
		}
	    }
	  if (bb == first_bb)
	    break;
	}
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	    {
	      gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));

	      if (idx > max_idx)
		max_idx = idx;

	      /* If we collapse the conditional to a true/false
		 condition, then bubble that knowledge up to our caller.  */
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		{
		  gimple_cond_make_false (cond_stmt);
		  cfg_cleanup_needed = true;
		}
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		{
		  gimple_cond_make_true (cond_stmt);
		  cfg_cleanup_needed = true;
		}
	      else
		{
		  gimple_cond_set_code (cond_stmt, NE_EXPR);
		  gimple_cond_set_lhs (cond_stmt,
				       ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
		}
	      update_stmt (cond_stmt);
	    }
	  if (bb == first_bb)
	    break;
	}

      /* The above changes could result in basic blocks after the first
	 modified one, up to and including last_bb, to be executed even if
	 they would not be in the original program.  If the value ranges of
	 assignment lhs' in those bbs were dependent on the conditions
	 guarding those basic blocks which now can change, the VRs might
	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
	 are only used within the same bb, it should be not a big deal if
	 we just reset all the VRs in those bbs.  See PR68671.  */
      for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
	reset_flow_sensitive_info_in_bb (bb);
    }
  return cfg_cleanup_needed;
}
5145 :
5146 : /* Remove def stmt of VAR if VAR has zero uses and recurse
5147 : on rhs1 operand if so. */
5148 :
5149 : static void
5150 72511 : remove_visited_stmt_chain (tree var)
5151 : {
5152 97137 : gimple *stmt;
5153 97137 : gimple_stmt_iterator gsi;
5154 :
5155 121763 : while (1)
5156 : {
5157 97137 : if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5158 : return;
5159 36063 : stmt = SSA_NAME_DEF_STMT (var);
5160 36063 : if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5161 : {
5162 24626 : var = gimple_assign_rhs1 (stmt);
5163 24626 : gsi = gsi_for_stmt (stmt);
5164 24626 : reassoc_remove_stmt (&gsi);
5165 24626 : release_defs (stmt);
5166 : }
5167 : else
5168 : return;
5169 : }
5170 : }
5171 :
5172 : /* This function checks three consequtive operands in
5173 : passed operands vector OPS starting from OPINDEX and
5174 : swaps two operands if it is profitable for binary operation
5175 : consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
5176 :
5177 : We pair ops with the same rank if possible. */
5178 :
5179 : static void
5180 131982 : swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5181 : unsigned int opindex)
5182 : {
5183 131982 : operand_entry *oe1, *oe2, *oe3;
5184 :
5185 131982 : oe1 = ops[opindex];
5186 131982 : oe2 = ops[opindex + 1];
5187 131982 : oe3 = ops[opindex + 2];
5188 :
5189 131982 : if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5190 15855 : std::swap (*oe1, *oe3);
5191 116127 : else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5192 439 : std::swap (*oe1, *oe2);
5193 131982 : }
5194 :
5195 : /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5196 : two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5197 : whether RHS1 op RHS2 can be inserted before or needs to be inserted
5198 : after the returned stmt. */
5199 :
5200 : static inline gimple *
5201 370801 : find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5202 : {
5203 370801 : insert_before = true;
5204 370801 : if (TREE_CODE (rhs1) == SSA_NAME
5205 370801 : && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5206 : {
5207 10662 : stmt = SSA_NAME_DEF_STMT (rhs1);
5208 10662 : insert_before = false;
5209 : }
5210 370801 : if (TREE_CODE (rhs2) == SSA_NAME
5211 370801 : && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5212 : {
5213 24862 : stmt = SSA_NAME_DEF_STMT (rhs2);
5214 24862 : insert_before = false;
5215 : }
5216 370801 : return stmt;
5217 : }
5218 :
5219 : /* If the stmt that defines operand has to be inserted, insert it
5220 : before the use. */
5221 : static void
5222 85 : insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5223 : {
5224 85 : gcc_assert (is_gimple_assign (stmt_to_insert));
5225 85 : tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5226 85 : tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5227 85 : bool insert_before;
5228 85 : gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5229 85 : gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5230 85 : gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5231 :
5232 : /* If the insert point is not stmt, then insert_point would be
5233 : the point where operand rhs1 or rhs2 is defined. In this case,
5234 : stmt_to_insert has to be inserted afterwards. This would
5235 : only happen when the stmt insertion point is flexible. */
5236 85 : if (insert_before)
5237 84 : gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5238 : else
5239 1 : insert_stmt_after (stmt_to_insert, insert_point);
5240 85 : }
5241 :
5242 :
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

static tree
rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
		   const vec<operand_entry *> &ops, bool changed,
		   bool next_changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry *oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry *oe1, *oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];
      /* Canonicalize the operand order for the innermost pair.  */
      if (commutative_tree_code (rhs_code)
	  && tree_swap_operands_p (oe1->op, oe2->op))
	std::swap (oe1, oe2);

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  /* If the stmt that defines operand has to be inserted, insert it
	     before the use.  */
	  if (oe1->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
	  if (oe2->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	    {
	      bool insert_before;
	      gimple *insert_point
		= find_insert_point (stmt, oe1->op, oe2->op, insert_before);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
	      stmt
		= gimple_build_assign (lhs, rhs_code,
				       oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_before)
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      /* In-place update: both operands must already dominate
		 STMT for this to be valid.  */
	      bool insert_before;
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
						      insert_before)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  /* The old rhs1 chain may now be dead; clean it up.  */
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
			 changed || oe->op != rhs2 || next_changed,
			 false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  bool insert_before;
	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
						    insert_before);

	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, rhs_code,
				      new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_before)
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  bool insert_before;
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
						  insert_before)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
    }
  return lhs;
}
5406 :
5407 : /* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
5409 : maximum number of independent statements we may execute per cycle. */
5410 :
5411 : static int
5412 18792 : get_required_cycles (int ops_num, int cpu_width)
5413 : {
5414 18792 : int res;
5415 18792 : int elog;
5416 18792 : unsigned int rest;
5417 :
5418 : /* While we have more than 2 * cpu_width operands
5419 : we may reduce number of operands by cpu_width
5420 : per cycle. */
5421 18792 : res = ops_num / (2 * cpu_width);
5422 :
5423 : /* Remained operands count may be reduced twice per cycle
5424 : until we have only one operand. */
5425 18792 : rest = (unsigned)(ops_num - res * cpu_width);
5426 18792 : elog = exact_log2 (rest);
5427 8596 : if (elog >= 0)
5428 8596 : res += elog;
5429 : else
5430 20392 : res += floor_log2 (rest) + 1;
5431 :
5432 18792 : return res;
5433 : }
5434 :
5435 : /* Given that the target fully pipelines FMA instructions, return the latency
5436 : of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of
5437 : pipes. */
5438 :
5439 : static inline int
5440 0 : get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
5441 : {
5442 0 : gcc_checking_assert (mult_num && mult_num <= ops_num);
5443 :
5444 : /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
5445 : e.g:
5446 :
5447 : A * B + C * D
5448 : =>
5449 : _1 = A * B;
5450 : _2 = .FMA (C, D, _1);
5451 :
5452 : Otherwise there's latency(MULT)*1 in the first FMA. */
5453 0 : return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5454 : }
5455 :
5456 : /* Returns an optimal number of registers to use for computation of
5457 : given statements.
5458 :
5459 : LHS is the result ssa name of OPS. MULT_NUM is number of sub-expressions
5460 : that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5461 :
static int
get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
			 enum tree_code opc, machine_mode mode)
{
  /* --param tree-reassoc-width, when positive, overrides the target hook.  */
  int param_width = param_tree_reassoc_width;
  int width;
  int width_min;
  int cycles_best;
  int ops_num = ops->length ();

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  /* Width 1 means a single dependency chain; nothing to tune.  */
  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  width_min = 1;

  /* If the target fully pipelines FMA instruction, the multiply part can start
     already if its operands are ready.  Assuming symmetric pipes are used for
     FMUL/FADD/FMA, then for a sequence of FMA like:

	_8 = .FMA (_2, _3, _1);
	_9 = .FMA (_5, _4, _8);
	_10 = .FMA (_7, _6, _9);

     , if width=1, the latency is latency(MULT) + latency(ADD)*3.
     While with width=2:

	_8 = _4 * _5;
	_9 = .FMA (_2, _3, _1);
	_10 = .FMA (_6, _7, _8);
	_11 = _9 + _10;

     , it is latency(MULT)*2 + latency(ADD)*2.  Assuming latency(MULT) >=
     latency(ADD), the first variant is preferred.

     Find out if we can get a smaller width considering FMA.
     Assume FMUL and FMA use the same units that can also do FADD.
     For other scenarios, such as when FMUL and FADD are using separated units,
     the following code may not apply.  */

  int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
  if (width > 1 && mult_num && param_fully_pipelined_fma
      && width_mult <= width)
    {
      /* Latency of MULT_EXPRs.  */
      int lat_mul
	= get_mult_latency_consider_fma (ops_num, mult_num, width_mult);

      /* Quick search might not apply.  So start from 1.  */
      for (int i = 1; i < width_mult; i++)
	{
	  int lat_mul_new
	    = get_mult_latency_consider_fma (ops_num, mult_num, i);
	  int lat_add_new = get_required_cycles (ops_num, i);

	  /* Assume latency(MULT) >= latency(ADD).  */
	  if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)
	    {
	      width = i;
	      break;
	    }
	}
    }
  else
    {
      /* Binary search for the smallest width that keeps the optimal
	 cycle count found above.  */
      while (width > width_min)
	{
	  int width_mid = (width + width_min) / 2;

	  if (get_required_cycles (ops_num, width_mid) == cycles_best)
	    width = width_mid;
	  else if (width_min < width_mid)
	    width_min = width_mid;
	  else
	    break;
	}
    }

  /* If there's loop dependent FMA result, return width=2 to avoid it.  This is
     better than skipping these FMA candidates in widening_mul.  */
  if (width == 1
      && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
		   param_avoid_fma_max_bits))
    {
      /* Look for cross backedge dependency:
	 1. LHS is a phi argument in the same basic block it is defined.
	 2. And the result of the phi node is used in OPS.  */
      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));

      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
	if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
	  {
	    /* Only a phi argument flowing in from LHS's own block
	       indicates a backedge of interest.  */
	    if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
		!= bb)
	      continue;
	    tree phi_result = gimple_phi_result (phi);
	    operand_entry *oe;
	    unsigned int j;
	    FOR_EACH_VEC_ELT (*ops, j, oe)
	      {
		if (TREE_CODE (oe->op) != SSA_NAME)
		  continue;

		/* Result of phi is operand of PLUS_EXPR.  */
		if (oe->op == phi_result)
		  return 2;

		/* Check if result of phi is operand of MULT_EXPR.  */
		gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
		if (is_gimple_assign (def_stmt)
		    && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
		  {
		    /* Look through a single NEGATE_EXPR feeding the op.  */
		    tree rhs = gimple_assign_rhs1 (def_stmt);
		    if (TREE_CODE (rhs) == SSA_NAME)
		      {
			if (rhs == phi_result)
			  return 2;
			def_stmt = SSA_NAME_DEF_STMT (rhs);
		      }
		  }
		if (is_gimple_assign (def_stmt)
		    && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
		  {
		    if (gimple_assign_rhs1 (def_stmt) == phi_result
			|| gimple_assign_rhs2 (def_stmt) == phi_result)
		      return 2;
		  }
	      }
	  }
    }

  return width;
}
5609 :
5610 : #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5611 : #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5612 : #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5613 :
5614 : /* Rewrite statements with dependency chain with regard the chance to generate
5615 : FMA.
5616 : For the chain with FMA: Try to keep fma opportunity as much as possible.
5617 : For the chain without FMA: Putting the computation in rank order and trying
5618 : to allow operations to be executed in parallel.
5619 : E.g.
5620 : e + f + a * b + c * d;
5621 :
5622 : ssa1 = e + a * b;
5623 : ssa2 = f + c * d;
5624 : ssa3 = ssa1 + ssa2;
5625 :
5626 : This reassociation approach preserves the chance of fma generation as much
5627 : as possible.
5628 :
5629 : Another thing is to avoid adding loop-carried ops to long chains, otherwise
5630 : the whole chain will have dependencies across the loop iteration. Just keep
5631 : loop-carried ops in a separate chain.
5632 : E.g.
5633 : x_1 = phi (x_0, x_2)
5634 : y_1 = phi (y_0, y_2)
5635 :
5636 : a + b + c + d + e + x1 + y1
5637 :
5638 : SSA1 = a + b;
5639 : SSA2 = c + d;
5640 : SSA3 = SSA1 + e;
5641 : SSA4 = SSA3 + SSA2;
5642 : SSA5 = x1 + y1;
5643 : SSA6 = SSA4 + SSA5;
5644 : */
static void
rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
			    const vec<operand_entry *> &ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int op_normal_num = op_num;
  gcc_assert (op_num > 0);
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  int i = 0, j = 0;
  tree tmp_op[2], op1;
  operand_entry *oe;
  gimple *stmt1 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);
  int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
  int width_active = 0, width_count = 0;
  bool has_biased = false, ops_changed = false;
  auto_vec<operand_entry *> ops_normal;
  auto_vec<operand_entry *> ops_biased;
  vec<operand_entry *> *ops1;

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  /* Avoid adding loop-carried ops to long chains, first filter out the
     loop-carried.  But we need to make sure that the length of the remainder
     is not less than 4, which is the smallest ops length we can break the
     dependency.  */
  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
	  && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
	  && op_normal_num > 4)
	{
	  ops_biased.safe_push (oe);
	  has_biased = true;
	  op_normal_num --;
	}
      else
	ops_normal.safe_push (oe);
    }

  /* Width should not be larger than ops length / 2, since we can not create
     more parallel dependency chains that exceeds such value.  */
  int width_normal = op_normal_num / 2;
  int width_biased = (op_num - op_normal_num) / 2;
  width_normal = width <= width_normal ? width : width_normal;
  width_biased = width <= width_biased ? width : width_biased;

  /* Process the normal (non-loop-carried) ops first; the biased ops
     get their own chain afterwards.  */
  ops1 = &ops_normal;
  width_count = width_active = width_normal;

  /* Build parallel dependency chain according to width.  */
  for (i = 0; i < stmt_num; i++)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}

      /* When the work of normal ops is over, but the loop is not over,
	 continue to do biased ops.  */
      if (width_count == 0 && ops1 == &ops_normal)
	{
	  ops1 = &ops_biased;
	  width_count = width_active = width_biased;
	  ops_changed = true;
	}

      /* Swap the operands if no FMA in the chain.  */
      if (ops1->length () > 2 && !has_fma)
	swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);

      /* The first WIDTH_ACTIVE statements (and, after switching to the
	 biased ops, the next batch) each start a new chain from two
	 leaf operands.  */
      if (i < width_active
	  || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
	{
	  for (j = 0; j < 2; j++)
	    {
	      oe = ops1->pop ();
	      tmp_op[j] = oe->op;
	      /* If the stmt that defines operand has to be inserted, insert it
		 before the use.  */
	      stmt1 = oe->stmt_to_insert;
	      if (stmt1)
		insert_stmt_before_use (stmts[i], stmt1);
	      stmt1 = NULL;
	    }
	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
					tmp_op[1],
					tmp_op[0],
					opcode);
	  gimple_set_visited (stmts[i], true);

	}
      else
	{
	  /* We keep original statement only for the last one.  All others are
	     recreated.  */
	  if (!ops1->length ())
	    {
	      /* No leaf operands remain: combine the chain heads.  */
	      /* For biased length equal to 2.  */
	      if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
		last_rhs2_stmt_index = i - 1;

	      /* When width_count == 2 and there is no biased, just finish.  */
	      if (width_count == NORMAL_END_STMT && !has_biased)
		{
		  last_rhs1_stmt_index = i - 1;
		  last_rhs2_stmt_index = i - 2;
		}
	      if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
		{
		  /* We keep original statement only for the last one.  All
		     others are recreated.  */
		  gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
					  (stmts[last_rhs1_stmt_index]));
		  gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
					  (stmts[last_rhs2_stmt_index]));
		  update_stmt (stmts[i]);
		}
	      else
		{
		  stmts[i] =
		    build_and_add_sum (TREE_TYPE (last_rhs1),
				       gimple_assign_lhs (stmts[i-width_count]),
				       gimple_assign_lhs
				       (stmts[i-width_count+1]),
				       opcode);
		  gimple_set_visited (stmts[i], true);
		  width_count--;

		  /* It is the end of normal or biased ops.
		     last_rhs1_stmt_index used to record the last stmt index
		     for normal ops.  last_rhs2_stmt_index used to record the
		     last stmt index for biased ops.  */
		  if (width_count == BIASED_END_STMT)
		    {
		      gcc_assert (has_biased);
		      if (ops_biased.length ())
			last_rhs1_stmt_index = i;
		      else
			last_rhs2_stmt_index = i;
		      width_count--;
		    }
		}
	    }
	  else
	    {
	      /* Attach the rest ops to the parallel dependency chain.  */
	      oe = ops1->pop ();
	      op1 = oe->op;
	      stmt1 = oe->stmt_to_insert;
	      if (stmt1)
		insert_stmt_before_use (stmts[i], stmt1);
	      stmt1 = NULL;

	      /* For only one biased ops.  */
	      if (width_count == SPECIAL_BIASED_END_STMT)
		{
		  /* We keep original statement only for the last one.  All
		     others are recreated.  */
		  gcc_assert (has_biased);
		  gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
					  (stmts[last_rhs1_stmt_index]));
		  gimple_assign_set_rhs2 (stmts[i], op1);
		  update_stmt (stmts[i]);
		}
	      else
		{
		  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
						gimple_assign_lhs
						(stmts[i-width_active]),
						op1,
						opcode);
		  gimple_set_visited (stmts[i], true);
		}
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}
    }

  /* The original chain's statements are dead now; clean them up.  */
  remove_visited_stmt_chain (last_rhs1);
}
5839 :
5840 : /* Transform STMT, which is really (A +B) + (C + D) into the left
5841 : linear form, ((A+B)+C)+D.
5842 : Recurse on D if necessary. */
5843 :
static void
linearize_expr (gimple *stmt)
{
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  class loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  /* Both operands must themselves be reassociable ops of the same code.  */
  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  /* Turn (A op B) op (C op D) into ((A op B) op C) op D:
     STMT's rhs2 becomes C, and a fresh statement computing
     (A op B) op C becomes STMT's rhs1.  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  /* The old rhs2 definition is dead now; remove it.  */
  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
5896 :
5897 : /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5898 : it. Otherwise, return NULL. */
5899 :
5900 : static gimple *
5901 418228 : get_single_immediate_use (tree lhs)
5902 : {
5903 418228 : use_operand_p immuse;
5904 418228 : gimple *immusestmt;
5905 :
5906 418228 : if (TREE_CODE (lhs) == SSA_NAME
5907 418228 : && single_imm_use (lhs, &immuse, &immusestmt)
5908 734735 : && is_gimple_assign (immusestmt))
5909 : return immusestmt;
5910 :
5911 : return NULL;
5912 : }
5913 :
5914 : /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5915 : representing the negated value. Insertions of any necessary
5916 : instructions go before GSI.
5917 : This function is recursive in that, if you hand it "a_5" as the
5918 : value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5919 : transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5920 :
static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple *g;

      /* Recursively negate both addends.  Each recursive call may
	 insert statements, so re-fetch the iterator every time.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      /* Build a new PLUS_EXPR of the negated addends with a fresh lhs;
	 the old definition is marked visited (it becomes dead).  */
      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  /* Otherwise emit an explicit negation before the use.  */
  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  /* Statements inserted by the gimplification above still have uid 0;
     give them the uid of the use statement so the pass's uid-based
     ordering stays consistent.  */
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
5974 : /* Return true if we should break up the subtract in STMT into an add
5975 : with negate. This is true when we the subtract operands are really
   with negate.  This is true when the subtract operands are really
5977 : either case, breaking up the subtract into an add with negate
5978 : exposes the adds to reassociation. */
5979 :
5980 : static bool
5981 297900 : should_break_up_subtract (gimple *stmt)
5982 : {
5983 297900 : tree lhs = gimple_assign_lhs (stmt);
5984 297900 : tree binlhs = gimple_assign_rhs1 (stmt);
5985 297900 : tree binrhs = gimple_assign_rhs2 (stmt);
5986 297900 : gimple *immusestmt;
5987 297900 : class loop *loop = loop_containing_stmt (stmt);
5988 :
5989 297900 : if (TREE_CODE (binlhs) == SSA_NAME
5990 297900 : && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5991 : return true;
5992 :
5993 277449 : if (TREE_CODE (binrhs) == SSA_NAME
5994 277449 : && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5995 : return true;
5996 :
5997 276266 : if (TREE_CODE (lhs) == SSA_NAME
5998 276266 : && (immusestmt = get_single_immediate_use (lhs))
5999 121768 : && is_gimple_assign (immusestmt)
6000 398034 : && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
6001 76861 : || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
6002 3093 : && gimple_assign_rhs1 (immusestmt) == lhs)
6003 73778 : || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
6004 : return true;
6005 : return false;
6006 : }
6007 :
6008 : /* Transform STMT from A - B into A + -B. */
6009 :
6010 : static void
6011 79834 : break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
6012 : {
6013 79834 : tree rhs1 = gimple_assign_rhs1 (stmt);
6014 79834 : tree rhs2 = gimple_assign_rhs2 (stmt);
6015 :
6016 79834 : if (dump_file && (dump_flags & TDF_DETAILS))
6017 : {
6018 0 : fprintf (dump_file, "Breaking up subtract ");
6019 0 : print_gimple_stmt (dump_file, stmt, 0);
6020 : }
6021 :
6022 79834 : rhs2 = negate_value (rhs2, gsip);
6023 79834 : gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
6024 79834 : update_stmt (stmt);
6025 79834 : }
6026 :
6027 : /* Determine whether STMT is a builtin call that raises an SSA name
6028 : to an integer power and has only one use. If so, and this is early
6029 : reassociation and unsafe math optimizations are permitted, place
6030 : the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6031 : If any of these conditions does not hold, return FALSE. */
6032 :
static bool
acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree arg1;
  REAL_VALUE_TYPE c, cint;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      /* When errno semantics must be preserved, pow cannot be expanded.  */
      if (flag_errno_math)
	return false;

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      /* The exponent must be a compile-time real constant ...  */
      if (TREE_CODE (arg1) != REAL_CST)
	return false;

      c = TREE_REAL_CST (arg1);

      /* ... whose magnitude fits in a HOST_WIDE_INT ...  */
      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
	return false;

      /* ... and which is exactly an integer value.  */
      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))
	return false;

      break;

    CASE_CFN_POWI:
      /* powi already takes an integer exponent; just require that it be
	 a constant fitting in a signed HOST_WIDE_INT.  */
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
	return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
6085 :
6086 : /* Try to derive and add operand entry for OP to *OPS. Return false if
6087 : unsuccessful. */
6088 :
static bool
try_special_add_to_ops (vec<operand_entry *> *ops,
			enum tree_code code,
			tree op, gimple* def_stmt)
{
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  /* Only a single-use SSA name may be absorbed into the ops vector;
     otherwise its defining statement must stay live for other users.  */
  if (TREE_CODE (op) != SSA_NAME
      || ! has_single_use (op))
    return false;

  /* A pow/powi call with an acceptable integral exponent is recorded as
     a repeated-factor entry (see acceptable_pow_call).  */
  if (code == MULT_EXPR
      && reassoc_insert_powi_p
      && flag_unsafe_math_optimizations
      && is_gimple_call (def_stmt)
      && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (def_stmt, true);
      return true;
    }
  /* A negate feeding a multiplication is split into the negated value
     plus a -1 factor, when the type's FP semantics permit it.  */
  else if (code == MULT_EXPR
	   && is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
	   && !HONOR_SNANS (TREE_TYPE (op))
	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
	   && (!FLOAT_TYPE_P (TREE_TYPE (op))
	       || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      tree cst = build_minus_one_cst (TREE_TYPE (op));
      add_to_ops_vec (ops, rhs1);
      add_to_ops_vec (ops, cst);
      gimple_set_visited (def_stmt, true);
      return true;
    }

  return false;
}
6130 :
6131 : /* Recursively linearize a binary expression that is the RHS of STMT.
6132 : Place the operands of the expression tree in the vector named OPS. */
6133 :
static void
linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *binlhsdef = NULL, *binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  class loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  /* An operand is reassociable when its definition has the same code,
     is in the same loop, and cannot throw.  */
  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (cfun, binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (cfun, binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      /* If this is not a associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  /* Both operands are leaves: record them and we are done,
	     unless a special add forced an operand swap.  */
	  bool swap = false;
	  if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
	    /* If we add ops for the rhs we expect to be able to recurse
	       to it via the lhs during expression rewrite so swap
	       operands.  */
	    swap = true;
	  else
	    add_to_ops_vec (ops, binrhs);

	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
	    add_to_ops_vec (ops, binlhs);

	  if (!swap)
	    return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
      if (!binrhsisreassoc)
	return;

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      std::swap (binlhs, binrhs);
    }
  else if (binrhsisreassoc)
    {
      /* Both sides are reassociable: flatten the RHS into the LHS chain
	 first, then reload the (changed) operands.  */
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  /* Recurse on the reassociable LHS and record the RHS leaf.  */
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
    add_to_ops_vec (ops, binrhs);
}
6236 : /* Repropagate the negates back into subtracts, since no other pass
6237 : currently does it. */
6238 :
static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      /* Only handle negates whose single use is an assignment.  */
      gimple *user = get_single_immediate_use (negate);
      if (!user || !is_gimple_assign (user))
	continue;

      /* NEGATEOP is the value the NEGATE_EXPR negates.  Leave names
	 occurring in abnormal PHIs alone.  */
      tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
      if (TREE_CODE (negateop) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
					      negateop);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		   x = -negateop
		   y = x - b
		 which we transform into
		   x = negateop + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple *feed = SSA_NAME_DEF_STMT (negate);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
	      gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
	      user = gsi_stmt (gsi2);
	      update_stmt (user);
	      /* The old negate statement is dead now.  */
	      reassoc_remove_stmt (&gsi);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	    }
	  else
	    {
	      /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
		 getting rid of one operation.  */
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
6323 :
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      /* Reset the visited flag and assign monotonically increasing
	 UIDs to every statement, even those we don't otherwise touch;
	 later phases of the pass rely on both.  */
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
	  || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      /* Record plain negates for later processing by
	 repropagate_negates.  */
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
}
6378 :
/* Used for repeated factor analysis by attempt_builtin_powi.  */
struct repeat_factor
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector
     (NULL_TREE until computed by attempt_builtin_powi).  */
  tree repr;
};
6395 :

/* Scratch vector of repeated factors; created, populated and released
   within each invocation of attempt_builtin_powi.  */
static vec<repeat_factor> repeat_factor_vec;
6398 :
6399 : /* Used for sorting the repeat factor vector. Sort primarily by
6400 : ascending occurrence count, secondarily by descending rank. */
6401 :
6402 : static int
6403 247059 : compare_repeat_factors (const void *x1, const void *x2)
6404 : {
6405 247059 : const repeat_factor *rf1 = (const repeat_factor *) x1;
6406 247059 : const repeat_factor *rf2 = (const repeat_factor *) x2;
6407 :
6408 247059 : if (rf1->count < rf2->count)
6409 : return -1;
6410 246425 : else if (rf1->count > rf2->count)
6411 : return 1;
6412 :
6413 245932 : if (rf1->rank < rf2->rank)
6414 : return 1;
6415 134123 : else if (rf1->rank > rf2->rank)
6416 110670 : return -1;
6417 :
6418 : return 0;
6419 : }
6420 :
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target, unless type is integral (then powers are expanded as
     plain multiplies via powi_as_mults below).  */
  if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  /* Not seen before: start a fresh entry for this factor.  */
	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      /* For integral types expand the power inline as
		 multiplies; otherwise emit a __builtin_powi call.  */
	      if (INTEGRAL_TYPE_P (type))
		{
		  gcc_assert (power > 1);
		  gimple_stmt_iterator gsip = gsi;
		  gsi_prev (&gsip);
		  iter_result = powi_as_mults (&gsi, gimple_location (stmt),
					       rf1->repr, power);
		  /* Walk the statements powi_as_mults just inserted and
		     give them STMT's UID, marking them visited so they
		     aren't reprocessed.  */
		  gimple_stmt_iterator gsic = gsi;
		  while (gsi_stmt (gsic) != gsi_stmt (gsip))
		    {
		      gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
		      gimple_set_visited (gsi_stmt (gsic), true);
		      gsi_prev (&gsic);
		    }
		}
	      else
		{
		  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
		  pow_stmt
		    = gimple_build_call (powi_fndecl, 2, rf1->repr,
					 build_int_cst (integer_type_node,
							power));
		  gimple_call_set_lhs (pow_stmt, iter_result);
		  gimple_set_location (pow_stmt, gimple_location (stmt));
		  gimple_set_uid (pow_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
		}

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor *rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  if (INTEGRAL_TYPE_P (type))
	    {
	      gcc_assert (power > 1);
	      gimple_stmt_iterator gsip = gsi;
	      gsi_prev (&gsip);
	      iter_result = powi_as_mults (&gsi, gimple_location (stmt),
					   rf1->repr, power);
	      /* As above: adopt the inserted multiplies into STMT's UID
		 and mark them visited.  */
	      gimple_stmt_iterator gsic = gsi;
	      while (gsi_stmt (gsic) != gsi_stmt (gsip))
		{
		  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
		  gimple_set_visited (gsi_stmt (gsic), true);
		  gsi_prev (&gsic);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	    }
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
6762 :
6763 : /* Attempt to optimize
6764 : CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6765 : CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6766 :
6767 : static void
6768 1071665 : attempt_builtin_copysign (vec<operand_entry *> *ops)
6769 : {
6770 1071665 : operand_entry *oe;
6771 1071665 : unsigned int i;
6772 1071665 : unsigned int length = ops->length ();
6773 1071665 : tree cst = ops->last ()->op;
6774 :
6775 1071665 : if (length == 1 || TREE_CODE (cst) != REAL_CST)
6776 : return;
6777 :
6778 4125 : FOR_EACH_VEC_ELT (*ops, i, oe)
6779 : {
6780 2940 : if (TREE_CODE (oe->op) == SSA_NAME
6781 2940 : && has_single_use (oe->op))
6782 : {
6783 866 : gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6784 2978 : if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6785 : {
6786 54 : tree arg0, arg1;
6787 54 : switch (gimple_call_combined_fn (old_call))
6788 : {
6789 20 : CASE_CFN_COPYSIGN:
6790 20 : CASE_CFN_COPYSIGN_FN:
6791 20 : arg0 = gimple_call_arg (old_call, 0);
6792 20 : arg1 = gimple_call_arg (old_call, 1);
6793 : /* The first argument of copysign must be a constant,
6794 : otherwise there's nothing to do. */
6795 20 : if (TREE_CODE (arg0) == REAL_CST)
6796 : {
6797 20 : tree type = TREE_TYPE (arg0);
6798 20 : tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6799 : /* If we couldn't fold to a single constant, skip it.
6800 : That happens e.g. for inexact multiplication when
6801 : -frounding-math. */
6802 20 : if (mul == NULL_TREE)
6803 : break;
6804 : /* Instead of adjusting OLD_CALL, let's build a new
6805 : call to not leak the LHS and prevent keeping bogus
6806 : debug statements. DCE will clean up the old call. */
6807 16 : gcall *new_call;
6808 16 : if (gimple_call_internal_p (old_call))
6809 0 : new_call = gimple_build_call_internal
6810 0 : (IFN_COPYSIGN, 2, mul, arg1);
6811 : else
6812 16 : new_call = gimple_build_call
6813 16 : (gimple_call_fndecl (old_call), 2, mul, arg1);
6814 16 : tree lhs = make_ssa_name (type);
6815 16 : gimple_call_set_lhs (new_call, lhs);
6816 16 : gimple_set_location (new_call,
6817 : gimple_location (old_call));
6818 16 : insert_stmt_after (new_call, old_call);
6819 : /* We've used the constant, get rid of it. */
6820 16 : ops->pop ();
6821 16 : bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6822 : /* Handle the CST1 < 0 case by negating the result. */
6823 16 : if (cst1_neg)
6824 : {
6825 7 : tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6826 7 : gimple *negate_stmt
6827 7 : = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6828 7 : insert_stmt_after (negate_stmt, new_call);
6829 7 : oe->op = negrhs;
6830 : }
6831 : else
6832 9 : oe->op = lhs;
6833 16 : if (dump_file && (dump_flags & TDF_DETAILS))
6834 : {
6835 14 : fprintf (dump_file, "Optimizing copysign: ");
6836 14 : print_generic_expr (dump_file, cst);
6837 14 : fprintf (dump_file, " * COPYSIGN (");
6838 14 : print_generic_expr (dump_file, arg0);
6839 14 : fprintf (dump_file, ", ");
6840 14 : print_generic_expr (dump_file, arg1);
6841 23 : fprintf (dump_file, ") into %sCOPYSIGN (",
6842 : cst1_neg ? "-" : "");
6843 14 : print_generic_expr (dump_file, mul);
6844 14 : fprintf (dump_file, ", ");
6845 14 : print_generic_expr (dump_file, arg1);
6846 14 : fprintf (dump_file, "\n");
6847 : }
6848 16 : return;
6849 : }
6850 : break;
6851 : default:
6852 : break;
6853 : }
6854 : }
6855 : }
6856 : }
6857 : }
6858 :
6859 : /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6860 :
6861 : static void
6862 14286 : transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6863 : {
6864 14286 : tree rhs1;
6865 :
6866 14286 : if (dump_file && (dump_flags & TDF_DETAILS))
6867 : {
6868 28 : fprintf (dump_file, "Transforming ");
6869 28 : print_gimple_stmt (dump_file, stmt, 0);
6870 : }
6871 :
6872 14286 : rhs1 = gimple_assign_rhs1 (stmt);
6873 14286 : gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6874 14286 : update_stmt (stmt);
6875 14286 : remove_visited_stmt_chain (rhs1);
6876 :
6877 14286 : if (dump_file && (dump_flags & TDF_DETAILS))
6878 : {
6879 28 : fprintf (dump_file, " into ");
6880 28 : print_gimple_stmt (dump_file, stmt, 0);
6881 : }
6882 14286 : }
6883 :
6884 : /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6885 :
6886 : static void
6887 190 : transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6888 : tree rhs1, tree rhs2)
6889 : {
6890 190 : if (dump_file && (dump_flags & TDF_DETAILS))
6891 : {
6892 0 : fprintf (dump_file, "Transforming ");
6893 0 : print_gimple_stmt (dump_file, stmt, 0);
6894 : }
6895 :
6896 190 : gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6897 190 : update_stmt (gsi_stmt (*gsi));
6898 190 : remove_visited_stmt_chain (rhs1);
6899 :
6900 190 : if (dump_file && (dump_flags & TDF_DETAILS))
6901 : {
6902 0 : fprintf (dump_file, " into ");
6903 0 : print_gimple_stmt (dump_file, stmt, 0);
6904 : }
6905 190 : }
6906 :
/* Rearranging ops may yield more FMAs when the chain has more than 2 FMA
   candidates.  Put no-mult ops and mult ops alternately at the end of the
   queue, which is conducive to generating more FMAs and reducing the loss
   of FMAs when breaking the chain.
   E.g.
   a * b + c * d + e generates:

   _4  = c_9(D) * d_10(D);
   _12 = .FMA (a_7(D), b_8(D), _4);
   _11 = e_6(D) + _12;

   Rearrange ops to -> e + a * b + c * d generates:

   _4  = .FMA (c_7(D), d_8(D), _3);
   _11 = .FMA (a_5(D), b_6(D), _4);

   Return the number of MULT_EXPRs in the chain.  */
static int
rank_ops_for_fma (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int ops_length = ops->length ();
  auto_vec<operand_entry *> ops_mult;
  auto_vec<operand_entry *> ops_others;

  /* Partition OPS into entries defined by a multiply (FMA candidates)
     and everything else.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (is_gimple_assign (def_stmt))
	    {
	      if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
		ops_mult.safe_push (oe);
	      /* A negate on the multiplication leads to FNMA.  */
	      else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
		       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
		{
		  gimple *neg_def_stmt
		    = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
		  /* Only treat the negate as an FMA candidate when the
		     negated multiply is in the same basic block.  */
		  if (is_gimple_assign (neg_def_stmt)
		      && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
		      && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
		    ops_mult.safe_push (oe);
		  else
		    ops_others.safe_push (oe);
		}
	      else
		ops_others.safe_push (oe);
	    }
	  else
	    ops_others.safe_push (oe);
	}
      else
	ops_others.safe_push (oe);
    }
  /* 1. When ops_mult.length == 2, like the following case,

     a * b + c * d + e.

     we need to rearrange the ops.

     Putting ops that are not defined by a mult in front can generate
     more FMAs.

     2. If all ops are defined with mult, we don't need to rearrange them.  */
  unsigned mult_num = ops_mult.length ();
  if (mult_num >= 2 && mult_num != ops_length)
    {
      /* Put no-mult ops and mult ops alternately at the end of the
	 queue, which is conducive to generating more FMA and reducing the
	 loss of FMA when breaking the chain.  */
      ops->truncate (0);
      ops->splice (ops_mult);
      int j, opindex = ops->length ();
      int others_length = ops_others.length ();
      /* Interleave the remaining ops backwards from the tail so mult
	 and no-mult entries alternate at the end of the vector.  */
      for (j = 0; j < others_length; j++)
	{
	  oe = ops_others.pop ();
	  ops->quick_insert (opindex, oe);
	  if (opindex > 0)
	    opindex--;
	}
    }
  return mult_num;
}
6993 : /* Reassociate expressions in basic block BB and its post-dominator as
6994 : children.
6995 :
6996 : Bubble up return status from maybe_optimize_range_tests. */
6997 :
6998 : static bool
6999 19511519 : reassociate_bb (basic_block bb)
7000 : {
7001 19511519 : gimple_stmt_iterator gsi;
7002 19511519 : gimple *stmt = last_nondebug_stmt (bb);
7003 19511519 : bool cfg_cleanup_needed = false;
7004 :
7005 19511519 : if (stmt && !gimple_visited_p (stmt))
7006 18884054 : cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
7007 :
7008 19511519 : bool do_prev = false;
7009 39023038 : for (gsi = gsi_last_bb (bb);
7010 188491349 : !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
7011 : {
7012 168979830 : do_prev = true;
7013 168979830 : stmt = gsi_stmt (gsi);
7014 :
7015 168979830 : if (is_gimple_assign (stmt)
7016 168979830 : && !stmt_could_throw_p (cfun, stmt))
7017 : {
7018 44952771 : tree lhs, rhs1, rhs2;
7019 44952771 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
7020 :
7021 : /* If this was part of an already processed statement,
7022 : we don't need to touch it again. */
7023 44952771 : if (gimple_visited_p (stmt))
7024 : {
7025 : /* This statement might have become dead because of previous
7026 : reassociations. */
7027 430997 : if (has_zero_uses (gimple_get_lhs (stmt)))
7028 : {
7029 141089 : reassoc_remove_stmt (&gsi);
7030 141089 : release_defs (stmt);
7031 : /* We might end up removing the last stmt above which
7032 : places the iterator to the end of the sequence.
7033 : Reset it to the last stmt in this case and make sure
7034 : we don't do gsi_prev in that case. */
7035 141089 : if (gsi_end_p (gsi))
7036 : {
7037 418 : gsi = gsi_last_bb (bb);
7038 418 : do_prev = false;
7039 : }
7040 : }
7041 430997 : continue;
7042 : }
7043 :
7044 : /* If this is not a gimple binary expression, there is
7045 : nothing for us to do with it. */
7046 44521774 : if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
7047 32919415 : continue;
7048 :
7049 11602359 : lhs = gimple_assign_lhs (stmt);
7050 11602359 : rhs1 = gimple_assign_rhs1 (stmt);
7051 11602359 : rhs2 = gimple_assign_rhs2 (stmt);
7052 :
7053 : /* For non-bit or min/max operations we can't associate
7054 : all types. Verify that here. */
7055 17007740 : if ((rhs_code != BIT_IOR_EXPR
7056 11602359 : && rhs_code != BIT_AND_EXPR
7057 10621637 : && rhs_code != BIT_XOR_EXPR
7058 10621637 : && rhs_code != MIN_EXPR
7059 10500857 : && rhs_code != MAX_EXPR
7060 10391155 : && !can_reassociate_type_p (TREE_TYPE (lhs)))
7061 6201457 : || !can_reassociate_op_p (rhs1)
7062 17800853 : || !can_reassociate_op_p (rhs2))
7063 5405381 : continue;
7064 :
7065 6196978 : if (associative_tree_code (rhs_code))
7066 : {
7067 4649852 : auto_vec<operand_entry *> ops;
7068 4649852 : tree powi_result = NULL_TREE;
7069 4649852 : bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
7070 :
7071 : /* There may be no immediate uses left by the time we
7072 : get here because we may have eliminated them all. */
7073 4649852 : if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
7074 46805 : continue;
7075 :
7076 4603047 : gimple_set_visited (stmt, true);
7077 4603047 : linearize_expr_tree (&ops, stmt, true, true);
7078 4603047 : ops.qsort (sort_by_operand_rank);
7079 4603047 : int orig_len = ops.length ();
7080 4603047 : optimize_ops_list (rhs_code, &ops);
7081 9206094 : if (undistribute_ops_list (rhs_code, &ops,
7082 : loop_containing_stmt (stmt)))
7083 : {
7084 258 : ops.qsort (sort_by_operand_rank);
7085 258 : optimize_ops_list (rhs_code, &ops);
7086 : }
7087 9206094 : if (undistribute_bitref_for_vector (rhs_code, &ops,
7088 : loop_containing_stmt (stmt)))
7089 : {
7090 40 : ops.qsort (sort_by_operand_rank);
7091 40 : optimize_ops_list (rhs_code, &ops);
7092 : }
7093 4603047 : if (rhs_code == PLUS_EXPR
7094 4603047 : && transform_add_to_multiply (&ops))
7095 73 : ops.qsort (sort_by_operand_rank);
7096 :
7097 4603047 : if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7098 : {
7099 965508 : if (is_vector)
7100 23257 : optimize_vec_cond_expr (rhs_code, &ops);
7101 : else
7102 942251 : optimize_range_tests (rhs_code, &ops, NULL);
7103 : }
7104 :
7105 4603047 : if (rhs_code == MULT_EXPR && !is_vector)
7106 : {
7107 1071665 : attempt_builtin_copysign (&ops);
7108 :
7109 1071665 : if (reassoc_insert_powi_p
7110 1071665 : && (flag_unsafe_math_optimizations
7111 440196 : || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7112 483446 : powi_result = attempt_builtin_powi (stmt, &ops);
7113 : }
7114 :
7115 4603047 : operand_entry *last;
7116 4603047 : bool negate_result = false;
7117 4603047 : if (ops.length () > 1
7118 4603047 : && rhs_code == MULT_EXPR)
7119 : {
7120 1094665 : last = ops.last ();
7121 1094665 : if ((integer_minus_onep (last->op)
7122 1094467 : || real_minus_onep (last->op))
7123 226 : && !HONOR_SNANS (TREE_TYPE (lhs))
7124 1094891 : && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7125 0 : || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7126 : {
7127 226 : ops.pop ();
7128 226 : negate_result = true;
7129 : }
7130 : }
7131 :
7132 4603047 : tree new_lhs = lhs;
7133 : /* If the operand vector is now empty, all operands were
7134 : consumed by the __builtin_powi optimization. */
7135 4603047 : if (ops.length () == 0)
7136 1404 : transform_stmt_to_copy (&gsi, stmt, powi_result);
7137 4601643 : else if (ops.length () == 1)
7138 : {
7139 13072 : tree last_op = ops.last ()->op;
7140 :
7141 : /* If the stmt that defines operand has to be inserted, insert it
7142 : before the use. */
7143 13072 : if (ops.last ()->stmt_to_insert)
7144 0 : insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7145 13072 : if (powi_result)
7146 190 : transform_stmt_to_multiply (&gsi, stmt, last_op,
7147 : powi_result);
7148 : else
7149 12882 : transform_stmt_to_copy (&gsi, stmt, last_op);
7150 : }
7151 : else
7152 : {
7153 4588571 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7154 4588571 : int ops_num = ops.length ();
7155 4588571 : int width = 0;
7156 4588571 : int mult_num = 0;
7157 :
7158 : /* For binary bit operations, if there are at least 3
7159 : operands and the last operand in OPS is a constant,
7160 : move it to the front. This helps ensure that we generate
7161 : (X & Y) & C rather than (X & C) & Y. The former will
7162 : often match a canonical bit test when we get to RTL. */
7163 4588571 : if (ops.length () > 2
7164 187291 : && (rhs_code == BIT_AND_EXPR
7165 : || rhs_code == BIT_IOR_EXPR
7166 166467 : || rhs_code == BIT_XOR_EXPR)
7167 4613366 : && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7168 1598 : std::swap (*ops[0], *ops[ops_num - 1]);
7169 :
7170 4588571 : optimization_type opt_type = bb_optimization_type (bb);
7171 :
7172 : /* If the target support FMA, rank_ops_for_fma will detect if
7173 : the chain has fmas and rearrange the ops if so. */
7174 4588571 : if (!reassoc_insert_powi_p
7175 2813477 : && direct_internal_fn_supported_p (IFN_FMA,
7176 2813477 : TREE_TYPE (lhs),
7177 : opt_type)
7178 4623117 : && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7179 : {
7180 16742 : mult_num = rank_ops_for_fma (&ops);
7181 : }
7182 :
7183 : /* Only rewrite the expression tree to parallel in the
7184 : last reassoc pass to avoid useless work back-and-forth
7185 : with initial linearization. */
7186 4588571 : bool has_fma = mult_num >= 2 && mult_num != ops_num;
7187 4588571 : if (!reassoc_insert_powi_p
7188 2813477 : && ops.length () > 3
7189 4604687 : && (width = get_reassociation_width (&ops, mult_num, lhs,
7190 : rhs_code, mode))
7191 : > 1)
7192 : {
7193 1757 : if (dump_file && (dump_flags & TDF_DETAILS))
7194 2 : fprintf (dump_file,
7195 : "Width = %d was chosen for reassociation\n",
7196 : width);
7197 1757 : rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7198 : width,
7199 : has_fma,
7200 : ops);
7201 : }
7202 : else
7203 : {
7204 : /* When there are three operands left, we want
7205 : to make sure the ones that get the double
7206 : binary op are chosen wisely. */
7207 4586814 : int len = ops.length ();
7208 4586814 : if (!reassoc_insert_powi_p
7209 2811720 : && len >= 3
7210 4717513 : && (!has_fma
7211 : /* width > 1 means ranking ops results in better
7212 : parallelism. Check current value to avoid
7213 : calling get_reassociation_width again. */
7214 5408 : || (width != 1
7215 5408 : && get_reassociation_width (
7216 : &ops, mult_num, lhs, rhs_code, mode)
7217 : > 1)))
7218 127534 : swap_ops_for_binary_stmt (ops, len - 3);
7219 :
7220 4586814 : new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7221 4586814 : powi_result != NULL
7222 4586814 : || negate_result,
7223 : len != orig_len);
7224 : }
7225 :
7226 : /* If we combined some repeated factors into a
7227 : __builtin_powi call, multiply that result by the
7228 : reassociated operands. */
7229 4588571 : if (powi_result)
7230 : {
7231 129 : gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7232 129 : tree type = TREE_TYPE (lhs);
7233 129 : tree target_ssa = make_temp_ssa_name (type, NULL,
7234 : "reassocpow");
7235 129 : gimple_set_lhs (lhs_stmt, target_ssa);
7236 129 : update_stmt (lhs_stmt);
7237 129 : if (lhs != new_lhs)
7238 : {
7239 129 : target_ssa = new_lhs;
7240 129 : new_lhs = lhs;
7241 : }
7242 129 : mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7243 : powi_result, target_ssa);
7244 129 : gimple_set_location (mul_stmt, gimple_location (stmt));
7245 129 : gimple_set_uid (mul_stmt, gimple_uid (stmt));
7246 129 : gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7247 : }
7248 : }
7249 :
7250 4603047 : if (negate_result)
7251 : {
7252 226 : stmt = SSA_NAME_DEF_STMT (lhs);
7253 226 : tree tmp = make_ssa_name (TREE_TYPE (lhs));
7254 226 : gimple_set_lhs (stmt, tmp);
7255 226 : if (lhs != new_lhs)
7256 216 : tmp = new_lhs;
7257 226 : gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7258 : tmp);
7259 226 : gimple_set_uid (neg_stmt, gimple_uid (stmt));
7260 226 : gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7261 226 : update_stmt (stmt);
7262 : }
7263 4649852 : }
7264 : }
7265 : }
7266 :
7267 19511519 : return cfg_cleanup_needed;
7268 : }
7269 :
7270 : /* Add jumps around shifts for range tests turned into bit tests.
7271 : For each SSA_NAME VAR we have code like:
7272 : VAR = ...; // final stmt of range comparison
7273 : // bit test here...;
7274 : OTHERVAR = ...; // final stmt of the bit test sequence
7275 : RES = VAR | OTHERVAR;
7276 : Turn the above into:
7277 : VAR = ...;
7278 : if (VAR != 0)
7279 : goto <l3>;
7280 : else
7281 : goto <l2>;
7282 : <l2>:
7283 : // bit test here...;
7284 : OTHERVAR = ...;
7285 : <l3>:
7286 : # RES = PHI<1(l1), OTHERVAR(l2)>; */
7287 :
7288 : static void
7289 2082662 : branch_fixup (void)
7290 : {
7291 2082662 : tree var;
7292 2082662 : unsigned int i;
7293 :
7294 2083027 : FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7295 : {
7296 365 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7297 365 : gimple *use_stmt;
7298 365 : use_operand_p use;
7299 365 : bool ok = single_imm_use (var, &use, &use_stmt);
7300 365 : gcc_assert (ok
7301 : && is_gimple_assign (use_stmt)
7302 : && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7303 : && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7304 :
7305 365 : basic_block cond_bb = gimple_bb (def_stmt);
7306 365 : basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7307 365 : basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7308 :
7309 365 : gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7310 365 : gimple *g = gimple_build_cond (NE_EXPR, var,
7311 365 : build_zero_cst (TREE_TYPE (var)),
7312 : NULL_TREE, NULL_TREE);
7313 365 : location_t loc = gimple_location (use_stmt);
7314 365 : gimple_set_location (g, loc);
7315 365 : gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7316 :
7317 365 : edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7318 365 : etrue->probability = profile_probability::even ();
7319 365 : edge efalse = find_edge (cond_bb, then_bb);
7320 365 : efalse->flags = EDGE_FALSE_VALUE;
7321 365 : efalse->probability -= etrue->probability;
7322 365 : then_bb->count -= etrue->count ();
7323 :
7324 365 : tree othervar = NULL_TREE;
7325 365 : if (gimple_assign_rhs1 (use_stmt) == var)
7326 250 : othervar = gimple_assign_rhs2 (use_stmt);
7327 115 : else if (gimple_assign_rhs2 (use_stmt) == var)
7328 : othervar = gimple_assign_rhs1 (use_stmt);
7329 : else
7330 0 : gcc_unreachable ();
7331 365 : tree lhs = gimple_assign_lhs (use_stmt);
7332 365 : gphi *phi = create_phi_node (lhs, merge_bb);
7333 365 : add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7334 365 : add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7335 365 : gsi = gsi_for_stmt (use_stmt);
7336 365 : gsi_remove (&gsi, true);
7337 :
7338 365 : set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7339 365 : set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7340 : }
7341 2082662 : reassoc_branch_fixups.release ();
7342 2082662 : }
7343 :
7344 : void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7345 : void debug_ops_vector (vec<operand_entry *> ops);
7346 :
7347 : /* Dump the operand entry vector OPS to FILE. */
7348 :
7349 : void
7350 0 : dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7351 : {
7352 0 : operand_entry *oe;
7353 0 : unsigned int i;
7354 :
7355 0 : FOR_EACH_VEC_ELT (ops, i, oe)
7356 : {
7357 0 : fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7358 0 : print_generic_expr (file, oe->op);
7359 0 : fprintf (file, "\n");
7360 : }
7361 0 : }
7362 :
7363 : /* Dump the operand entry vector OPS to STDERR. */
7364 :
7365 : DEBUG_FUNCTION void
7366 0 : debug_ops_vector (vec<operand_entry *> ops)
7367 : {
7368 0 : dump_ops_vector (stderr, ops);
7369 0 : }
7370 :
7371 : /* Bubble up return status from reassociate_bb. */
7372 :
7373 : static bool
7374 2082662 : do_reassoc ()
7375 : {
7376 2082662 : bool cfg_cleanup_needed = false;
7377 2082662 : basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
7378 :
7379 2082662 : unsigned sp = 0;
7380 2082662 : for (auto son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
7381 4165324 : son; son = next_dom_son (CDI_DOMINATORS, son))
7382 2082662 : worklist[sp++] = son;
7383 21594245 : while (sp)
7384 : {
7385 19511583 : basic_block bb = worklist[--sp];
7386 19511583 : break_up_subtract_bb (bb);
7387 19511583 : for (auto son = first_dom_son (CDI_DOMINATORS, bb);
7388 36940504 : son; son = next_dom_son (CDI_DOMINATORS, son))
7389 17428921 : worklist[sp++] = son;
7390 : }
7391 :
7392 10828996 : for (auto son = first_dom_son (CDI_POST_DOMINATORS,
7393 2082662 : EXIT_BLOCK_PTR_FOR_FN (cfun));
7394 10828996 : son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7395 8746334 : worklist[sp++] = son;
7396 21594181 : while (sp)
7397 : {
7398 19511519 : basic_block bb = worklist[--sp];
7399 19511519 : cfg_cleanup_needed |= reassociate_bb (bb);
7400 19511519 : for (auto son = first_dom_son (CDI_POST_DOMINATORS, bb);
7401 30276704 : son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7402 10765185 : worklist[sp++] = son;
7403 : }
7404 :
7405 2082662 : free (worklist);
7406 2082662 : return cfg_cleanup_needed;
7407 : }
7408 :
7409 : /* Initialize the reassociation pass. */
7410 :
7411 : static void
7412 2082662 : init_reassoc (void)
7413 : {
7414 2082662 : int i;
7415 2082662 : int64_t rank = 2;
7416 2082662 : int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7417 :
7418 : /* Find the loops, so that we can prevent moving calculations in
7419 : them. */
7420 2082662 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7421 :
7422 2082662 : memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7423 :
7424 2082662 : next_operand_entry_id = 0;
7425 :
7426 : /* Reverse RPO (Reverse Post Order) will give us something where
7427 : deeper loops come later. */
7428 2082662 : pre_and_rev_post_order_compute (NULL, bbs, false);
7429 2082662 : bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7430 2082662 : operand_rank = new hash_map<tree, int64_t>;
7431 :
7432 : /* Give each default definition a distinct rank. This includes
7433 : parameters and the static chain. Walk backwards over all
7434 : SSA names so that we get proper rank ordering according
7435 : to tree_swap_operands_p. */
7436 108912319 : for (i = num_ssa_names - 1; i > 0; --i)
7437 : {
7438 104746995 : tree name = ssa_name (i);
7439 178935749 : if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7440 6142881 : insert_operand_rank (name, ++rank);
7441 : }
7442 :
7443 : /* Set up rank for each BB */
7444 21594181 : for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7445 19511519 : bb_rank[bbs[i]] = ++rank << 16;
7446 :
7447 2082662 : free (bbs);
7448 2082662 : calculate_dominance_info (CDI_POST_DOMINATORS);
7449 2082662 : plus_negates = vNULL;
7450 2082662 : mark_ssa_maybe_undefs ();
7451 2082662 : }
7452 :
7453 : /* Cleanup after the reassociation pass, and print stats if
7454 : requested. */
7455 :
7456 : static void
7457 2082662 : fini_reassoc (void)
7458 : {
7459 2082662 : statistics_counter_event (cfun, "Linearized",
7460 : reassociate_stats.linearized);
7461 2082662 : statistics_counter_event (cfun, "Constants eliminated",
7462 : reassociate_stats.constants_eliminated);
7463 2082662 : statistics_counter_event (cfun, "Ops eliminated",
7464 : reassociate_stats.ops_eliminated);
7465 2082662 : statistics_counter_event (cfun, "Statements rewritten",
7466 : reassociate_stats.rewritten);
7467 2082662 : statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7468 : reassociate_stats.pows_encountered);
7469 2082662 : statistics_counter_event (cfun, "Built-in powi calls created",
7470 : reassociate_stats.pows_created);
7471 :
7472 4165324 : delete operand_rank;
7473 2082662 : bitmap_clear (biased_names);
7474 2082662 : operand_entry_pool.release ();
7475 2082662 : free (bb_rank);
7476 2082662 : plus_negates.release ();
7477 2082662 : free_dominance_info (CDI_POST_DOMINATORS);
7478 2082662 : loop_optimizer_finalize ();
7479 2082662 : }
7480 :
7481 : /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7482 : insertion of __builtin_powi calls.
7483 :
7484 : Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7485 : optimization of a gimple conditional. Otherwise returns zero. */
7486 :
7487 : static unsigned int
7488 2082662 : execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7489 : {
7490 2082662 : reassoc_insert_powi_p = insert_powi_p;
7491 2082662 : reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7492 :
7493 2082662 : init_reassoc ();
7494 :
7495 2082662 : bool cfg_cleanup_needed;
7496 2082662 : cfg_cleanup_needed = do_reassoc ();
7497 2082662 : repropagate_negates ();
7498 2082662 : branch_fixup ();
7499 :
7500 2082662 : fini_reassoc ();
7501 2082662 : return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7502 : }
7503 :
7504 : namespace {
7505 :
7506 : const pass_data pass_data_reassoc =
7507 : {
7508 : GIMPLE_PASS, /* type */
7509 : "reassoc", /* name */
7510 : OPTGROUP_NONE, /* optinfo_flags */
7511 : TV_TREE_REASSOC, /* tv_id */
7512 : ( PROP_cfg | PROP_ssa ), /* properties_required */
7513 : 0, /* properties_provided */
7514 : 0, /* properties_destroyed */
7515 : 0, /* todo_flags_start */
7516 : TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7517 : };
7518 :
7519 : class pass_reassoc : public gimple_opt_pass
7520 : {
7521 : public:
7522 571444 : pass_reassoc (gcc::context *ctxt)
7523 1142888 : : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7524 : {}
7525 :
7526 : /* opt_pass methods: */
7527 285722 : opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7528 571444 : void set_pass_param (unsigned int n, bool param) final override
7529 : {
7530 571444 : gcc_assert (n == 0);
7531 571444 : insert_powi_p = param;
7532 571444 : bias_loop_carried_phi_ranks_p = !param;
7533 571444 : }
7534 2082968 : bool gate (function *) final override { return flag_tree_reassoc != 0; }
7535 2082662 : unsigned int execute (function *) final override
7536 : {
7537 2082662 : return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7538 : }
7539 :
7540 : private:
7541 : /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7542 : point 3a in the pass header comment. */
7543 : bool insert_powi_p;
7544 : bool bias_loop_carried_phi_ranks_p;
7545 : }; // class pass_reassoc
7546 :
7547 : } // anon namespace
7548 :
7549 : gimple_opt_pass *
7550 285722 : make_pass_reassoc (gcc::context *ctxt)
7551 : {
7552 285722 : return new pass_reassoc (ctxt);
7553 : }
|