Line data Source code
1 : /* Reassociation for trees.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 : Contributed by Daniel Berlin <dan@dberlin.org>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "target.h"
26 : #include "rtl.h"
27 : #include "tree.h"
28 : #include "gimple.h"
29 : #include "cfghooks.h"
30 : #include "alloc-pool.h"
31 : #include "tree-pass.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "ssa.h"
35 : #include "optabs-tree.h"
36 : #include "gimple-pretty-print.h"
37 : #include "diagnostic-core.h"
38 : #include "fold-const.h"
39 : #include "stor-layout.h"
40 : #include "cfganal.h"
41 : #include "gimple-iterator.h"
42 : #include "gimple-fold.h"
43 : #include "tree-eh.h"
44 : #include "gimplify-me.h"
45 : #include "tree-cfg.h"
46 : #include "tree-ssa-loop.h"
47 : #include "flags.h"
48 : #include "tree-ssa.h"
49 : #include "langhooks.h"
50 : #include "cfgloop.h"
51 : #include "builtins.h"
52 : #include "gimplify.h"
53 : #include "case-cfn-macros.h"
54 : #include "tree-ssa-reassoc.h"
55 : #include "tree-ssa-math-opts.h"
56 : #include "gimple-range.h"
57 : #include "internal-fn.h"
58 :
59 : /* This is a simple global reassociation pass. It is, in part, based
60 : on the LLVM pass of the same name (They do some things more/less
61 : than we do, in different orders, etc).
62 :
63 : It consists of five steps:
64 :
65 : 1. Breaking up subtract operations into addition + negate, where
66 : it would promote the reassociation of adds.
67 :
68 : 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 : becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 : During linearization, we place the operands of the binary
71 : expressions into a vector of operand_entry_*
72 :
73 : 3. Optimization of the operand lists, eliminating things like a +
74 : -a, a & a, etc.
75 :
76 : 3a. Combine repeated factors with the same occurrence counts
77 : into a __builtin_powi call that will later be optimized into
78 : an optimal number of multiplies.
79 :
80 : 4. Rewrite the expression trees we linearized and optimized so
81 : they are in proper rank order.
82 :
83 : 5. Repropagate negates, as nothing else will clean it up ATM.
84 :
85 : A bit of theory on #4, since nobody seems to write anything down
86 : about why it makes sense to do it the way they do it:
87 :
88 : We could do this much nicer theoretically, but don't (for reasons
89 : explained after how to do it theoretically nice :P).
90 :
91 : In order to promote the most redundancy elimination, you want
92 : binary expressions whose operands are the same rank (or
93 : preferably, the same value) exposed to the redundancy eliminator,
94 : for possible elimination.
95 :
96 : So the way to do this if we really cared, is to build the new op
97 : tree from the leaves to the roots, merging as you go, and putting the
98 : new op on the end of the worklist, until you are left with one
99 : thing on the worklist.
100 :
101 : IE if you have to rewrite the following set of operands (listed with
102 : rank in parentheses), with opcode PLUS_EXPR:
103 :
104 : a (1), b (1), c (1), d (2), e (2)
105 :
106 :
107 : We start with our merge worklist empty, and the ops list with all of
108 : those on it.
109 :
110 : You want to first merge all leaves of the same rank, as much as
111 : possible.
112 :
113 : So first build a binary op of
114 :
115 : mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 :
117 : Because there is no three operand form of PLUS_EXPR, c is not going to
118 : be exposed to redundancy elimination as a rank 1 operand.
119 :
120 : So you might as well throw it on the merge worklist (you could also
121 : consider it to now be a rank two operand, and merge it with d and e,
122 : but in this case, you then have evicted e from a binary op. So at
123 : least in this situation, you can't win.)
124 :
125 : Then build a binary op of d + e
126 : mergetmp2 = d + e
127 :
128 : and put mergetmp2 on the merge worklist.
129 :
130 : so merge worklist = {mergetmp, c, mergetmp2}
131 :
132 : Continue building binary ops of these operations until you have only
133 : one operation left on the worklist.
134 :
135 : So we have
136 :
137 : build binary op
138 : mergetmp3 = mergetmp + c
139 :
140 : worklist = {mergetmp2, mergetmp3}
141 :
142 : mergetmp4 = mergetmp2 + mergetmp3
143 :
144 : worklist = {mergetmp4}
145 :
146 : because we have one operation left, we can now just set the original
147 : statement equal to the result of that operation.
148 :
149 : This will at least expose a + b and d + e to redundancy elimination
150 : as binary operations.
151 :
152 : For extra points, you can reuse the old statements to build the
153 : mergetmps, since you shouldn't run out.
154 :
155 : So why don't we do this?
156 :
157 : Because it's expensive, and rarely will help. Most trees we are
158 : reassociating have 3 or fewer ops. If they have 2 ops, they already
159 : will be written into a nice single binary op. If you have 3 ops, a
160 : single simple check suffices to tell you whether the first two are of the
161 : same rank. If so, you know to order it
162 :
163 : mergetmp = op1 + op2
164 : newstmt = mergetmp + op3
165 :
166 : instead of
167 : mergetmp = op2 + op3
168 : newstmt = mergetmp + op1
169 :
170 : If all three are of the same rank, you can't expose them all in a
171 : single binary operator anyway, so the above is *still* the best you
172 : can do.
173 :
174 : Thus, this is what we do. When we have three ops left, we check to see
175 : what order to put them in, and call it a day. As a nod to vector sum
176 : reduction, we check if any of the ops are really a phi node that is a
177 : destructive update for the associating op, and keep the destructive
178 : update together for vector sum reduction recognition. */
179 :
180 : /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 : point 3a in the pass header comment. */
182 : static bool reassoc_insert_powi_p;
183 :
184 : /* Enable biasing ranks of loop accumulators. We don't want this before
185 : vectorization, since it interferes with reduction chains. */
186 : static bool reassoc_bias_loop_carried_phi_ranks_p;
187 :
188 : /* Statistics */
189 : static struct
190 : {
191 : int linearized;
192 : int constants_eliminated;
193 : int ops_eliminated;
194 : int rewritten;
195 : int pows_encountered;
196 : int pows_created;
197 : } reassociate_stats;
198 :
199 :
200 : static object_allocator<operand_entry> operand_entry_pool
201 : ("operand entry pool");
202 :
203 : /* This is used to assign a unique ID to each struct operand_entry
204 : so that qsort results are identical on different hosts. */
205 : static unsigned int next_operand_entry_id;
206 :
207 : /* Starting rank number for a given basic block, so that we can rank
208 : operations using unmovable instructions in that BB based on the bb
209 : depth. */
210 : static int64_t *bb_rank;
211 :
212 : /* Operand->rank hashtable. */
213 : static hash_map<tree, int64_t> *operand_rank;
214 :
215 : /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 : biased. */
217 : static auto_bitmap biased_names;
218 :
219 : /* Vector of SSA_NAMEs on which after reassociate_bb is done with
220 : all basic blocks the CFG should be adjusted - basic blocks
221 : split right after that SSA_NAME's definition statement and before
222 : the only use, which must be a bit ior. */
223 : static vec<tree> reassoc_branch_fixups;
224 :
225 : /* Forward decls. */
226 : static int64_t get_rank (tree);
227 : static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 :
229 : /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 : possibly added by gsi_remove. */
231 :
232 : static bool
233 169210 : reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 : {
235 169210 : gimple *stmt = gsi_stmt (*gsi);
236 :
237 169210 : if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 72410 : return gsi_remove (gsi, true);
239 :
240 96800 : gimple_stmt_iterator prev = *gsi;
241 96800 : gsi_prev (&prev);
242 96800 : unsigned uid = gimple_uid (stmt);
243 96800 : basic_block bb = gimple_bb (stmt);
244 96800 : bool ret = gsi_remove (gsi, true);
245 96800 : if (!gsi_end_p (prev))
246 96587 : gsi_next (&prev);
247 : else
248 426 : prev = gsi_start_bb (bb);
249 96800 : gimple *end_stmt = gsi_stmt (*gsi);
250 100785 : while ((stmt = gsi_stmt (prev)) != end_stmt)
251 : {
252 3985 : gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 3985 : gimple_set_uid (stmt, uid);
254 3985 : gsi_next (&prev);
255 : }
256 : return ret;
257 : }
258 :
/* Amount added to the rank of a loop-carried phi.  It should exceed the
   depth of any reassociation tree we can encounter while staying below
   the rank difference between two basic blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
263 :
264 : /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 : operands to the STMT's left-hand side. The goal is to preserve bias in code
266 : like this:
267 :
268 : x_1 = phi(x_0, x_2)
269 : a = x_1 | 1
270 : b = a ^ 2
271 : .MEM = b
272 : c = b + d
273 : x_2 = c + e
274 :
275 : That is, we need to preserve bias along single-use chains originating from
276 : loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 : uses, because only they participate in rank propagation. */
278 : static bool
279 6770353 : propagate_bias_p (gimple *stmt)
280 : {
281 6770353 : use_operand_p use;
282 6770353 : imm_use_iterator use_iter;
283 6770353 : gimple *single_use_stmt = NULL;
284 :
285 6770353 : if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 : return false;
287 :
288 17291748 : FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 : {
290 7711250 : gimple *current_use_stmt = USE_STMT (use);
291 :
292 7711250 : if (is_gimple_assign (current_use_stmt)
293 7711250 : && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 : {
295 5878122 : if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 715002 : return false;
297 : single_use_stmt = current_use_stmt;
298 : }
299 715002 : }
300 :
301 4432748 : if (single_use_stmt == NULL)
302 : return false;
303 :
304 4432376 : if (gimple_bb (stmt)->loop_father
305 4432376 : != gimple_bb (single_use_stmt)->loop_father)
306 : return false;
307 :
308 : return true;
309 : }
310 :
311 : /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 : an innermost loop, and the phi has only a single use which is inside
313 : the loop, then the rank is the block rank of the loop latch plus an
314 : extra bias for the loop-carried dependence. This causes expressions
315 : calculated into an accumulator variable to be independent for each
316 : iteration of the loop. If STMT is some other phi, the rank is the
317 : block rank of its containing block. */
318 : static int64_t
319 1420677 : phi_rank (gimple *stmt)
320 : {
321 1420677 : basic_block bb = gimple_bb (stmt);
322 1420677 : class loop *father = bb->loop_father;
323 1420677 : tree res;
324 1420677 : unsigned i;
325 1420677 : use_operand_p use;
326 1420677 : gimple *use_stmt;
327 :
328 1420677 : if (!reassoc_bias_loop_carried_phi_ranks_p)
329 546059 : return bb_rank[bb->index];
330 :
331 : /* We only care about real loops (those with a latch). */
332 874618 : if (!father->latch)
333 1 : return bb_rank[bb->index];
334 :
335 : /* Interesting phis must be in headers of innermost loops. */
336 874617 : if (bb != father->header
337 685250 : || father->inner)
338 342295 : return bb_rank[bb->index];
339 :
340 : /* Ignore virtual SSA_NAMEs. */
341 532322 : res = gimple_phi_result (stmt);
342 1064644 : if (virtual_operand_p (res))
343 0 : return bb_rank[bb->index];
344 :
345 : /* The phi definition must have a single use, and that use must be
346 : within the loop. Otherwise this isn't an accumulator pattern. */
347 532322 : if (!single_imm_use (res, &use, &use_stmt)
348 532322 : || gimple_bb (use_stmt)->loop_father != father)
349 463028 : return bb_rank[bb->index];
350 :
351 : /* Look for phi arguments from within the loop. If found, bias this phi. */
352 78940 : for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 : {
354 78732 : tree arg = gimple_phi_arg_def (stmt, i);
355 78732 : if (TREE_CODE (arg) == SSA_NAME
356 78732 : && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 : {
358 73858 : gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 73858 : if (gimple_bb (def_stmt)->loop_father == father)
360 69086 : return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
361 : }
362 : }
363 :
364 : /* Must be an uninteresting phi. */
365 208 : return bb_rank[bb->index];
366 : }
367 :
368 : /* Return the maximum of RANK and the rank that should be propagated
369 : from expression OP. For most operands, this is just the rank of OP.
370 : For loop-carried phis, the value is zero to avoid undoing the bias
371 : in favor of the phi. */
372 : static int64_t
373 7414579 : propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 : {
375 7414579 : int64_t op_rank;
376 :
377 7414579 : op_rank = get_rank (op);
378 :
379 : /* Check whether op is biased after the get_rank () call, since it might have
380 : updated biased_names. */
381 7414579 : if (TREE_CODE (op) == SSA_NAME
382 7414579 : && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 : {
384 45950 : if (maybe_biased_p == NULL)
385 : return rank;
386 32149 : *maybe_biased_p = true;
387 : }
388 :
389 7400778 : return MAX (rank, op_rank);
390 : }
391 :
392 : /* Look up the operand rank structure for expression E. */
393 :
394 : static inline int64_t
395 13597393 : find_operand_rank (tree e)
396 : {
397 13597393 : int64_t *slot = operand_rank->get (e);
398 13597393 : return slot ? *slot : -1;
399 : }
400 :
401 : /* Insert {E,RANK} into the operand rank hashtable. */
402 :
403 : static inline void
404 14594735 : insert_operand_rank (tree e, int64_t rank)
405 : {
406 14594735 : gcc_assert (rank > 0);
407 14594735 : bool existed = operand_rank->put (e, rank);
408 14594735 : gcc_assert (!existed);
409 14594735 : }
410 :
411 : /* Given an expression E, return the rank of the expression. */
412 :
413 : static int64_t
414 16911825 : get_rank (tree e)
415 : {
416 : /* SSA_NAME's have the rank of the expression they are the result
417 : of.
418 : For globals and uninitialized values, the rank is 0.
419 : For function arguments, use the pre-setup rank.
420 : For PHI nodes, stores, asm statements, etc, we use the rank of
421 : the BB.
422 : For simple operations, the rank is the maximum rank of any of
423 : its operands, or the bb_rank, whichever is less.
424 : I make no claims that this is optimal, however, it gives good
425 : results. */
426 :
427 : /* We make an exception to the normal ranking system to break
428 : dependences of accumulator variables in loops. Suppose we
429 : have a simple one-block loop containing:
430 :
431 : x_1 = phi(x_0, x_2)
432 : b = a + x_1
433 : c = b + d
434 : x_2 = c + e
435 :
436 : As shown, each iteration of the calculation into x is fully
437 : dependent upon the iteration before it. We would prefer to
438 : see this in the form:
439 :
440 : x_1 = phi(x_0, x_2)
441 : b = a + d
442 : c = b + e
443 : x_2 = c + x_1
444 :
445 : If the loop is unrolled, the calculations of b and c from
446 : different iterations can be interleaved.
447 :
448 : To obtain this result during reassociation, we bias the rank
449 : of the phi definition x_1 upward, when it is recognized as an
450 : accumulator pattern. The artificial rank causes it to be
451 : added last, providing the desired independence. */
452 :
453 16911825 : if (TREE_CODE (e) == SSA_NAME)
454 : {
455 13597393 : ssa_op_iter iter;
456 13597393 : gimple *stmt;
457 13597393 : int64_t rank;
458 13597393 : tree op;
459 :
460 : /* If we already have a rank for this expression, use that. */
461 13597393 : rank = find_operand_rank (e);
462 13597393 : if (rank != -1)
463 : return rank;
464 :
465 8451724 : stmt = SSA_NAME_DEF_STMT (e);
466 8451724 : if (gimple_code (stmt) == GIMPLE_PHI)
467 : {
468 1420677 : rank = phi_rank (stmt);
469 1420677 : if (rank != bb_rank[gimple_bb (stmt)->index])
470 69086 : bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 : }
472 :
473 7031047 : else if (!is_gimple_assign (stmt))
474 260694 : rank = bb_rank[gimple_bb (stmt)->index];
475 :
476 : else
477 : {
478 6770353 : bool biased_p = false;
479 6770353 : bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
480 :
481 : /* Otherwise, find the maximum rank for the operands. As an
482 : exception, remove the bias from loop-carried phis when propagating
483 : the rank so that dependent operations are not also biased. */
484 : /* Simply walk over all SSA uses - this takes advantage of the
485 : fact that non-SSA operands are is_gimple_min_invariant and
486 : thus have rank 0. */
487 6770353 : rank = 0;
488 14184932 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
489 7414579 : rank = propagate_rank (rank, op, maybe_biased_p);
490 :
491 6770353 : rank += 1;
492 6770353 : if (biased_p)
493 30849 : bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 : }
495 :
496 8451724 : if (dump_file && (dump_flags & TDF_DETAILS))
497 : {
498 205 : fprintf (dump_file, "Rank for ");
499 205 : print_generic_expr (dump_file, e);
500 205 : fprintf (dump_file, " is %" PRId64 "\n", rank);
501 : }
502 :
503 : /* Note the rank in the hashtable so we don't recompute it. */
504 8451724 : insert_operand_rank (e, rank);
505 8451724 : return rank;
506 : }
507 :
508 : /* Constants, globals, etc., are rank 0 */
509 : return 0;
510 : }
511 :
512 :
/* Sort keys for rank-0 constants.  We want integer ones to end up last
   no matter what, since they are the ones we can do the most with, so
   they get the largest value.  The shift expressions are parenthesized
   so that the macros expand safely inside larger expressions such as a
   subtraction of two keys.  */
#define INTEGER_CONST_TYPE (1 << 4)
#define FLOAT_ONE_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
519 :
520 : /* Classify an invariant tree into integer, float, or other, so that
521 : we can sort them to be near other constants of the same type. */
522 : static inline int
523 319472 : constant_type (tree t)
524 : {
525 319472 : if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
526 : return INTEGER_CONST_TYPE;
527 8962 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528 : {
529 : /* Sort -1.0 and 1.0 constants last, while in some cases
530 : const_binop can't optimize some inexact operations, multiplication
531 : by -1.0 or 1.0 can be always merged with others. */
532 6588 : if (real_onep (t) || real_minus_onep (t))
533 816 : return FLOAT_ONE_CONST_TYPE;
534 : return FLOAT_CONST_TYPE;
535 : }
536 : else
537 : return OTHER_CONST_TYPE;
538 : }
539 :
540 : /* qsort comparison function to sort operand entries PA and PB by rank
541 : so that the sorted array is ordered by rank in decreasing order. */
542 : static int
543 23255109 : sort_by_operand_rank (const void *pa, const void *pb)
544 : {
545 23255109 : const operand_entry *oea = *(const operand_entry *const *)pa;
546 23255109 : const operand_entry *oeb = *(const operand_entry *const *)pb;
547 :
548 23255109 : if (oeb->rank != oea->rank)
549 34468934 : return oeb->rank > oea->rank ? 1 : -1;
550 :
551 : /* It's nicer for optimize_expression if constants that are likely
552 : to fold when added/multiplied/whatever are put next to each
553 : other. Since all constants have rank 0, order them by type. */
554 2792964 : if (oea->rank == 0)
555 : {
556 159678 : if (constant_type (oeb->op) != constant_type (oea->op))
557 58 : return constant_type (oea->op) - constant_type (oeb->op);
558 : else
559 : /* To make sorting result stable, we use unique IDs to determine
560 : order. */
561 256415 : return oeb->id > oea->id ? 1 : -1;
562 : }
563 :
564 2633286 : if (TREE_CODE (oea->op) != SSA_NAME)
565 : {
566 0 : if (TREE_CODE (oeb->op) != SSA_NAME)
567 0 : return oeb->id > oea->id ? 1 : -1;
568 : else
569 : return 1;
570 : }
571 2633286 : else if (TREE_CODE (oeb->op) != SSA_NAME)
572 : return -1;
573 :
574 : /* Lastly, make sure the versions that are the same go next to each
575 : other. */
576 2633286 : if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 : {
578 : /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
579 : versions of removed SSA_NAMEs, so if possible, prefer to sort
580 : based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
581 : See PR60418. */
582 2578365 : gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
583 2578365 : gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
584 2578365 : basic_block bba = gimple_bb (stmta);
585 2578365 : basic_block bbb = gimple_bb (stmtb);
586 2578365 : if (bbb != bba)
587 : {
588 : /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
589 : but the other might not. */
590 179697 : if (!bba)
591 : return 1;
592 175412 : if (!bbb)
593 : return -1;
594 : /* If neither is, compare bb_rank. */
595 169488 : if (bb_rank[bbb->index] != bb_rank[bba->index])
596 169488 : return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 : }
598 :
599 2398668 : bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
600 2398668 : bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
601 2398668 : if (da != db)
602 3679102 : return da ? 1 : -1;
603 :
604 53569 : return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 : }
606 :
607 54921 : return oeb->id > oea->id ? 1 : -1;
608 : }
609 :
610 : /* Add an operand entry to *OPS for the tree operand OP. */
611 :
612 : static void
613 9496793 : add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 : {
615 9496793 : operand_entry *oe = operand_entry_pool.allocate ();
616 :
617 9496793 : oe->op = op;
618 9496793 : oe->rank = get_rank (op);
619 9496793 : oe->id = next_operand_entry_id++;
620 9496793 : oe->count = 1;
621 9496793 : oe->stmt_to_insert = stmt_to_insert;
622 9496793 : ops->safe_push (oe);
623 9496793 : }
624 :
625 : /* Add an operand entry to *OPS for the tree operand OP with repeat
626 : count REPEAT. */
627 :
628 : static void
629 18 : add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
630 : HOST_WIDE_INT repeat)
631 : {
632 18 : operand_entry *oe = operand_entry_pool.allocate ();
633 :
634 18 : oe->op = op;
635 18 : oe->rank = get_rank (op);
636 18 : oe->id = next_operand_entry_id++;
637 18 : oe->count = repeat;
638 18 : oe->stmt_to_insert = NULL;
639 18 : ops->safe_push (oe);
640 :
641 18 : reassociate_stats.pows_encountered++;
642 18 : }
643 :
644 : /* Returns true if we can associate the SSA def OP. */
645 :
646 : static bool
647 31273194 : can_reassociate_op_p (tree op)
648 : {
649 31273194 : if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
650 : return false;
651 : /* Uninitialized variables can't participate in reassociation. */
652 31272368 : if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
653 : return false;
654 : /* Make sure asm goto outputs do not participate in reassociation since
655 : we have no way to find an insertion place after asm goto. */
656 31267550 : if (TREE_CODE (op) == SSA_NAME
657 23098001 : && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
658 31289407 : && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
659 90 : return false;
660 : return true;
661 : }
662 :
663 : /* Returns true if we can reassociate operations of TYPE.
664 : That is for integral or non-saturating fixed-point types, and for
665 : floating point type when associative-math is enabled. */
666 :
667 : static bool
668 56928784 : can_reassociate_type_p (tree type)
669 : {
670 56928784 : if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
671 35199188 : || NON_SAT_FIXED_POINT_TYPE_P (type)
672 92127972 : || (flag_associative_math && FLOAT_TYPE_P (type)))
673 22114127 : return true;
674 : return false;
675 : }
676 :
677 : /* Return true if STMT is reassociable operation containing a binary
678 : operation with tree code CODE, and is inside LOOP. */
679 :
680 : static bool
681 7644520 : is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
682 : {
683 7644520 : basic_block bb = gimple_bb (stmt);
684 :
685 7644520 : if (gimple_bb (stmt) == NULL)
686 : return false;
687 :
688 7463688 : if (!flow_bb_inside_loop_p (loop, bb))
689 : return false;
690 :
691 7253973 : if (is_gimple_assign (stmt)
692 5776225 : && gimple_assign_rhs_code (stmt) == code
693 8074231 : && has_single_use (gimple_assign_lhs (stmt)))
694 : {
695 607324 : tree rhs1 = gimple_assign_rhs1 (stmt);
696 607324 : tree rhs2 = gimple_assign_rhs2 (stmt);
697 607324 : if (!can_reassociate_op_p (rhs1)
698 607324 : || (rhs2 && !can_reassociate_op_p (rhs2)))
699 : return false;
700 : return true;
701 : }
702 :
703 : return false;
704 : }
705 :
706 :
707 : /* Return true if STMT is a nop-conversion. */
708 :
709 : static bool
710 7566306 : gimple_nop_conversion_p (gimple *stmt)
711 : {
712 7566306 : if (gassign *ass = dyn_cast <gassign *> (stmt))
713 : {
714 8936447 : if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
715 6671929 : && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
716 1469137 : TREE_TYPE (gimple_assign_rhs1 (ass))))
717 : return true;
718 : }
719 : return false;
720 : }
721 :
722 : /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
723 : operand of the negate operation. Otherwise, return NULL. */
724 :
725 : static tree
726 7474432 : get_unary_op (tree name, enum tree_code opcode)
727 : {
728 7474432 : gimple *stmt = SSA_NAME_DEF_STMT (name);
729 :
730 : /* Look through nop conversions (sign changes). */
731 7474432 : if (gimple_nop_conversion_p (stmt)
732 7474432 : && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
733 887528 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
734 :
735 7474432 : if (!is_gimple_assign (stmt))
736 : return NULL_TREE;
737 :
738 4754124 : if (gimple_assign_rhs_code (stmt) == opcode)
739 126569 : return gimple_assign_rhs1 (stmt);
740 : return NULL_TREE;
741 : }
742 :
743 : /* Return true if OP1 and OP2 have the same value if casted to either type. */
744 :
745 : static bool
746 47039 : ops_equal_values_p (tree op1, tree op2)
747 : {
748 47039 : if (op1 == op2)
749 : return true;
750 :
751 46857 : tree orig_op1 = op1;
752 46857 : if (TREE_CODE (op1) == SSA_NAME)
753 : {
754 46857 : gimple *stmt = SSA_NAME_DEF_STMT (op1);
755 46857 : if (gimple_nop_conversion_p (stmt))
756 : {
757 18509 : op1 = gimple_assign_rhs1 (stmt);
758 18509 : if (op1 == op2)
759 : return true;
760 : }
761 : }
762 :
763 45017 : if (TREE_CODE (op2) == SSA_NAME)
764 : {
765 45017 : gimple *stmt = SSA_NAME_DEF_STMT (op2);
766 45017 : if (gimple_nop_conversion_p (stmt))
767 : {
768 17293 : op2 = gimple_assign_rhs1 (stmt);
769 17293 : if (op1 == op2
770 17293 : || orig_op1 == op2)
771 : return true;
772 : }
773 : }
774 :
775 : return false;
776 : }
777 :
778 :
779 : /* If CURR and LAST are a pair of ops that OPCODE allows us to
780 : eliminate through equivalences, do so, remove them from OPS, and
781 : return true. Otherwise, return false. */
782 :
783 : static bool
784 9390136 : eliminate_duplicate_pair (enum tree_code opcode,
785 : vec<operand_entry *> *ops,
786 : bool *all_done,
787 : unsigned int i,
788 : operand_entry *curr,
789 : operand_entry *last)
790 : {
791 :
792 : /* If we have two of the same op, and the opcode is & |, min, or max,
793 : we can eliminate one of them.
794 : If we have two of the same op, and the opcode is ^, we can
795 : eliminate both of them. */
796 :
797 9390136 : if (last && last->op == curr->op)
798 : {
799 5429 : switch (opcode)
800 : {
801 30 : case MAX_EXPR:
802 30 : case MIN_EXPR:
803 30 : case BIT_IOR_EXPR:
804 30 : case BIT_AND_EXPR:
805 30 : if (dump_file && (dump_flags & TDF_DETAILS))
806 : {
807 1 : fprintf (dump_file, "Equivalence: ");
808 1 : print_generic_expr (dump_file, curr->op);
809 1 : fprintf (dump_file, " [&|minmax] ");
810 1 : print_generic_expr (dump_file, last->op);
811 1 : fprintf (dump_file, " -> ");
812 1 : print_generic_stmt (dump_file, last->op);
813 : }
814 :
815 30 : ops->ordered_remove (i);
816 30 : reassociate_stats.ops_eliminated ++;
817 :
818 30 : return true;
819 :
820 121 : case BIT_XOR_EXPR:
821 121 : if (dump_file && (dump_flags & TDF_DETAILS))
822 : {
823 0 : fprintf (dump_file, "Equivalence: ");
824 0 : print_generic_expr (dump_file, curr->op);
825 0 : fprintf (dump_file, " ^ ");
826 0 : print_generic_expr (dump_file, last->op);
827 0 : fprintf (dump_file, " -> nothing\n");
828 : }
829 :
830 121 : reassociate_stats.ops_eliminated += 2;
831 :
832 121 : if (ops->length () == 2)
833 : {
834 1 : ops->truncate (0);
835 1 : add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
836 1 : *all_done = true;
837 : }
838 : else
839 : {
840 120 : ops->ordered_remove (i-1);
841 120 : ops->ordered_remove (i-1);
842 : }
843 :
844 121 : return true;
845 :
846 : default:
847 : break;
848 : }
849 : }
850 : return false;
851 : }
852 :
853 : static vec<tree> plus_negates;
854 :
855 : /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
856 : expression, look in OPS for a corresponding positive operation to cancel
857 : it out. If we find one, remove the other from OPS, replace
858 : OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
859 : return false. */
860 :
861 : static bool
862 9389985 : eliminate_plus_minus_pair (enum tree_code opcode,
863 : vec<operand_entry *> *ops,
864 : unsigned int currindex,
865 : operand_entry *curr)
866 : {
867 9389985 : tree negateop;
868 9389985 : tree notop;
869 9389985 : unsigned int i;
870 9389985 : operand_entry *oe;
871 :
872 9389985 : if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
873 : return false;
874 :
875 2995994 : negateop = get_unary_op (curr->op, NEGATE_EXPR);
876 2995994 : notop = get_unary_op (curr->op, BIT_NOT_EXPR);
877 2995994 : if (negateop == NULL_TREE && notop == NULL_TREE)
878 : return false;
879 :
880 : /* Any non-negated version will have a rank that is one less than
881 : the current rank. So once we hit those ranks, if we don't find
882 : one, we can stop. */
883 :
884 135487 : for (i = currindex + 1;
885 199253 : ops->iterate (i, &oe)
886 246292 : && oe->rank >= curr->rank - 1 ;
887 : i++)
888 : {
889 47039 : if (negateop
890 47039 : && ops_equal_values_p (oe->op, negateop))
891 : {
892 1618 : if (dump_file && (dump_flags & TDF_DETAILS))
893 : {
894 0 : fprintf (dump_file, "Equivalence: ");
895 0 : print_generic_expr (dump_file, negateop);
896 0 : fprintf (dump_file, " + -");
897 0 : print_generic_expr (dump_file, oe->op);
898 0 : fprintf (dump_file, " -> 0\n");
899 : }
900 :
901 1618 : ops->ordered_remove (i);
902 1618 : add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
903 1618 : ops->ordered_remove (currindex);
904 1618 : reassociate_stats.ops_eliminated ++;
905 :
906 1618 : return true;
907 : }
908 45421 : else if (notop
909 45421 : && ops_equal_values_p (oe->op, notop))
910 : {
911 1832 : tree op_type = TREE_TYPE (oe->op);
912 :
913 1832 : if (dump_file && (dump_flags & TDF_DETAILS))
914 : {
915 0 : fprintf (dump_file, "Equivalence: ");
916 0 : print_generic_expr (dump_file, notop);
917 0 : fprintf (dump_file, " + ~");
918 0 : print_generic_expr (dump_file, oe->op);
919 0 : fprintf (dump_file, " -> -1\n");
920 : }
921 :
922 1832 : ops->ordered_remove (i);
923 1832 : add_to_ops_vec (ops, build_all_ones_cst (op_type));
924 1832 : ops->ordered_remove (currindex);
925 1832 : reassociate_stats.ops_eliminated ++;
926 :
927 1832 : return true;
928 : }
929 : }
930 :
931 : /* If CURR->OP is a negate expr without nop conversion in a plus expr:
932 : save it for later inspection in repropagate_negates(). */
933 88448 : if (negateop != NULL_TREE
934 88448 : && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
935 87938 : plus_negates.safe_push (curr->op);
936 :
937 : return false;
938 : }
939 :
940 : /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
941 : bitwise not expression, look in OPS for a corresponding operand to
942 : cancel it out. If we find one, remove the other from OPS, replace
943 : OPS[CURRINDEX] with 0, and return true. Otherwise, return
944 : false. */
945 :
946 : static bool
947 9390137 : eliminate_not_pairs (enum tree_code opcode,
948 : vec<operand_entry *> *ops,
949 : unsigned int currindex,
950 : operand_entry *curr)
951 : {
952 9390137 : tree notop;
953 9390137 : unsigned int i;
954 9390137 : operand_entry *oe;
955 :
956 9390137 : if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
957 1965906 : || TREE_CODE (curr->op) != SSA_NAME)
958 : return false;
959 :
960 1482444 : notop = get_unary_op (curr->op, BIT_NOT_EXPR);
961 1482444 : if (notop == NULL_TREE)
962 : return false;
963 :
964 : /* Any non-not version will have a rank that is one less than
965 : the current rank. So once we hit those ranks, if we don't find
966 : one, we can stop. */
967 :
968 41479 : for (i = currindex + 1;
969 9417443 : ops->iterate (i, &oe)
970 68785 : && oe->rank >= curr->rank - 1;
971 : i++)
972 : {
973 6809 : if (oe->op == notop)
974 : {
975 1 : if (dump_file && (dump_flags & TDF_DETAILS))
976 : {
977 0 : fprintf (dump_file, "Equivalence: ");
978 0 : print_generic_expr (dump_file, notop);
979 0 : if (opcode == BIT_AND_EXPR)
980 0 : fprintf (dump_file, " & ~");
981 0 : else if (opcode == BIT_IOR_EXPR)
982 0 : fprintf (dump_file, " | ~");
983 0 : print_generic_expr (dump_file, oe->op);
984 0 : if (opcode == BIT_AND_EXPR)
985 0 : fprintf (dump_file, " -> 0\n");
986 0 : else if (opcode == BIT_IOR_EXPR)
987 0 : fprintf (dump_file, " -> -1\n");
988 : }
989 :
990 1 : if (opcode == BIT_AND_EXPR)
991 1 : oe->op = build_zero_cst (TREE_TYPE (oe->op));
992 0 : else if (opcode == BIT_IOR_EXPR)
993 0 : oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
994 :
995 1 : reassociate_stats.ops_eliminated += ops->length () - 1;
996 1 : ops->truncate (0);
997 1 : ops->quick_push (oe);
998 1 : return true;
999 : }
1000 : }
1001 :
1002 : return false;
1003 : }
1004 :
1005 : /* Use constant value that may be present in OPS to try to eliminate
1006 : operands. Note that this function is only really used when we've
1007 : eliminated ops for other reasons, or merged constants. Across
1008 : single statements, fold already does all of this, plus more. There
1009 : is little point in duplicating logic, so I've only included the
1010 : identities that I could ever construct testcases to trigger. */
1011 :
1012 : static void
1013 4561965 : eliminate_using_constants (enum tree_code opcode,
1014 : vec<operand_entry *> *ops)
1015 : {
1016 4561965 : operand_entry *oelast = ops->last ();
1017 4561965 : tree type = TREE_TYPE (oelast->op);
1018 :
1019 4561965 : if (oelast->rank == 0
1020 4561965 : && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1021 : {
1022 3274494 : switch (opcode)
1023 : {
1024 414197 : case BIT_AND_EXPR:
1025 414197 : if (integer_zerop (oelast->op))
1026 : {
1027 0 : if (ops->length () != 1)
1028 : {
1029 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1030 0 : fprintf (dump_file, "Found & 0, removing all other ops\n");
1031 :
1032 0 : reassociate_stats.ops_eliminated += ops->length () - 1;
1033 :
1034 0 : ops->truncate (0);
1035 0 : ops->quick_push (oelast);
1036 1823 : return;
1037 : }
1038 : }
1039 414197 : else if (integer_all_onesp (oelast->op))
1040 : {
1041 4 : if (ops->length () != 1)
1042 : {
1043 4 : if (dump_file && (dump_flags & TDF_DETAILS))
1044 0 : fprintf (dump_file, "Found & -1, removing\n");
1045 4 : ops->pop ();
1046 4 : reassociate_stats.ops_eliminated++;
1047 : }
1048 : }
1049 : break;
1050 68756 : case BIT_IOR_EXPR:
1051 68756 : if (integer_all_onesp (oelast->op))
1052 : {
1053 0 : if (ops->length () != 1)
1054 : {
1055 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1056 0 : fprintf (dump_file, "Found | -1, removing all other ops\n");
1057 :
1058 0 : reassociate_stats.ops_eliminated += ops->length () - 1;
1059 :
1060 0 : ops->truncate (0);
1061 0 : ops->quick_push (oelast);
1062 0 : return;
1063 : }
1064 : }
1065 68756 : else if (integer_zerop (oelast->op))
1066 : {
1067 6 : if (ops->length () != 1)
1068 : {
1069 6 : if (dump_file && (dump_flags & TDF_DETAILS))
1070 0 : fprintf (dump_file, "Found | 0, removing\n");
1071 6 : ops->pop ();
1072 6 : reassociate_stats.ops_eliminated++;
1073 : }
1074 : }
1075 : break;
1076 929371 : case MULT_EXPR:
1077 929371 : if (integer_zerop (oelast->op)
1078 929371 : || (FLOAT_TYPE_P (type)
1079 1398 : && !HONOR_NANS (type)
1080 1304 : && !HONOR_SIGNED_ZEROS (type)
1081 1304 : && real_zerop (oelast->op)))
1082 : {
1083 0 : if (ops->length () != 1)
1084 : {
1085 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1086 0 : fprintf (dump_file, "Found * 0, removing all other ops\n");
1087 :
1088 0 : reassociate_stats.ops_eliminated += ops->length () - 1;
1089 0 : ops->truncate (0);
1090 0 : ops->quick_push (oelast);
1091 0 : return;
1092 : }
1093 : }
1094 929371 : else if (integer_onep (oelast->op)
1095 929371 : || (FLOAT_TYPE_P (type)
1096 1398 : && !HONOR_SNANS (type)
1097 1398 : && real_onep (oelast->op)))
1098 : {
1099 4 : if (ops->length () != 1)
1100 : {
1101 4 : if (dump_file && (dump_flags & TDF_DETAILS))
1102 0 : fprintf (dump_file, "Found * 1, removing\n");
1103 4 : ops->pop ();
1104 4 : reassociate_stats.ops_eliminated++;
1105 4 : return;
1106 : }
1107 : }
1108 : break;
1109 1726256 : case BIT_XOR_EXPR:
1110 1726256 : case PLUS_EXPR:
1111 1726256 : case MINUS_EXPR:
1112 1726256 : if (integer_zerop (oelast->op)
1113 1726256 : || (FLOAT_TYPE_P (type)
1114 730 : && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1115 730 : && fold_real_zero_addition_p (type, 0, oelast->op,
1116 : opcode == MINUS_EXPR)))
1117 : {
1118 1819 : if (ops->length () != 1)
1119 : {
1120 1819 : if (dump_file && (dump_flags & TDF_DETAILS))
1121 0 : fprintf (dump_file, "Found [|^+] 0, removing\n");
1122 1819 : ops->pop ();
1123 1819 : reassociate_stats.ops_eliminated++;
1124 1819 : return;
1125 : }
1126 : }
1127 : break;
1128 : default:
1129 : break;
1130 : }
1131 : }
1132 : }
1133 :
1134 :
1135 : static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1136 : bool, bool);
1137 :
1138 : /* Structure for tracking and counting operands. */
1139 : struct oecount {
1140 : unsigned int cnt;
1141 : unsigned int id;
1142 : enum tree_code oecode;
1143 : tree op;
1144 : };
1145 :
1146 :
1147 : /* The heap for the oecount hashtable and the sorted list of operands. */
1148 : static vec<oecount> cvec;
1149 :
1150 :
1151 : /* Oecount hashtable helpers. */
1152 :
1153 : struct oecount_hasher : int_hash <int, 0, 1>
1154 : {
1155 : static inline hashval_t hash (int);
1156 : static inline bool equal (int, int);
1157 : };
1158 :
1159 : /* Hash function for oecount. */
1160 :
1161 : inline hashval_t
1162 155194 : oecount_hasher::hash (int p)
1163 : {
1164 155194 : const oecount *c = &cvec[p - 42];
1165 155194 : return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1166 : }
1167 :
1168 : /* Comparison function for oecount. */
1169 :
1170 : inline bool
1171 84900 : oecount_hasher::equal (int p1, int p2)
1172 : {
1173 84900 : const oecount *c1 = &cvec[p1 - 42];
1174 84900 : const oecount *c2 = &cvec[p2 - 42];
1175 84900 : return c1->oecode == c2->oecode && c1->op == c2->op;
1176 : }
1177 :
1178 : /* Comparison function for qsort sorting oecount elements by count. */
1179 :
1180 : static int
1181 593583 : oecount_cmp (const void *p1, const void *p2)
1182 : {
1183 593583 : const oecount *c1 = (const oecount *)p1;
1184 593583 : const oecount *c2 = (const oecount *)p2;
1185 593583 : if (c1->cnt != c2->cnt)
1186 13532 : return c1->cnt > c2->cnt ? 1 : -1;
1187 : else
1188 : /* If counts are identical, use unique IDs to stabilize qsort. */
1189 856545 : return c1->id > c2->id ? 1 : -1;
1190 : }
1191 :
1192 : /* Return TRUE iff STMT represents a builtin call that raises OP
1193 : to some exponent. */
1194 :
1195 : static bool
1196 1153 : stmt_is_power_of_op (gimple *stmt, tree op)
1197 : {
1198 1153 : if (!is_gimple_call (stmt))
1199 : return false;
1200 :
1201 11 : switch (gimple_call_combined_fn (stmt))
1202 : {
1203 6 : CASE_CFN_POW:
1204 6 : CASE_CFN_POWI:
1205 6 : return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1206 :
1207 : default:
1208 : return false;
1209 : }
1210 : }
1211 :
1212 : /* Given STMT which is a __builtin_pow* call, decrement its exponent
1213 : in place and return the result. Assumes that stmt_is_power_of_op
1214 : was previously called for STMT and returned TRUE. */
1215 :
1216 : static HOST_WIDE_INT
1217 6 : decrement_power (gimple *stmt)
1218 : {
1219 6 : REAL_VALUE_TYPE c, cint;
1220 6 : HOST_WIDE_INT power;
1221 6 : tree arg1;
1222 :
1223 6 : switch (gimple_call_combined_fn (stmt))
1224 : {
1225 0 : CASE_CFN_POW:
1226 0 : arg1 = gimple_call_arg (stmt, 1);
1227 0 : c = TREE_REAL_CST (arg1);
1228 0 : power = real_to_integer (&c) - 1;
1229 0 : real_from_integer (&cint, VOIDmode, power, SIGNED);
1230 0 : gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1231 0 : return power;
1232 :
1233 6 : CASE_CFN_POWI:
1234 6 : arg1 = gimple_call_arg (stmt, 1);
1235 6 : power = TREE_INT_CST_LOW (arg1) - 1;
1236 6 : gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1237 6 : return power;
1238 :
1239 0 : default:
1240 0 : gcc_unreachable ();
1241 : }
1242 : }
1243 :
1244 : /* Replace SSA defined by STMT and replace all its uses with new
1245 : SSA. Also return the new SSA. */
1246 :
1247 : static tree
1248 326 : make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1249 : {
1250 326 : gimple *use_stmt;
1251 326 : use_operand_p use;
1252 326 : imm_use_iterator iter;
1253 326 : tree new_lhs, new_debug_lhs = NULL_TREE;
1254 326 : tree lhs = gimple_get_lhs (stmt);
1255 :
1256 326 : new_lhs = make_ssa_name (TREE_TYPE (lhs));
1257 326 : gimple_set_lhs (stmt, new_lhs);
1258 :
1259 : /* Also need to update GIMPLE_DEBUGs. */
1260 1035 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1261 : {
1262 383 : tree repl = new_lhs;
1263 383 : if (is_gimple_debug (use_stmt))
1264 : {
1265 57 : if (new_debug_lhs == NULL_TREE)
1266 : {
1267 21 : new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1268 21 : gdebug *def_temp
1269 21 : = gimple_build_debug_bind (new_debug_lhs,
1270 21 : build2 (opcode, TREE_TYPE (lhs),
1271 : new_lhs, op),
1272 : stmt);
1273 21 : gimple_set_uid (def_temp, gimple_uid (stmt));
1274 21 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1275 21 : gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1276 : }
1277 : repl = new_debug_lhs;
1278 : }
1279 1149 : FOR_EACH_IMM_USE_ON_STMT (use, iter)
1280 383 : SET_USE (use, repl);
1281 383 : update_stmt (use_stmt);
1282 326 : }
1283 326 : return new_lhs;
1284 : }
1285 :
1286 : /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1287 : uses with new SSAs. Also do this for the stmt that defines DEF
1288 : if *DEF is not OP. */
1289 :
1290 : static void
1291 259 : make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1292 : vec<gimple *> &stmts_to_fix)
1293 : {
1294 259 : unsigned i;
1295 259 : gimple *stmt;
1296 :
1297 259 : if (*def != op
1298 259 : && TREE_CODE (*def) == SSA_NAME
1299 259 : && (stmt = SSA_NAME_DEF_STMT (*def))
1300 518 : && gimple_code (stmt) != GIMPLE_NOP)
1301 259 : *def = make_new_ssa_for_def (stmt, opcode, op);
1302 :
1303 326 : FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1304 67 : make_new_ssa_for_def (stmt, opcode, op);
1305 259 : }
1306 :
1307 : /* Find the single immediate use of STMT's LHS, and replace it
1308 : with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1309 : replace *DEF with OP as well. */
1310 :
1311 : static void
1312 785 : propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1313 : {
1314 785 : tree lhs;
1315 785 : gimple *use_stmt;
1316 785 : use_operand_p use;
1317 785 : gimple_stmt_iterator gsi;
1318 :
1319 785 : if (is_gimple_call (stmt))
1320 1 : lhs = gimple_call_lhs (stmt);
1321 : else
1322 784 : lhs = gimple_assign_lhs (stmt);
1323 :
1324 785 : gcc_assert (has_single_use (lhs));
1325 785 : single_imm_use (lhs, &use, &use_stmt);
1326 785 : if (lhs == *def)
1327 535 : *def = op;
1328 785 : SET_USE (use, op);
1329 785 : if (TREE_CODE (op) != SSA_NAME)
1330 39 : update_stmt (use_stmt);
1331 785 : gsi = gsi_for_stmt (stmt);
1332 785 : unlink_stmt_vdef (stmt);
1333 785 : reassoc_remove_stmt (&gsi);
1334 785 : release_defs (stmt);
1335 785 : }
1336 :
1337 : /* Walks the linear chain with result *DEF searching for an operation
1338 : with operand OP and code OPCODE removing that from the chain. *DEF
1339 : is updated if there is only one operand but no operation left. */
1340 :
1341 : static void
1342 794 : zero_one_operation (tree *def, enum tree_code opcode, tree op)
1343 : {
1344 794 : tree orig_def = *def;
1345 794 : gimple *stmt = SSA_NAME_DEF_STMT (*def);
1346 : /* PR72835 - Record the stmt chain that has to be updated such that
1347 : we dont use the same LHS when the values computed are different. */
1348 794 : auto_vec<gimple *, 64> stmts_to_fix;
1349 :
1350 1428 : do
1351 : {
1352 1111 : tree name;
1353 :
1354 1111 : if (opcode == MULT_EXPR)
1355 : {
1356 1109 : if (stmt_is_power_of_op (stmt, op))
1357 : {
1358 6 : if (decrement_power (stmt) == 1)
1359 : {
1360 1 : if (stmts_to_fix.length () > 0)
1361 1 : stmts_to_fix.pop ();
1362 1 : propagate_op_to_single_use (op, stmt, def);
1363 : }
1364 : break;
1365 : }
1366 1103 : else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1367 : {
1368 15 : if (gimple_assign_rhs1 (stmt) == op)
1369 : {
1370 11 : tree cst = build_minus_one_cst (TREE_TYPE (op));
1371 11 : if (stmts_to_fix.length () > 0)
1372 11 : stmts_to_fix.pop ();
1373 11 : propagate_op_to_single_use (cst, stmt, def);
1374 11 : break;
1375 : }
1376 4 : else if (integer_minus_onep (op)
1377 4 : || real_minus_onep (op))
1378 : {
1379 4 : gimple_assign_set_rhs_code
1380 4 : (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1381 4 : break;
1382 : }
1383 : }
1384 : }
1385 :
1386 1090 : name = gimple_assign_rhs1 (stmt);
1387 :
1388 : /* If this is the operation we look for and one of the operands
1389 : is ours simply propagate the other operand into the stmts
1390 : single use. */
1391 1090 : if (gimple_assign_rhs_code (stmt) == opcode
1392 1090 : && (name == op
1393 890 : || gimple_assign_rhs2 (stmt) == op))
1394 : {
1395 773 : if (name == op)
1396 200 : name = gimple_assign_rhs2 (stmt);
1397 773 : if (stmts_to_fix.length () > 0)
1398 238 : stmts_to_fix.pop ();
1399 773 : propagate_op_to_single_use (name, stmt, def);
1400 773 : break;
1401 : }
1402 :
1403 : /* We might have a multiply of two __builtin_pow* calls, and
1404 : the operand might be hiding in the rightmost one. Likewise
1405 : this can happen for a negate. */
1406 317 : if (opcode == MULT_EXPR
1407 317 : && gimple_assign_rhs_code (stmt) == opcode
1408 317 : && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1409 569 : && has_single_use (gimple_assign_rhs2 (stmt)))
1410 : {
1411 44 : gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1412 44 : if (stmt_is_power_of_op (stmt2, op))
1413 : {
1414 0 : if (decrement_power (stmt2) == 1)
1415 0 : propagate_op_to_single_use (op, stmt2, def);
1416 : else
1417 0 : stmts_to_fix.safe_push (stmt2);
1418 0 : break;
1419 : }
1420 44 : else if (is_gimple_assign (stmt2)
1421 44 : && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1422 : {
1423 0 : if (gimple_assign_rhs1 (stmt2) == op)
1424 : {
1425 0 : tree cst = build_minus_one_cst (TREE_TYPE (op));
1426 0 : propagate_op_to_single_use (cst, stmt2, def);
1427 0 : break;
1428 : }
1429 0 : else if (integer_minus_onep (op)
1430 0 : || real_minus_onep (op))
1431 : {
1432 0 : stmts_to_fix.safe_push (stmt2);
1433 0 : gimple_assign_set_rhs_code
1434 0 : (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1435 0 : break;
1436 : }
1437 : }
1438 : }
1439 :
1440 : /* Continue walking the chain. */
1441 317 : gcc_assert (name != op
1442 : && TREE_CODE (name) == SSA_NAME);
1443 317 : stmt = SSA_NAME_DEF_STMT (name);
1444 317 : stmts_to_fix.safe_push (stmt);
1445 317 : }
1446 : while (1);
1447 :
1448 794 : if (stmts_to_fix.length () > 0 || *def == orig_def)
1449 259 : make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1450 794 : }
1451 :
1452 : /* Returns true if statement S1 dominates statement S2. Like
1453 : stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1454 :
1455 : static bool
1456 5479975 : reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1457 : {
1458 5479975 : basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1459 :
1460 : /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1461 : SSA_NAME. Assume it lives at the beginning of function and
1462 : thus dominates everything. */
1463 5479975 : if (!bb1 || s1 == s2)
1464 : return true;
1465 :
1466 : /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1467 5476814 : if (!bb2)
1468 : return false;
1469 :
1470 5469556 : if (bb1 == bb2)
1471 : {
1472 : /* PHIs in the same basic block are assumed to be
1473 : executed all in parallel, if only one stmt is a PHI,
1474 : it dominates the other stmt in the same basic block. */
1475 5340315 : if (gimple_code (s1) == GIMPLE_PHI)
1476 : return true;
1477 :
1478 5229763 : if (gimple_code (s2) == GIMPLE_PHI)
1479 : return false;
1480 :
1481 5180533 : gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1482 :
1483 5180533 : if (gimple_uid (s1) < gimple_uid (s2))
1484 : return true;
1485 :
1486 2805875 : if (gimple_uid (s1) > gimple_uid (s2))
1487 : return false;
1488 :
1489 35503 : gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1490 35503 : unsigned int uid = gimple_uid (s1);
1491 76428 : for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1492 : {
1493 74473 : gimple *s = gsi_stmt (gsi);
1494 74473 : if (gimple_uid (s) != uid)
1495 : break;
1496 44244 : if (s == s2)
1497 : return true;
1498 : }
1499 :
1500 : return false;
1501 : }
1502 :
1503 129241 : return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1504 : }
1505 :
1506 : /* Insert STMT after INSERT_POINT. */
1507 :
1508 : static void
1509 41889 : insert_stmt_after (gimple *stmt, gimple *insert_point)
1510 : {
1511 41889 : gimple_stmt_iterator gsi;
1512 41889 : basic_block bb;
1513 :
1514 41889 : if (gimple_code (insert_point) == GIMPLE_PHI)
1515 45 : bb = gimple_bb (insert_point);
1516 41844 : else if (!stmt_ends_bb_p (insert_point))
1517 : {
1518 41836 : gsi = gsi_for_stmt (insert_point);
1519 41836 : gimple_set_uid (stmt, gimple_uid (insert_point));
1520 41836 : gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1521 41836 : return;
1522 : }
1523 8 : else if (gimple_code (insert_point) == GIMPLE_ASM
1524 5 : && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0
1525 13 : && !single_succ_p (gimple_bb (insert_point)))
1526 : /* We have no idea where to insert - it depends on where the
1527 : uses will be placed. */
1528 0 : gcc_unreachable ();
1529 : else
1530 : /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1531 : thus if it must end a basic block, it should be a call that can
1532 : throw, or some assignment that can throw. If it throws, the LHS
1533 : of it will not be initialized though, so only valid places using
1534 : the SSA_NAME should be dominated by the fallthru edge. */
1535 8 : bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1536 53 : gsi = gsi_after_labels (bb);
1537 53 : if (gsi_end_p (gsi))
1538 : {
1539 0 : gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1540 0 : gimple_set_uid (stmt,
1541 0 : gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1542 : }
1543 : else
1544 53 : gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1545 53 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1546 : }
1547 :
1548 : /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1549 : the result. Places the statement after the definition of either
1550 : OP1 or OP2. Returns the new statement. */
1551 :
1552 : static gimple *
1553 8362 : build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1554 : {
1555 8362 : gimple *op1def = NULL, *op2def = NULL;
1556 8362 : gimple_stmt_iterator gsi;
1557 8362 : tree op;
1558 8362 : gassign *sum;
1559 :
1560 : /* Create the addition statement. */
1561 8362 : op = make_ssa_name (type);
1562 8362 : sum = gimple_build_assign (op, opcode, op1, op2);
1563 :
1564 : /* Find an insertion place and insert. */
1565 8362 : if (TREE_CODE (op1) == SSA_NAME)
1566 8361 : op1def = SSA_NAME_DEF_STMT (op1);
1567 8362 : if (TREE_CODE (op2) == SSA_NAME)
1568 8083 : op2def = SSA_NAME_DEF_STMT (op2);
1569 8361 : if ((!op1def || gimple_nop_p (op1def))
1570 8455 : && (!op2def || gimple_nop_p (op2def)))
1571 : {
1572 93 : gsi = gsi_start_nondebug_after_labels_bb
1573 93 : (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1574 93 : if (!gsi_end_p (gsi)
1575 93 : && is_gimple_call (gsi_stmt (gsi))
1576 105 : && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1577 : {
1578 : /* Don't add statements before a returns_twice call at the start
1579 : of a function. */
1580 2 : split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1581 2 : gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1582 : }
1583 93 : if (gsi_end_p (gsi))
1584 : {
1585 2 : gimple_stmt_iterator gsi2
1586 2 : = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1587 2 : gimple_set_uid (sum,
1588 2 : gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1589 : }
1590 : else
1591 91 : gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1592 93 : gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1593 : }
1594 : else
1595 : {
1596 8269 : gimple *insert_point;
1597 8268 : if ((!op1def || gimple_nop_p (op1def))
1598 16537 : || (op2def && !gimple_nop_p (op2def)
1599 7970 : && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1600 : insert_point = op2def;
1601 : else
1602 : insert_point = op1def;
1603 8269 : insert_stmt_after (sum, insert_point);
1604 : }
1605 8362 : update_stmt (sum);
1606 :
1607 8362 : return sum;
1608 : }
1609 :
1610 : /* Perform un-distribution of divisions and multiplications.
1611 : A * X + B * X is transformed into (A + B) * X and A / X + B / X
1612 : to (A + B) / X for real X.
1613 :
1614 : The algorithm is organized as follows.
1615 :
1616 : - First we walk the addition chain *OPS looking for summands that
1617 : are defined by a multiplication or a real division. This results
1618 : in the candidates bitmap with relevant indices into *OPS.
1619 :
1620 : - Second we build the chains of multiplications or divisions for
1621 : these candidates, counting the number of occurrences of (operand, code)
1622 : pairs in all of the candidates chains.
1623 :
1624 : - Third we sort the (operand, code) pairs by number of occurrence and
1625 : process them starting with the pair with the most uses.
1626 :
1627 : * For each such pair we walk the candidates again to build a
1628 : second candidate bitmap noting all multiplication/division chains
1629 : that have at least one occurrence of (operand, code).
1630 :
1631 : * We build an alternate addition chain only covering these
1632 : candidates with one (operand, code) operation removed from their
1633 : multiplication/division chain.
1634 :
1635 : * The first candidate gets replaced by the alternate addition chain
1636 : multiplied/divided by the operand.
1637 :
1638 : * All candidate chains get disabled for further processing and
1639 : processing of (operand, code) pairs continues.
1640 :
1641 : The alternate addition chains built are re-processed by the main
1642 : reassociation algorithm which allows optimizing a * x * y + b * y * x
1643 : to (a + b ) * x * y in one invocation of the reassociation pass. */
1644 :
1645 : static bool
1646 4559862 : undistribute_ops_list (enum tree_code opcode,
1647 : vec<operand_entry *> *ops, class loop *loop)
1648 : {
1649 4559862 : unsigned int length = ops->length ();
1650 4559862 : operand_entry *oe1;
1651 4559862 : unsigned i, j;
1652 4559862 : unsigned nr_candidates, nr_candidates2;
1653 4559862 : sbitmap_iterator sbi0;
1654 4559862 : vec<operand_entry *> *subops;
1655 4559862 : bool changed = false;
1656 4559862 : unsigned int next_oecount_id = 0;
1657 :
1658 4559862 : if (length <= 1
1659 4559862 : || opcode != PLUS_EXPR)
1660 : return false;
1661 :
1662 : /* Build a list of candidates to process. */
1663 2256745 : auto_sbitmap candidates (length);
1664 2256745 : bitmap_clear (candidates);
1665 2256745 : nr_candidates = 0;
1666 6958442 : FOR_EACH_VEC_ELT (*ops, i, oe1)
1667 : {
1668 4701697 : enum tree_code dcode;
1669 4701697 : gimple *oe1def;
1670 :
1671 4701697 : if (TREE_CODE (oe1->op) != SSA_NAME)
1672 1713311 : continue;
1673 2988386 : oe1def = SSA_NAME_DEF_STMT (oe1->op);
1674 2988386 : if (!is_gimple_assign (oe1def))
1675 1075925 : continue;
1676 1912461 : dcode = gimple_assign_rhs_code (oe1def);
1677 3600409 : if ((dcode != MULT_EXPR
1678 1912461 : && dcode != RDIV_EXPR)
1679 1912461 : || !is_reassociable_op (oe1def, dcode, loop))
1680 1687948 : continue;
1681 :
1682 224513 : bitmap_set_bit (candidates, i);
1683 224513 : nr_candidates++;
1684 : }
1685 :
1686 2256745 : if (nr_candidates < 2)
1687 : return false;
1688 :
1689 16300 : if (dump_file && (dump_flags & TDF_DETAILS))
1690 : {
1691 1 : fprintf (dump_file, "searching for un-distribute opportunities ");
1692 2 : print_generic_expr (dump_file,
1693 1 : (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1694 1 : fprintf (dump_file, " %d\n", nr_candidates);
1695 : }
1696 :
1697 : /* Build linearized sub-operand lists and the counting table. */
1698 16300 : cvec.create (0);
1699 :
1700 16300 : hash_table<oecount_hasher> ctable (15);
1701 :
1702 : /* ??? Macro arguments cannot have multi-argument template types in
1703 : them. This typedef is needed to workaround that limitation. */
1704 16300 : typedef vec<operand_entry *> vec_operand_entry_t_heap;
1705 32600 : subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1706 72175 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1707 : {
1708 39575 : gimple *oedef;
1709 39575 : enum tree_code oecode;
1710 39575 : unsigned j;
1711 :
1712 39575 : oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1713 39575 : oecode = gimple_assign_rhs_code (oedef);
1714 79150 : linearize_expr_tree (&subops[i], oedef,
1715 39575 : associative_tree_code (oecode), false);
1716 :
1717 158972 : FOR_EACH_VEC_ELT (subops[i], j, oe1)
1718 : {
1719 79822 : oecount c;
1720 79822 : int *slot;
1721 79822 : int idx;
1722 79822 : c.oecode = oecode;
1723 79822 : c.cnt = 1;
1724 79822 : c.id = next_oecount_id++;
1725 79822 : c.op = oe1->op;
1726 79822 : cvec.safe_push (c);
1727 79822 : idx = cvec.length () + 41;
1728 79822 : slot = ctable.find_slot (idx, INSERT);
1729 79822 : if (!*slot)
1730 : {
1731 78847 : *slot = idx;
1732 : }
1733 : else
1734 : {
1735 975 : cvec.pop ();
1736 975 : cvec[*slot - 42].cnt++;
1737 : }
1738 : }
1739 : }
1740 :
1741 : /* Sort the counting table. */
1742 16300 : cvec.qsort (oecount_cmp);
1743 :
1744 16300 : if (dump_file && (dump_flags & TDF_DETAILS))
1745 : {
1746 1 : oecount *c;
1747 1 : fprintf (dump_file, "Candidates:\n");
1748 5 : FOR_EACH_VEC_ELT (cvec, j, c)
1749 : {
1750 3 : fprintf (dump_file, " %u %s: ", c->cnt,
1751 3 : c->oecode == MULT_EXPR
1752 : ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1753 3 : print_generic_expr (dump_file, c->op);
1754 3 : fprintf (dump_file, "\n");
1755 : }
1756 : }
1757 :
1758 : /* Process the (operand, code) pairs in order of most occurrence. */
1759 16300 : auto_sbitmap candidates2 (length);
1760 16967 : while (!cvec.is_empty ())
1761 : {
1762 16894 : oecount *c = &cvec.last ();
1763 16894 : if (c->cnt < 2)
1764 : break;
1765 :
1766 : /* Now collect the operands in the outer chain that contain
1767 : the common operand in their inner chain. */
1768 667 : bitmap_clear (candidates2);
1769 667 : nr_candidates2 = 0;
1770 4554 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1771 : {
1772 3220 : gimple *oedef;
1773 3220 : enum tree_code oecode;
1774 3220 : unsigned j;
1775 3220 : tree op = (*ops)[i]->op;
1776 :
1777 : /* If we undistributed in this chain already this may be
1778 : a constant. */
1779 3220 : if (TREE_CODE (op) != SSA_NAME)
1780 762 : continue;
1781 :
1782 2458 : oedef = SSA_NAME_DEF_STMT (op);
1783 2458 : oecode = gimple_assign_rhs_code (oedef);
1784 2458 : if (oecode != c->oecode)
1785 0 : continue;
1786 :
1787 9013 : FOR_EACH_VEC_ELT (subops[i], j, oe1)
1788 : {
1789 4463 : if (oe1->op == c->op)
1790 : {
1791 1128 : bitmap_set_bit (candidates2, i);
1792 1128 : ++nr_candidates2;
1793 1128 : break;
1794 : }
1795 : }
1796 : }
1797 :
1798 667 : if (nr_candidates2 >= 2)
1799 : {
1800 287 : operand_entry *oe1, *oe2;
1801 287 : gimple *prod;
1802 287 : int first = bitmap_first_set_bit (candidates2);
1803 :
1804 : /* Build the new addition chain. */
1805 287 : oe1 = (*ops)[first];
1806 287 : if (dump_file && (dump_flags & TDF_DETAILS))
1807 : {
1808 0 : fprintf (dump_file, "Building (");
1809 0 : print_generic_expr (dump_file, oe1->op);
1810 : }
1811 287 : zero_one_operation (&oe1->op, c->oecode, c->op);
1812 794 : EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1813 : {
1814 507 : gimple *sum;
1815 507 : oe2 = (*ops)[i];
1816 507 : if (dump_file && (dump_flags & TDF_DETAILS))
1817 : {
1818 0 : fprintf (dump_file, " + ");
1819 0 : print_generic_expr (dump_file, oe2->op);
1820 : }
1821 507 : zero_one_operation (&oe2->op, c->oecode, c->op);
1822 507 : sum = build_and_add_sum (TREE_TYPE (oe1->op),
1823 : oe1->op, oe2->op, opcode);
1824 507 : oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1825 507 : oe2->rank = 0;
1826 507 : oe1->op = gimple_get_lhs (sum);
1827 : }
1828 :
1829 : /* Apply the multiplication/division. */
1830 287 : prod = build_and_add_sum (TREE_TYPE (oe1->op),
1831 : oe1->op, c->op, c->oecode);
1832 287 : if (dump_file && (dump_flags & TDF_DETAILS))
1833 : {
1834 0 : fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1835 0 : print_generic_expr (dump_file, c->op);
1836 0 : fprintf (dump_file, "\n");
1837 : }
1838 :
1839 : /* Record it in the addition chain and disable further
1840 : undistribution with this op. */
1841 287 : oe1->op = gimple_assign_lhs (prod);
1842 287 : oe1->rank = get_rank (oe1->op);
1843 287 : subops[first].release ();
1844 :
1845 287 : changed = true;
1846 : }
1847 :
1848 667 : cvec.pop ();
1849 : }
1850 :
1851 71124 : for (i = 0; i < ops->length (); ++i)
1852 54824 : subops[i].release ();
1853 16300 : free (subops);
1854 16300 : cvec.release ();
1855 :
1856 16300 : return changed;
1857 2256745 : }
1858 :
1859 : /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1860 : first: element index for each relevant BIT_FIELD_REF.
1861 : second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1862 : typedef std::pair<unsigned, unsigned> v_info_elem;
1863 7089 : struct v_info {
1864 : tree vec_type;
1865 : auto_vec<v_info_elem, 32> vec;
1866 : };
1867 : typedef v_info *v_info_ptr;
1868 :
1869 : /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1870 : static int
1871 10590 : sort_by_mach_mode (const void *p_i, const void *p_j)
1872 : {
1873 10590 : const tree tr1 = *((const tree *) p_i);
1874 10590 : const tree tr2 = *((const tree *) p_j);
1875 10590 : unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1876 10590 : unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1877 10590 : if (mode1 > mode2)
1878 : return 1;
1879 10550 : else if (mode1 < mode2)
1880 : return -1;
1881 10500 : if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1882 : return -1;
1883 5070 : else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1884 5070 : return 1;
1885 : return 0;
1886 : }
1887 :
1888 : /* Cleanup hash map for VECTOR information. */
1889 : static void
1890 4363535 : cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1891 : {
1892 4370624 : for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1893 4377713 : it != info_map.end (); ++it)
1894 : {
1895 7089 : v_info_ptr info = (*it).second;
1896 7089 : delete info;
1897 7089 : (*it).second = NULL;
1898 : }
1899 4363535 : }
1900 :
1901 : /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1902 : V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1903 : is transformed to
1904 : Vs = (V1 + V2 + ... + Vn)
1905 : Vs[0] + Vs[1] + ... + Vs[k]
1906 :
1907 : The basic steps are listed below:
1908 :
1909 : 1) Check the addition chain *OPS by looking those summands coming from
1910 : VECTOR bit_field_ref on VECTOR type. Put the information into
1911 : v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1912 :
1913 : 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1914 : continuous, they can cover the whole VECTOR perfectly without any holes.
1915 : Obtain one VECTOR list which contain candidates to be transformed.
1916 :
1917 : 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1918 : candidates with same mode, build the addition statements for them and
1919 : generate BIT_FIELD_REFs accordingly.
1920 :
1921 : TODO:
1922 : The current implementation requires the whole VECTORs should be fully
1923 : covered, but it can be extended to support partial, checking adjacent
1924 : but not fill the whole, it may need some cost model to define the
1925 : boundary to do or not.
1926 : */
1927 : static bool
1928 4559862 : undistribute_bitref_for_vector (enum tree_code opcode,
1929 : vec<operand_entry *> *ops, struct loop *loop)
1930 : {
1931 4559862 : if (ops->length () <= 1)
1932 : return false;
1933 :
1934 4556516 : if (opcode != PLUS_EXPR
1935 4556516 : && opcode != MULT_EXPR
1936 : && opcode != BIT_XOR_EXPR
1937 1201283 : && opcode != BIT_IOR_EXPR
1938 819608 : && opcode != BIT_AND_EXPR)
1939 : return false;
1940 :
1941 4363535 : hash_map<tree, v_info_ptr> v_info_map;
1942 4363535 : operand_entry *oe1;
1943 4363535 : unsigned i;
1944 :
1945 : /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1946 : information into map. */
1947 13353408 : FOR_EACH_VEC_ELT (*ops, i, oe1)
1948 : {
1949 8989873 : enum tree_code dcode;
1950 8989873 : gimple *oe1def;
1951 :
1952 8989873 : if (TREE_CODE (oe1->op) != SSA_NAME)
1953 3137038 : continue;
1954 5852835 : oe1def = SSA_NAME_DEF_STMT (oe1->op);
1955 5852835 : if (!is_gimple_assign (oe1def))
1956 1483831 : continue;
1957 4369004 : dcode = gimple_assign_rhs_code (oe1def);
1958 4369004 : if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1959 4320447 : continue;
1960 :
1961 48557 : tree rhs = gimple_assign_rhs1 (oe1def);
1962 48557 : tree vec = TREE_OPERAND (rhs, 0);
1963 48557 : tree vec_type = TREE_TYPE (vec);
1964 :
1965 48557 : if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1966 28399 : continue;
1967 :
1968 : /* Ignore it if target machine can't support this VECTOR type. */
1969 20158 : if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1970 5437 : continue;
1971 :
1972 : /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1973 14721 : if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1974 : continue;
1975 :
1976 14721 : if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1977 14721 : || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1978 5254 : continue;
1979 :
1980 : /* The type of BIT_FIELD_REF might not be equal to the element type of
1981 : the vector. We want to use a vector type with element type the
1982 : same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1983 9467 : if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1984 : {
1985 1441 : machine_mode simd_mode;
1986 1441 : unsigned HOST_WIDE_INT size, nunits;
1987 1441 : unsigned HOST_WIDE_INT elem_size
1988 1441 : = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1989 2882 : if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1990 8980868 : continue;
1991 1441 : if (size <= elem_size || (size % elem_size) != 0)
1992 0 : continue;
1993 1441 : nunits = size / elem_size;
1994 1441 : if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1995 1441 : nunits).exists (&simd_mode))
1996 0 : continue;
1997 1441 : vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1998 :
1999 : /* Ignore it if target machine can't support this VECTOR type. */
2000 1441 : if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
2001 0 : continue;
2002 :
2003 : /* Check const vector type, constrain BIT_FIELD_REF offset and
2004 : size. */
2005 1441 : if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2006 : continue;
2007 :
2008 2882 : if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2009 2882 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2010 0 : continue;
2011 : }
2012 :
2013 9467 : tree elem_type = TREE_TYPE (vec_type);
2014 9467 : unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2015 9467 : if (maybe_ne (bit_field_size (rhs), elem_size))
2016 0 : continue;
2017 :
2018 9467 : unsigned idx;
2019 9467 : if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2020 0 : continue;
2021 :
2022 : /* Ignore it if target machine can't support this type of VECTOR
2023 : operation. */
2024 9467 : optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2025 9467 : if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2026 462 : continue;
2027 :
2028 9005 : bool existed;
2029 9005 : v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2030 9005 : if (!existed)
2031 : {
2032 7089 : info = new v_info;
2033 7089 : info->vec_type = vec_type;
2034 : }
2035 1916 : else if (!types_compatible_p (vec_type, info->vec_type))
2036 0 : continue;
2037 9005 : info->vec.safe_push (std::make_pair (idx, i));
2038 : }
2039 :
2040 : /* At least two VECTOR to combine. */
2041 4363535 : if (v_info_map.elements () <= 1)
2042 : {
2043 4363311 : cleanup_vinfo_map (v_info_map);
2044 4363311 : return false;
2045 : }
2046 :
2047 : /* Verify all VECTOR candidates by checking two conditions:
2048 : 1) sorted offsets are adjacent, no holes.
2049 : 2) can fill the whole VECTOR perfectly.
2050 : And add the valid candidates to a vector for further handling. */
2051 224 : auto_vec<tree> valid_vecs (v_info_map.elements ());
2052 1138 : for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2053 2052 : it != v_info_map.end (); ++it)
2054 : {
2055 914 : tree cand_vec = (*it).first;
2056 914 : v_info_ptr cand_info = (*it).second;
2057 914 : unsigned int num_elems
2058 914 : = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2059 1828 : if (cand_info->vec.length () != num_elems)
2060 540 : continue;
2061 374 : sbitmap holes = sbitmap_alloc (num_elems);
2062 374 : bitmap_ones (holes);
2063 374 : bool valid = true;
2064 374 : v_info_elem *curr;
2065 2204 : FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2066 : {
2067 1456 : if (!bitmap_bit_p (holes, curr->first))
2068 : {
2069 : valid = false;
2070 : break;
2071 : }
2072 : else
2073 1456 : bitmap_clear_bit (holes, curr->first);
2074 : }
2075 374 : if (valid && bitmap_empty_p (holes))
2076 374 : valid_vecs.quick_push (cand_vec);
2077 374 : sbitmap_free (holes);
2078 : }
2079 :
2080 : /* At least two VECTOR to combine. */
2081 224 : if (valid_vecs.length () <= 1)
2082 : {
2083 184 : cleanup_vinfo_map (v_info_map);
2084 184 : return false;
2085 : }
2086 :
2087 40 : valid_vecs.qsort (sort_by_mach_mode);
2088 : /* Go through all candidates by machine mode order, query the mode_to_total
2089 : to get the total number for each mode and skip the single one. */
2090 86 : for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2091 : {
2092 46 : tree tvec = valid_vecs[i];
2093 46 : enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2094 :
2095 : /* Skip modes with only a single candidate. */
2096 46 : if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2097 4 : continue;
2098 :
2099 42 : unsigned int idx, j;
2100 42 : gimple *sum = NULL;
2101 42 : tree sum_vec = tvec;
2102 42 : v_info_ptr info_ptr = *(v_info_map.get (tvec));
2103 42 : v_info_elem *elem;
2104 42 : tree vec_type = info_ptr->vec_type;
2105 :
2106 : /* Build the sum for all candidates with same mode. */
2107 325 : do
2108 : {
2109 975 : sum = build_and_add_sum (vec_type, sum_vec,
2110 325 : valid_vecs[i + 1], opcode);
2111 : /* Update the operands only after build_and_add_sum,
2112 : so that we don't have to repeat the placement algorithm
2113 : of build_and_add_sum. */
2114 325 : if (sum_vec == tvec
2115 325 : && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2116 : {
2117 18 : gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2118 18 : tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2119 18 : tree lhs = make_ssa_name (vec_type);
2120 18 : gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2121 18 : gimple_set_uid (g, gimple_uid (sum));
2122 18 : gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2123 18 : gimple_assign_set_rhs1 (sum, lhs);
2124 18 : update_stmt (sum);
2125 : }
2126 325 : if (!useless_type_conversion_p (vec_type,
2127 325 : TREE_TYPE (valid_vecs[i + 1])))
2128 : {
2129 270 : gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2130 810 : tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2131 270 : valid_vecs[i + 1]);
2132 270 : tree lhs = make_ssa_name (vec_type);
2133 270 : gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2134 270 : gimple_set_uid (g, gimple_uid (sum));
2135 270 : gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2136 270 : gimple_assign_set_rhs2 (sum, lhs);
2137 270 : update_stmt (sum);
2138 : }
2139 325 : sum_vec = gimple_get_lhs (sum);
2140 325 : info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2141 325 : gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2142 : /* Update those related ops of current candidate VECTOR. */
2143 1575 : FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2144 : {
2145 1250 : idx = elem->second;
2146 1250 : gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2147 : /* Set this then op definition will get DCEd later. */
2148 1250 : gimple_set_visited (def, true);
2149 1250 : if (opcode == PLUS_EXPR
2150 1250 : || opcode == BIT_XOR_EXPR
2151 100 : || opcode == BIT_IOR_EXPR)
2152 1190 : (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2153 60 : else if (opcode == MULT_EXPR)
2154 24 : (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2155 : else
2156 : {
2157 36 : gcc_assert (opcode == BIT_AND_EXPR);
2158 36 : (*ops)[idx]->op
2159 36 : = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2160 : }
2161 1250 : (*ops)[idx]->rank = 0;
2162 : }
2163 325 : if (dump_file && (dump_flags & TDF_DETAILS))
2164 : {
2165 0 : fprintf (dump_file, "Generating addition -> ");
2166 0 : print_gimple_stmt (dump_file, sum, 0);
2167 : }
2168 325 : i++;
2169 : }
2170 325 : while ((i < valid_vecs.length () - 1)
2171 367 : && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2172 :
2173 : /* Referring to first valid VECTOR with this mode, generate the
2174 : BIT_FIELD_REF statements accordingly. */
2175 42 : info_ptr = *(v_info_map.get (tvec));
2176 42 : gcc_assert (sum);
2177 42 : tree elem_type = TREE_TYPE (vec_type);
2178 232 : FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2179 : {
2180 148 : idx = elem->second;
2181 148 : tree dst = make_ssa_name (elem_type);
2182 148 : tree pos = bitsize_int (elem->first
2183 : * tree_to_uhwi (TYPE_SIZE (elem_type)));
2184 148 : tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2185 148 : TYPE_SIZE (elem_type), pos);
2186 148 : gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2187 148 : insert_stmt_after (gs, sum);
2188 148 : gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2189 : /* Set this then op definition will get DCEd later. */
2190 148 : gimple_set_visited (def, true);
2191 148 : (*ops)[idx]->op = gimple_assign_lhs (gs);
2192 148 : (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2193 148 : if (dump_file && (dump_flags & TDF_DETAILS))
2194 : {
2195 0 : fprintf (dump_file, "Generating bit_field_ref -> ");
2196 0 : print_gimple_stmt (dump_file, gs, 0);
2197 : }
2198 : }
2199 : }
2200 :
2201 40 : if (dump_file && (dump_flags & TDF_DETAILS))
2202 0 : fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n");
2203 :
2204 40 : cleanup_vinfo_map (v_info_map);
2205 :
2206 40 : return true;
2207 4363759 : }
2208 :
2209 : /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2210 : expression, examine the other OPS to see if any of them are comparisons
2211 : of the same values, which we may be able to combine or eliminate.
2212 : For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2213 :
2214 : static bool
2215 9386535 : eliminate_redundant_comparison (enum tree_code opcode,
2216 : vec<operand_entry *> *ops,
2217 : unsigned int currindex,
2218 : operand_entry *curr)
2219 : {
2220 9386535 : tree op1, op2;
2221 9386535 : enum tree_code lcode, rcode;
2222 9386535 : gimple *def1, *def2;
2223 9386535 : int i;
2224 9386535 : operand_entry *oe;
2225 :
2226 9386535 : if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2227 : return false;
2228 :
2229 : /* Check that CURR is a comparison. */
2230 1965876 : if (TREE_CODE (curr->op) != SSA_NAME)
2231 : return false;
2232 1482414 : def1 = SSA_NAME_DEF_STMT (curr->op);
2233 1482414 : if (!is_gimple_assign (def1))
2234 : return false;
2235 1275949 : lcode = gimple_assign_rhs_code (def1);
2236 1275949 : if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2237 : return false;
2238 507536 : op1 = gimple_assign_rhs1 (def1);
2239 507536 : op2 = gimple_assign_rhs2 (def1);
2240 :
2241 : /* Now look for a similar comparison in the remaining OPS. */
2242 1065873 : for (i = currindex + 1; ops->iterate (i, &oe); i++)
2243 : {
2244 558524 : tree t;
2245 :
2246 558524 : if (TREE_CODE (oe->op) != SSA_NAME)
2247 37 : continue;
2248 558487 : def2 = SSA_NAME_DEF_STMT (oe->op);
2249 558487 : if (!is_gimple_assign (def2))
2250 7258 : continue;
2251 551229 : rcode = gimple_assign_rhs_code (def2);
2252 551229 : if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2253 6341 : continue;
2254 :
2255 : /* If we got here, we have a match. See if we can combine the
2256 : two comparisons. */
2257 544888 : tree type = TREE_TYPE (gimple_assign_lhs (def1));
2258 544888 : if (opcode == BIT_IOR_EXPR)
2259 419368 : t = maybe_fold_or_comparisons (type,
2260 : lcode, op1, op2,
2261 : rcode, gimple_assign_rhs1 (def2),
2262 : gimple_assign_rhs2 (def2));
2263 : else
2264 125520 : t = maybe_fold_and_comparisons (type,
2265 : lcode, op1, op2,
2266 : rcode, gimple_assign_rhs1 (def2),
2267 : gimple_assign_rhs2 (def2));
2268 544888 : if (!t)
2269 544663 : continue;
2270 :
2271 : /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2272 : always give us a boolean_type_node value back. If the original
2273 : BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2274 : we need to convert. */
2275 225 : if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2276 : {
2277 2 : if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2278 0 : continue;
2279 2 : t = fold_convert (TREE_TYPE (curr->op), t);
2280 : }
2281 :
2282 225 : if (TREE_CODE (t) != INTEGER_CST
2283 225 : && !operand_equal_p (t, curr->op, 0))
2284 : {
2285 219 : enum tree_code subcode;
2286 219 : tree newop1, newop2;
2287 219 : if (!COMPARISON_CLASS_P (t))
2288 38 : continue;
2289 201 : extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2290 201 : STRIP_USELESS_TYPE_CONVERSION (newop1);
2291 201 : STRIP_USELESS_TYPE_CONVERSION (newop2);
2292 201 : if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2293 0 : continue;
2294 201 : if (lcode == TREE_CODE (t)
2295 104 : && operand_equal_p (op1, newop1, 0)
2296 305 : && operand_equal_p (op2, newop2, 0))
2297 59 : t = curr->op;
2298 162 : else if ((TREE_CODE (newop1) == SSA_NAME
2299 142 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2300 264 : || (TREE_CODE (newop2) == SSA_NAME
2301 78 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2302 20 : continue;
2303 : }
2304 :
2305 187 : if (dump_file && (dump_flags & TDF_DETAILS))
2306 : {
2307 6 : fprintf (dump_file, "Equivalence: ");
2308 6 : print_generic_expr (dump_file, curr->op);
2309 6 : fprintf (dump_file, " %s ", op_symbol_code (opcode));
2310 6 : print_generic_expr (dump_file, oe->op);
2311 6 : fprintf (dump_file, " -> ");
2312 6 : print_generic_expr (dump_file, t);
2313 6 : fprintf (dump_file, "\n");
2314 : }
2315 :
2316 : /* Now we can delete oe, as it has been subsumed by the new combined
2317 : expression t. */
2318 187 : ops->ordered_remove (i);
2319 187 : reassociate_stats.ops_eliminated ++;
2320 :
2321 : /* If t is the same as curr->op, we're done. Otherwise we must
2322 : replace curr->op with t. Special case is if we got a constant
2323 : back, in which case we add it to the end instead of in place of
2324 : the current entry. */
2325 187 : if (TREE_CODE (t) == INTEGER_CST)
2326 : {
2327 6 : ops->ordered_remove (currindex);
2328 6 : add_to_ops_vec (ops, t);
2329 : }
2330 181 : else if (!operand_equal_p (t, curr->op, 0))
2331 : {
2332 122 : gimple *sum;
2333 122 : enum tree_code subcode;
2334 122 : tree newop1;
2335 122 : tree newop2;
2336 122 : gcc_assert (COMPARISON_CLASS_P (t));
2337 122 : extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2338 122 : STRIP_USELESS_TYPE_CONVERSION (newop1);
2339 122 : STRIP_USELESS_TYPE_CONVERSION (newop2);
2340 122 : gcc_checking_assert (is_gimple_val (newop1)
2341 : && is_gimple_val (newop2));
2342 122 : sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2343 122 : curr->op = gimple_get_lhs (sum);
2344 : }
2345 : return true;
2346 : }
2347 :
2348 : return false;
2349 : }
2350 :
2351 :
2352 : /* Transform repeated addition of same values into multiply with
2353 : constant. */
2354 : static bool
2355 2259874 : transform_add_to_multiply (vec<operand_entry *> *ops)
2356 : {
2357 2259874 : operand_entry *oe;
2358 2259874 : tree op = NULL_TREE;
2359 2259874 : int j;
2360 2259874 : int i, start = -1, end = 0, count = 0;
2361 2259874 : auto_vec<std::pair <int, int> > indxs;
2362 2259874 : bool changed = false;
2363 :
2364 2259874 : if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2365 70628 : && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2366 31537 : || !flag_unsafe_math_optimizations))
2367 : return false;
2368 :
2369 : /* Look for repeated operands. */
2370 6836055 : FOR_EACH_VEC_ELT (*ops, i, oe)
2371 : {
2372 4615331 : if (start == -1)
2373 : {
2374 2220724 : count = 1;
2375 2220724 : op = oe->op;
2376 2220724 : start = i;
2377 : }
2378 2394607 : else if (operand_equal_p (oe->op, op, 0))
2379 : {
2380 155 : count++;
2381 155 : end = i;
2382 : }
2383 : else
2384 : {
2385 2394452 : if (count > 1)
2386 47 : indxs.safe_push (std::make_pair (start, end));
2387 2394452 : count = 1;
2388 2394452 : op = oe->op;
2389 2394452 : start = i;
2390 : }
2391 : }
2392 :
2393 2220724 : if (count > 1)
2394 38 : indxs.safe_push (std::make_pair (start, end));
2395 :
2396 2220882 : for (j = indxs.length () - 1; j >= 0; --j)
2397 : {
2398 : /* Convert repeated operand addition to multiplication. */
2399 85 : start = indxs[j].first;
2400 85 : end = indxs[j].second;
2401 85 : op = (*ops)[start]->op;
2402 85 : count = end - start + 1;
2403 325 : for (i = end; i >= start; --i)
2404 240 : ops->unordered_remove (i);
2405 85 : tree tmp = make_ssa_name (TREE_TYPE (op));
2406 85 : tree cst = build_int_cst (integer_type_node, count);
2407 85 : gassign *mul_stmt
2408 85 : = gimple_build_assign (tmp, MULT_EXPR,
2409 85 : op, fold_convert (TREE_TYPE (op), cst));
2410 85 : gimple_set_visited (mul_stmt, true);
2411 85 : add_to_ops_vec (ops, tmp, mul_stmt);
2412 85 : changed = true;
2413 : }
2414 :
2415 : return changed;
2416 2259874 : }
2417 :
2418 :
2419 : /* Perform various identities and other optimizations on the list of
2420 : operand entries, stored in OPS. The tree code for the binary
2421 : operation between all the operands is OPCODE. */
2422 :
2423 : static void
2424 4560168 : optimize_ops_list (enum tree_code opcode,
2425 : vec<operand_entry *> *ops)
2426 : {
2427 4577971 : unsigned int length = ops->length ();
2428 4577971 : unsigned int i;
2429 4577971 : operand_entry *oe;
2430 9154080 : operand_entry *oelast = NULL;
2431 9154080 : bool iterate = false;
2432 :
2433 4577971 : if (length == 1)
2434 4560168 : return;
2435 :
2436 4576109 : oelast = ops->last ();
2437 :
2438 : /* If the last two are constants, pop the constants off, merge them
2439 : and try the next two. */
2440 4576109 : if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2441 : {
2442 3289312 : operand_entry *oelm1 = (*ops)[length - 2];
2443 :
2444 3289312 : if (oelm1->rank == 0
2445 14162 : && is_gimple_min_invariant (oelm1->op)
2446 3303474 : && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2447 14162 : TREE_TYPE (oelast->op)))
2448 : {
2449 14162 : tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2450 : oelm1->op, oelast->op);
2451 :
2452 14162 : if (folded && is_gimple_min_invariant (folded))
2453 : {
2454 14144 : if (dump_file && (dump_flags & TDF_DETAILS))
2455 0 : fprintf (dump_file, "Merging constants\n");
2456 :
2457 14144 : ops->pop ();
2458 14144 : ops->pop ();
2459 :
2460 14144 : add_to_ops_vec (ops, folded);
2461 14144 : reassociate_stats.constants_eliminated++;
2462 :
2463 14144 : optimize_ops_list (opcode, ops);
2464 14144 : return;
2465 : }
2466 : }
2467 : }
2468 :
2469 4561965 : eliminate_using_constants (opcode, ops);
2470 4561965 : oelast = NULL;
2471 :
2472 13952100 : for (i = 0; ops->iterate (i, &oe);)
2473 : {
2474 9390137 : bool done = false;
2475 :
2476 9390137 : if (eliminate_not_pairs (opcode, ops, i, oe))
2477 2 : return;
2478 9390136 : if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2479 9389985 : || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2480 18776671 : || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2481 : {
2482 3788 : if (done)
2483 : return;
2484 3787 : iterate = true;
2485 3787 : oelast = NULL;
2486 3787 : continue;
2487 : }
2488 9386348 : oelast = oe;
2489 9386348 : i++;
2490 : }
2491 :
2492 4561963 : if (iterate)
2493 : optimize_ops_list (opcode, ops);
2494 : }
2495 :
2496 : /* The following functions are subroutines to optimize_range_tests and allow
2497 : it to try to change a logical combination of comparisons into a range
2498 : test.
2499 :
2500 : For example, both
2501 : X == 2 || X == 5 || X == 3 || X == 4
2502 : and
2503 : X >= 2 && X <= 5
2504 : are converted to
2505 : (unsigned) (X - 2) <= 3
2506 :
2507 : For more information see the comments above fold_range_test in
2508 : fold-const.cc; this implementation is its counterpart for GIMPLE. */
2509 :
2510 :
2511 :
2512 : /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2513 :
2514 : void
2515 141 : dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2516 : {
2517 141 : if (!skip_exp)
2518 59 : print_generic_expr (file, r->exp);
2519 251 : fprintf (file, " %c[", r->in_p ? '+' : '-');
2520 141 : print_generic_expr (file, r->low);
2521 141 : fputs (", ", file);
2522 141 : print_generic_expr (file, r->high);
2523 141 : fputc (']', file);
2524 141 : }
2525 :
2526 : /* Dump the range entry R to STDERR. */
2527 :
2528 : DEBUG_FUNCTION void
2529 0 : debug_range_entry (struct range_entry *r)
2530 : {
2531 0 : dump_range_entry (stderr, r, false);
2532 0 : fputc ('\n', stderr);
2533 0 : }
2534 :
2535 : /* This is similar to make_range in fold-const.cc, but on top of
2536 : GIMPLE instead of trees. If EXP is non-NULL, it should be
2537 : an SSA_NAME and STMT argument is ignored, otherwise STMT
2538 : argument should be a GIMPLE_COND. */
2539 :
2540 : void
2541 5529051 : init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2542 : {
2543 5529051 : int in_p;
2544 5529051 : tree low, high;
2545 5529051 : bool is_bool;
2546 :
2547 5529051 : r->exp = NULL_TREE;
2548 5529051 : r->in_p = false;
2549 5529051 : r->low = NULL_TREE;
2550 5529051 : r->high = NULL_TREE;
2551 5529051 : if (exp != NULL_TREE
2552 5529051 : && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2553 785132 : return;
2554 :
2555 : /* Start with simply saying "EXP != 0" and then look at the code of EXP
2556 : and see if we can refine the range. Some of the cases below may not
2557 : happen, but it doesn't seem worth worrying about this. We "continue"
2558 : the outer loop when we've changed something; otherwise we "break"
2559 : the switch, which will "break" the while. */
2560 5050427 : low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2561 5050427 : high = low;
2562 5050427 : in_p = 0;
2563 5050427 : is_bool = false;
2564 5050427 : if (exp == NULL_TREE)
2565 : is_bool = true;
2566 1535647 : else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2567 : {
2568 639100 : if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2569 : is_bool = true;
2570 : else
2571 : return;
2572 : }
2573 896547 : else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2574 0 : is_bool = true;
2575 :
2576 8010241 : while (1)
2577 : {
2578 8010241 : enum tree_code code;
2579 8010241 : tree arg0, arg1, exp_type;
2580 8010241 : tree nexp;
2581 8010241 : location_t loc;
2582 :
2583 8010241 : if (exp != NULL_TREE)
2584 : {
2585 4495461 : if (TREE_CODE (exp) != SSA_NAME
2586 4495461 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2587 : break;
2588 :
2589 4495461 : stmt = SSA_NAME_DEF_STMT (exp);
2590 4495461 : if (!is_gimple_assign (stmt))
2591 : break;
2592 :
2593 2738314 : code = gimple_assign_rhs_code (stmt);
2594 2738314 : arg0 = gimple_assign_rhs1 (stmt);
2595 2738314 : arg1 = gimple_assign_rhs2 (stmt);
2596 2738314 : exp_type = TREE_TYPE (exp);
2597 : }
2598 : else
2599 : {
2600 3514780 : code = gimple_cond_code (stmt);
2601 3514780 : arg0 = gimple_cond_lhs (stmt);
2602 3514780 : arg1 = gimple_cond_rhs (stmt);
2603 3514780 : exp_type = boolean_type_node;
2604 : }
2605 :
2606 6253094 : if (TREE_CODE (arg0) != SSA_NAME
2607 4989926 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)
2608 11242549 : || ssa_name_maybe_undef_p (arg0))
2609 : break;
2610 4982719 : loc = gimple_location (stmt);
2611 4982719 : switch (code)
2612 : {
2613 33011 : case BIT_NOT_EXPR:
2614 33011 : if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2615 : /* Ensure the range is either +[-,0], +[0,0],
2616 : -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2617 : -[1,1]. If it is e.g. +[-,-] or -[-,-]
2618 : or similar expression of unconditional true or
2619 : false, it should not be negated. */
2620 33011 : && ((high && integer_zerop (high))
2621 0 : || (low && integer_onep (low))))
2622 : {
2623 5764 : in_p = !in_p;
2624 5764 : exp = arg0;
2625 5764 : continue;
2626 : }
2627 : break;
2628 2281 : case SSA_NAME:
2629 2281 : exp = arg0;
2630 2281 : continue;
2631 229904 : CASE_CONVERT:
2632 229904 : if (is_bool)
2633 : {
2634 122706 : if ((TYPE_PRECISION (exp_type) == 1
2635 116414 : || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2636 122706 : && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2637 : return;
2638 : }
2639 107198 : else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2640 : {
2641 4306 : if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2642 : is_bool = true;
2643 : else
2644 : return;
2645 : }
2646 102892 : else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2647 124292 : is_bool = true;
2648 227184 : goto do_default;
2649 : case EQ_EXPR:
2650 : case NE_EXPR:
2651 : case LT_EXPR:
2652 : case LE_EXPR:
2653 : case GE_EXPR:
2654 : case GT_EXPR:
2655 : is_bool = true;
2656 : /* FALLTHRU */
2657 645582 : default:
2658 645582 : if (!is_bool)
2659 : return;
2660 341970 : do_default:
2661 4641095 : nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2662 : &low, &high, &in_p);
2663 4641095 : if (nexp != NULL_TREE)
2664 : {
2665 2951945 : exp = nexp;
2666 2951945 : gcc_assert (TREE_CODE (exp) == SSA_NAME);
2667 2951945 : continue;
2668 : }
2669 : break;
2670 : }
2671 : break;
2672 : }
2673 4743919 : if (is_bool)
2674 : {
2675 4155290 : r->exp = exp;
2676 4155290 : r->in_p = in_p;
2677 4155290 : r->low = low;
2678 4155290 : r->high = high;
2679 : }
2680 : }
2681 :
2682 : /* Comparison function for qsort. Sort entries
2683 : without SSA_NAME exp first, then with SSA_NAMEs sorted
2684 : by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2685 : by increasing ->low and if ->low is the same, by increasing
2686 : ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2687 : maximum. */
2688 :
2689 : static int
2690 6219228 : range_entry_cmp (const void *a, const void *b)
2691 : {
2692 6219228 : const struct range_entry *p = (const struct range_entry *) a;
2693 6219228 : const struct range_entry *q = (const struct range_entry *) b;
2694 :
2695 6219228 : if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2696 : {
2697 2797892 : if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2698 : {
2699 : /* Group range_entries for the same SSA_NAME together. */
2700 2729973 : if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2701 : return -1;
2702 1170824 : else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2703 : return 1;
2704 : /* If ->low is different, NULL low goes first, then by
2705 : ascending low. */
2706 155377 : if (p->low != NULL_TREE)
2707 : {
2708 138464 : if (q->low != NULL_TREE)
2709 : {
2710 130944 : tree tem = fold_binary (LT_EXPR, boolean_type_node,
2711 : p->low, q->low);
2712 130944 : if (tem && integer_onep (tem))
2713 : return -1;
2714 63553 : tem = fold_binary (GT_EXPR, boolean_type_node,
2715 : p->low, q->low);
2716 63553 : if (tem && integer_onep (tem))
2717 : return 1;
2718 : }
2719 : else
2720 : return 1;
2721 : }
2722 16913 : else if (q->low != NULL_TREE)
2723 : return -1;
2724 : /* If ->high is different, NULL high goes last, before that by
2725 : ascending high. */
2726 29361 : if (p->high != NULL_TREE)
2727 : {
2728 29223 : if (q->high != NULL_TREE)
2729 : {
2730 28936 : tree tem = fold_binary (LT_EXPR, boolean_type_node,
2731 : p->high, q->high);
2732 28936 : if (tem && integer_onep (tem))
2733 : return -1;
2734 8613 : tem = fold_binary (GT_EXPR, boolean_type_node,
2735 : p->high, q->high);
2736 8613 : if (tem && integer_onep (tem))
2737 : return 1;
2738 : }
2739 : else
2740 : return -1;
2741 : }
2742 138 : else if (q->high != NULL_TREE)
2743 : return 1;
2744 : /* If both ranges are the same, sort below by ascending idx. */
2745 : }
2746 : else
2747 : return 1;
2748 : }
2749 3421336 : else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2750 : return -1;
2751 :
2752 3334476 : if (p->idx < q->idx)
2753 : return -1;
2754 : else
2755 : {
2756 1682368 : gcc_checking_assert (p->idx > q->idx);
2757 : return 1;
2758 : }
2759 : }
2760 :
2761 : /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2762 : insert needed statements BEFORE or after GSI (before it when BEFORE is true), and return that SSA_NAME. */
2763 :
2764 : static tree
2765 24720 : force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2766 : {
/* Pick the iterator update mode that goes with the requested insertion side. */
2767 24720 : enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2768 24720 : tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
/* The gimplified result is not necessarily an SSA_NAME; when it is not, copy it into a fresh SSA_NAME with an explicit assignment inserted on the same side of GSI. */
2769 24720 : if (TREE_CODE (ret) != SSA_NAME)
2770 : {
2771 37 : gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2772 37 : if (before)
2773 37 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
2774 : else
2775 0 : gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2776 37 : ret = gimple_assign_lhs (g);
2777 : }
2778 24720 : return ret;
2779 : }
2780 :
2781 : /* Helper routine of optimize_range_tests.
2782 : [EXP, IN_P, LOW, HIGH] is a merged range for
2783 : RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2784 : OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2785 : is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2786 : an array of COUNT pointers to other ranges. SEQ, if non-NULL, is a
2787 : sequence of statements inserted next to the new range test. Return true if the range merge has been successful.
2788 : If OPCODE is ERROR_MARK, this is called from within
2789 : maybe_optimize_range_tests and is performing inter-bb range optimization.
2790 : In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2791 : oe->rank. */
2792 :
2793 : static bool
2794 24720 : update_range_test (struct range_entry *range, struct range_entry *otherrange,
2795 : struct range_entry **otherrangep,
2796 : unsigned int count, enum tree_code opcode,
2797 : vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2798 : bool in_p, tree low, tree high)
2799 : {
/* IDX selects the operand entry that will receive the merged test. SWAP_WITH and the REWRITE_BB_* blocks are only set in inter-bb mode below. */
2800 24720 : unsigned int idx = range->idx;
2801 24720 : struct range_entry *swap_with = NULL;
2802 24720 : basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2803 24720 : if (opcode == ERROR_MARK)
2804 : {
2805 : /* For inter-bb range test optimization, pick from the range tests
2806 : the one which is tested in the earliest condition (one dominating
2807 : the others), because otherwise there could be some UB (e.g. signed
2808 : overflow) in following bbs that we'd expose which wasn't there in
2809 : the original program. See PR104196. */
2810 14155 : basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2811 14155 : basic_block range_bb = orig_range_bb;
2812 31228 : for (unsigned int i = 0; i < count; i++)
2813 : {
2814 17073 : struct range_entry *this_range;
2815 17073 : if (otherrange)
2816 10400 : this_range = otherrange + i;
2817 : else
2818 6673 : this_range = otherrangep[i];
2819 17073 : operand_entry *oe = (*ops)[this_range->idx];
2820 17073 : basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
/* THIS_BB dominates the best block found so far, so prefer it. */
2821 17073 : if (range_bb != this_bb
2822 17073 : && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2823 : {
2824 6813 : swap_with = this_range;
2825 6813 : range_bb = this_bb;
2826 6813 : idx = this_range->idx;
2827 : }
2828 : }
2829 : /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2830 : only defined in later blocks. In this case we can't move the
2831 : merged comparison earlier, so instead check if there are any stmts
2832 : that might trigger signed integer overflow in between and rewrite
2833 : them. But only after we check if the optimization is possible. */
2834 14155 : if (seq && swap_with)
2835 : {
2836 3437 : rewrite_bb_first = range_bb;
2837 3437 : rewrite_bb_last = orig_range_bb;
2838 3437 : idx = range->idx;
2839 3437 : swap_with = NULL;
2840 : }
2841 : }
/* OE is the operand entry that will carry the merged test. When it has no operand tree, STMT is the last non-debug statement of basic block OE->id and the test is built in boolean_type_node. */
2842 24720 : operand_entry *oe = (*ops)[idx];
2843 24720 : tree op = oe->op;
2844 24720 : gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2845 12166 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2846 24720 : location_t loc = gimple_location (stmt);
2847 24720 : tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2848 24720 : tree tem = build_range_check (loc, optype, unshare_expr (exp),
2849 : in_p, low, high);
2850 24720 : gimple_stmt_iterator gsi;
2851 24720 : unsigned int i, uid;
2852 :
2853 24720 : if (tem == NULL_TREE)
2854 : return false;
2855 :
2856 : /* If op is default def SSA_NAME, there is no place to insert the
2857 : new comparison. Give up, unless we can use OP itself as the
2858 : range test. */
2859 37274 : if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2860 : {
2861 0 : if (op == range->exp
2862 0 : && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2863 0 : || TREE_CODE (optype) == BOOLEAN_TYPE)
2864 0 : && (op == tem
2865 0 : || (TREE_CODE (tem) == EQ_EXPR
2866 0 : && TREE_OPERAND (tem, 0) == op
2867 0 : && integer_onep (TREE_OPERAND (tem, 1))))
2868 0 : && opcode != BIT_IOR_EXPR
2869 0 : && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2870 : {
2871 : stmt = NULL;
2872 : tem = op;
2873 : }
2874 : else
2875 0 : return false;
2876 : }
2877 :
/* From here on the merge is committed. Exchange the operand indices so that RANGE refers to the operand in the dominating block chosen above. */
2878 24720 : if (swap_with)
2879 1806 : std::swap (range->idx, swap_with->idx);
2880 :
2881 24720 : if (dump_file && (dump_flags & TDF_DETAILS))
2882 : {
2883 39 : struct range_entry *r;
2884 39 : fprintf (dump_file, "Optimizing range tests ");
2885 39 : dump_range_entry (dump_file, range, false);
2886 180 : for (i = 0; i < count; i++)
2887 : {
2888 102 : if (otherrange)
2889 82 : r = otherrange + i;
2890 : else
2891 20 : r = otherrangep[i];
2892 102 : if (r->exp
2893 102 : && r->exp != range->exp
2894 20 : && TREE_CODE (r->exp) == SSA_NAME)
2895 : {
2896 20 : fprintf (dump_file, " and ");
2897 20 : dump_range_entry (dump_file, r, false);
2898 : }
2899 : else
2900 : {
2901 82 : fprintf (dump_file, " and");
2902 82 : dump_range_entry (dump_file, r, true);
2903 : }
2904 : }
2905 39 : fprintf (dump_file, "\n into ");
2906 39 : print_generic_expr (dump_file, tem);
2907 39 : fprintf (dump_file, "\n");
2908 : }
2909 :
2910 : /* In inter-bb range optimization mode, if we have a seq, we can't
2911 : move the merged comparison to the earliest bb from the comparisons
2912 : being replaced, so instead rewrite stmts that could trigger signed
2913 : integer overflow. */
/* Both bounds are NULL, and the loop therefore empty, unless they were set in the inter-bb block above. The walk goes from REWRITE_BB_LAST up through single predecessors and stops before REWRITE_BB_FIRST. */
2914 6386 : for (basic_block bb = rewrite_bb_last;
2915 31106 : bb != rewrite_bb_first; bb = single_pred (bb))
2916 12772 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2917 24777 : !gsi_end_p (gsi); gsi_next (&gsi))
2918 : {
2919 18391 : gimple *stmt = gsi_stmt (gsi);
2920 18391 : if (gimple_needing_rewrite_undefined (stmt))
2921 : {
/* Remember the neighbours of STMT so that every statement lying between them after the rewrite can be given STMT's uid. */
2922 69 : gimple_stmt_iterator gsip = gsi;
2923 69 : gimple_stmt_iterator gsin = gsi;
2924 69 : gsi_prev (&gsip);
2925 69 : gsi_next (&gsin);
2926 69 : rewrite_to_defined_unconditional (&gsi);
2927 69 : unsigned uid = gimple_uid (stmt);
2928 69 : if (gsi_end_p (gsip))
2929 31 : gsip = gsi_after_labels (bb);
2930 : else
2931 38 : gsi_next (&gsip);
2932 276 : for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2933 207 : gsi_next (&gsip))
2934 207 : gimple_set_uid (gsi_stmt (gsip), uid);
2935 : }
2936 : }
2937 :
/* For BIT_IOR_EXPR operands (directly, or via oe->rank in inter-bb mode) the range check is used in inverted form. */
2938 24720 : if (opcode == BIT_IOR_EXPR
2939 18619 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2940 17415 : tem = invert_truthvalue_loc (loc, tem);
2941 :
2942 24720 : tem = fold_convert_loc (loc, optype, tem);
2943 24720 : if (stmt)
2944 : {
2945 24720 : gsi = gsi_for_stmt (stmt);
2946 24720 : uid = gimple_uid (stmt);
2947 : }
2948 : else
2949 : {
2950 0 : gsi = gsi_none ();
2951 0 : uid = 0;
2952 : }
/* STMT is NULL only in the default-def case above, where OP itself serves as the test and nothing has to be inserted. */
2953 24720 : if (stmt == NULL)
2954 0 : gcc_checking_assert (tem == op);
2955 : /* In rare cases range->exp can be equal to lhs of stmt.
2956 : In that case we have to insert after the stmt rather than before
2957 : it. If stmt is a PHI, insert it at the start of the basic block. */
2958 24720 : else if (op != range->exp)
2959 : {
2960 24720 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2961 24720 : tem = force_into_ssa_name (&gsi, tem, true);
2962 24720 : gsi_prev (&gsi);
2963 : }
2964 0 : else if (gimple_code (stmt) != GIMPLE_PHI)
2965 : {
2966 0 : gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2967 0 : tem = force_into_ssa_name (&gsi, tem, false);
2968 : }
2969 : else
2970 : {
/* PHI case: insert after the labels of the PHI's block. UID comes from the first statement after the labels; if there is none, it is the uid of the last statement in the block, or 1 when the block has no statements. */
2971 0 : gsi = gsi_after_labels (gimple_bb (stmt));
2972 0 : if (!gsi_end_p (gsi))
2973 0 : uid = gimple_uid (gsi_stmt (gsi));
2974 : else
2975 : {
2976 0 : gsi = gsi_start_bb (gimple_bb (stmt));
2977 0 : uid = 1;
2978 0 : while (!gsi_end_p (gsi))
2979 : {
2980 0 : uid = gimple_uid (gsi_stmt (gsi));
2981 0 : gsi_next (&gsi);
2982 : }
2983 : }
2984 0 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2985 0 : tem = force_into_ssa_name (&gsi, tem, true);
2986 0 : if (gsi_end_p (gsi))
2987 0 : gsi = gsi_last_bb (gimple_bb (stmt));
2988 : else
2989 24720 : gsi_prev (&gsi);
2990 : }
/* Walk backwards from the insertion point and give UID to every statement whose uid is still zero; stop at the first statement that already has a non-zero uid. */
2991 154920 : for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2992 86124 : if (gimple_uid (gsi_stmt (gsi)))
2993 : break;
2994 : else
2995 65100 : gimple_set_uid (gsi_stmt (gsi), uid);
2996 :
/* Record the merged test as the operand of OE and make RANGE describe the merged range. */
2997 24720 : oe->op = tem;
2998 24720 : range->exp = exp;
2999 24720 : range->low = low;
3000 24720 : range->high = high;
3001 24720 : range->in_p = in_p;
3002 :
/* Note that RANGE and OE are reused below to walk the other ranges. */
3003 53455 : for (i = 0; i < count; i++)
3004 : {
3005 28735 : if (otherrange)
3006 18087 : range = otherrange + i;
3007 : else
3008 10648 : range = otherrangep[i];
3009 28735 : oe = (*ops)[range->idx];
3010 : /* Now change all the other range test immediate uses, so that
3011 : those tests will be optimized away. */
/* In inter-bb mode the operand becomes the constant 0 for BIT_IOR_EXPR and 1 otherwise (boolean_false_node / boolean_true_node when there was no operand tree); in the other mode it is set to error_mark_node. */
3012 28735 : if (opcode == ERROR_MARK)
3013 : {
3014 17073 : if (oe->op)
3015 2135 : oe->op = build_int_cst (TREE_TYPE (oe->op),
3016 2135 : oe->rank == BIT_IOR_EXPR ? 0 : 1);
3017 : else
3018 14938 : oe->op = (oe->rank == BIT_IOR_EXPR
3019 14938 : ? boolean_false_node : boolean_true_node);
3020 : }
3021 : else
3022 11662 : oe->op = error_mark_node;
3023 28735 : range->exp = NULL_TREE;
3024 28735 : range->low = NULL_TREE;
3025 28735 : range->high = NULL_TREE;
3026 : }
3027 : return true;
3028 : }
3029 :
3030 : /* Optimize X == CST1 || X == CST2
3031 : if popcount (CST1 ^ CST2) == 1 into
3032 : (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3033 : Similarly for ranges. E.g.
3034 : X != 2 && X != 3 && X != 10 && X != 11
3035 : will be transformed by the previous optimization into
3036 : !((X - 2U) <= 1U || (X - 10U) <= 1U)
3037 : and this loop can transform that into
3038 : !(((X & ~8) - 2U) <= 1U).
/* (continued) TYPE is the type of the tested expression; [LOWI, HIGHI] and
   [LOWJ, HIGHJ] are the bounds of RANGEI and RANGEJ. OPCODE and OPS are
   passed through to update_range_test. Return true if the two ranges
   were merged. */
3039 :
3040 : static bool
3041 22814 : optimize_range_tests_xor (enum tree_code opcode, tree type,
3042 : tree lowi, tree lowj, tree highi, tree highj,
3043 : vec<operand_entry *> *ops,
3044 : struct range_entry *rangei,
3045 : struct range_entry *rangej)
3046 : {
3047 22814 : tree lowxor, highxor, tem, exp;
3048 : /* Check lowi ^ lowj == highi ^ highj and
3049 : popcount (lowi ^ lowj) == 1. */
3050 22814 : lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3051 22814 : if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3052 : return false;
3053 22814 : if (!integer_pow2p (lowxor))
3054 : return false;
3055 3130 : highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3056 3130 : if (!tree_int_cst_equal (lowxor, highxor))
3057 : return false;
3058 :
3059 2701 : exp = rangei->exp;
/* If TYPE has fewer bits than its mode, or its minimum/maximum differ from those of a PREC-bit integer of the same signedness, do the masking in a PREC-bit nonstandard integer type instead and convert the operands to it. */
3060 2701 : scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3061 2701 : int prec = GET_MODE_PRECISION (mode);
3062 2701 : if (TYPE_PRECISION (type) < prec
3063 2700 : || (wi::to_wide (TYPE_MIN_VALUE (type))
3064 8101 : != wi::min_value (prec, TYPE_SIGN (type)))
3065 8101 : || (wi::to_wide (TYPE_MAX_VALUE (type))
3066 8101 : != wi::max_value (prec, TYPE_SIGN (type))))
3067 : {
3068 1 : type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3069 1 : exp = fold_convert (type, exp);
3070 1 : lowxor = fold_convert (type, lowxor);
3071 1 : lowi = fold_convert (type, lowi);
3072 1 : highi = fold_convert (type, highi);
3073 : }
/* Clear the single differing bit in the expression and in both bounds of RANGEI; LOWJ and HIGHJ are reused to hold the masked bounds of the merged range. */
3074 2701 : tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3075 2701 : exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3076 2701 : lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3077 2701 : highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3078 2701 : if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3079 2701 : NULL, rangei->in_p, lowj, highj))
3080 : return true;
3081 : return false;
3082 : }
3083 :
3084 : /* Optimize X == CST1 || X == CST2
3085 : if popcount (CST2 - CST1) == 1 into
3086 : ((X - CST1) & ~(CST2 - CST1)) == 0.
3087 : Similarly for ranges. E.g.
3088 : X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3089 : || X == 75 || X == 45
3090 : will be transformed by the previous optimization into
3091 : (X - 43U) <= 3U || (X - 75U) <= 3U
3092 : and this loop can transform that into
3093 : ((X - 43U) & ~(75U - 43U)) <= 3U.
/* (continued) TYPE is the type of the tested expression; [LOWI, HIGHI] and
   [LOWJ, HIGHJ] are the bounds of RANGEI and RANGEJ. OPCODE and OPS are
   passed through to update_range_test. Return true if the two ranges
   were merged. */
3094 : static bool
3095 17741 : optimize_range_tests_diff (enum tree_code opcode, tree type,
3096 : tree lowi, tree lowj, tree highi, tree highj,
3097 : vec<operand_entry *> *ops,
3098 : struct range_entry *rangei,
3099 : struct range_entry *rangej)
3100 : {
3101 17741 : tree tem1, tem2, mask;
3102 : /* Check highi - lowi == highj - lowj. */
3103 17741 : tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3104 17741 : if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3105 : return false;
3106 17741 : tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3107 17741 : if (!tree_int_cst_equal (tem1, tem2))
3108 : return false;
3109 : /* Check popcount (lowj - lowi) == 1. */
3110 11722 : tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3111 11722 : if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3112 : return false;
3113 11722 : if (!integer_pow2p (tem1))
3114 : return false;
3115 :
/* The new test is built in an unsigned type: a PREC-bit nonstandard one if TYPE does not span the full range of its mode, otherwise the unsigned variant of TYPE. */
3116 1949 : scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3117 1949 : int prec = GET_MODE_PRECISION (mode);
3118 1949 : if (TYPE_PRECISION (type) < prec
3119 1945 : || (wi::to_wide (TYPE_MIN_VALUE (type))
3120 5839 : != wi::min_value (prec, TYPE_SIGN (type)))
3121 5839 : || (wi::to_wide (TYPE_MAX_VALUE (type))
3122 5839 : != wi::max_value (prec, TYPE_SIGN (type))))
3123 4 : type = build_nonstandard_integer_type (prec, 1);
3124 : else
3125 1945 : type = unsigned_type_for (type);
/* At this point TEM1 is lowj - lowi (a power of two) and TEM2 is highj - lowj, which was checked above to equal highi - lowi. Build (EXP - LOWI) & ~TEM1 and test it against the range [0, TEM2]. */
3126 1949 : tem1 = fold_convert (type, tem1);
3127 1949 : tem2 = fold_convert (type, tem2);
3128 1949 : lowi = fold_convert (type, lowi);
3129 1949 : mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3130 1949 : tem1 = fold_build2 (MINUS_EXPR, type,
3131 : fold_convert (type, rangei->exp), lowi);
3132 1949 : tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3133 1949 : lowj = build_int_cst (type, 0);
3134 1949 : if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3135 1949 : NULL, rangei->in_p, lowj, tem2))
3136 : return true;
3137 : return false;
3138 : }
3139 :
3140 : /* It does some common checks for function optimize_range_tests_xor and
3141 : optimize_range_tests_diff.
3142 : If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3143 : Else it calls optimize_range_tests_diff.
/* (continued) Entries FIRST through LENGTH - 1 of RANGES are examined, each
   paired with at most the 63 entries that follow it. OPCODE and OPS are
   passed through to the helper. Return true if any pair was merged. */
3144 :
3145 : static bool
3146 2094971 : optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3147 : bool optimize_xor, vec<operand_entry *> *ops,
3148 : struct range_entry *ranges)
3149 : {
3150 2094971 : int i, j;
3151 2094971 : bool any_changes = false;
3152 3668263 : for (i = first; i < length; i++)
3153 : {
3154 1573292 : tree lowi, highi, lowj, highj, type, tem;
3155 :
/* Only entries that still have an expression, have in_p clear, are of integral type and have a high bound are considered as the first range of a pair. A missing low bound is replaced by the type's minimum. */
3156 1573292 : if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3157 973505 : continue;
3158 599787 : type = TREE_TYPE (ranges[i].exp);
3159 599787 : if (!INTEGRAL_TYPE_P (type))
3160 47776 : continue;
3161 552011 : lowi = ranges[i].low;
3162 552011 : if (lowi == NULL_TREE)
3163 35434 : lowi = TYPE_MIN_VALUE (type);
3164 552011 : highi = ranges[i].high;
3165 552011 : if (highi == NULL_TREE)
3166 6834 : continue;
3167 911154 : for (j = i + 1; j < length && j < i + 64; j++)
3168 : {
3169 370627 : bool changes;
/* The second range must test the same expression, have in_p clear and have a low bound; a missing high bound is replaced by the type's maximum. */
3170 370627 : if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3171 330072 : continue;
3172 40555 : lowj = ranges[j].low;
3173 40555 : if (lowj == NULL_TREE)
3174 0 : continue;
3175 40555 : highj = ranges[j].high;
3176 40555 : if (highj == NULL_TREE)
3177 152 : highj = TYPE_MAX_VALUE (type);
3178 : /* Check lowj > highi. */
3179 40555 : tem = fold_binary (GT_EXPR, boolean_type_node,
3180 : lowj, highi);
3181 40555 : if (tem == NULL_TREE || !integer_onep (tem))
3182 0 : continue;
3183 40555 : if (optimize_xor)
3184 22814 : changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3185 : highi, highj, ops,
3186 : ranges + i, ranges + j);
3187 : else
3188 17741 : changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3189 : highi, highj, ops,
3190 : ranges + i, ranges + j);
/* After a successful merge stop looking for further partners of range I. */
3191 40555 : if (changes)
3192 : {
3193 : any_changes = true;
3194 : break;
3195 : }
3196 : }
3197 : }
3198 2094971 : return any_changes;
3199 : }
3200 :
3201 : /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3202 : range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3203 : EXP on success, NULL otherwise.
/* (continued) The mask is stored in *MASK and is PREC bits wide. The tree
   returned on success can be an inner operand of EXP rather than EXP
   itself when EXP has the form X & CST or (X + CST) & CST2. If TOTALLOWP
   is non-NULL the mask is left relative to the range's own base, and
   *TOTALLOWP is set when that base is derived from the PLUS_EXPR constant;
   if TOTALLOWP is NULL the mask is shifted so that it is relative to
   TOTALLOW. */
3204 :
3205 : static tree
3206 166065 : extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3207 : wide_int *mask, tree *totallowp)
3208 : {
/* HIGH - LOW must fold to a non-negative, non-overflowed constant that is smaller than PREC. */
3209 166065 : tree tem = int_const_binop (MINUS_EXPR, high, low);
3210 166065 : if (tem == NULL_TREE
3211 166065 : || TREE_CODE (tem) != INTEGER_CST
3212 166065 : || TREE_OVERFLOW (tem)
3213 155081 : || tree_int_cst_sgn (tem) == -1
3214 321146 : || compare_tree_int (tem, prec) != -1)
3215 14892 : return NULL_TREE;
3216 :
/* MAX is the number of values in [LOW, HIGH]; the initial mask is a run of MAX bits starting at bit 0. */
3217 151173 : unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3218 151173 : *mask = wi::shifted_mask (0, max, false, prec);
3219 151173 : if (TREE_CODE (exp) == BIT_AND_EXPR
3220 151173 : && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3221 : {
/* EXP is X & CST. MSK becomes the complement of CST within EXP's precision; it is only used when exactly one bit is set and its value is below PREC - MAX. */
3222 4850 : widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3223 4850 : msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3224 4850 : if (wi::popcount (msk) == 1
3225 4850 : && wi::ltu_p (msk, prec - max))
3226 : {
/* Add a second run of MAX bits starting at bit number MSK, extend MAX accordingly and continue with the unmasked operand. */
3227 4095 : *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3228 4095 : max += msk.to_uhwi ();
3229 4095 : exp = TREE_OPERAND (exp, 0);
3230 4095 : if (integer_zerop (low)
3231 2154 : && TREE_CODE (exp) == PLUS_EXPR
3232 5891 : && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3233 : {
/* The operand is Y + CST tested against a range starting at 0: use Y (with nops stripped) as the expression and -CST, sign-extended to LOW's precision, as its base TBIAS. */
3234 1796 : tree ret = TREE_OPERAND (exp, 0);
3235 1796 : STRIP_NOPS (ret);
3236 1796 : widest_int bias
3237 1796 : = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3238 3592 : TYPE_PRECISION (TREE_TYPE (low))));
3239 1796 : tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3240 1796 : if (totallowp)
3241 : {
3242 1764 : *totallowp = tbias;
3243 1764 : return ret;
3244 : }
/* No TOTALLOWP: give up unless TOTALLOW is below TBIAS. If the distance TBIAS - TOTALLOW is less than PREC - MAX, shift the mask by it and return; otherwise fall through to the generic handling below. */
3245 32 : else if (!tree_int_cst_lt (totallow, tbias))
3246 : return NULL_TREE;
3247 32 : bias = wi::to_widest (tbias);
3248 32 : bias -= wi::to_widest (totallow);
3249 32 : if (bias >= 0 && bias < prec - max)
3250 : {
3251 22 : *mask = wi::lshift (*mask, bias);
3252 22 : return ret;
3253 : }
3254 1796 : }
3255 : }
3256 4850 : }
/* With TOTALLOWP the mask stays relative to LOW. Without it, nothing needs shifting unless TOTALLOW is below LOW. */
3257 149387 : if (totallowp)
3258 : return exp;
3259 13527 : if (!tree_int_cst_lt (totallow, low))
3260 : return exp;
/* Shift the mask left by LOW - TOTALLOW, failing if that difference is not a usable constant or is greater than PREC - MAX. */
3261 13505 : tem = int_const_binop (MINUS_EXPR, low, totallow);
3262 13505 : if (tem == NULL_TREE
3263 13505 : || TREE_CODE (tem) != INTEGER_CST
3264 13505 : || TREE_OVERFLOW (tem)
3265 26853 : || compare_tree_int (tem, prec - max) == 1)
3266 3854 : return NULL_TREE;
3267 :
3268 9651 : *mask = wi::lshift (*mask, wi::to_widest (tem));
3269 9651 : return exp;
3270 : }
3271 :
3272 : /* Attempt to optimize small range tests using bit test.
3273 : E.g.
3274 : X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3275 : && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3276 : has been by earlier optimizations optimized into:
3277 : ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3278 : As all the 43 through 82 range is less than 64 numbers,
3279 : for 64-bit word targets optimize that into:
3280 : (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0
/* (continued) Entries FIRST through LENGTH - 2 of RANGES are tried as the
   base range, each combined with up to the 63 entries that follow it.
   OPCODE and OPS are passed through to update_range_test. Return true if
   any range tests were replaced. */
3281 :
3282 : static bool
3283 1047494 : optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3284 : vec<operand_entry *> *ops,
3285 : struct range_entry *ranges)
3286 : {
3287 1047494 : int i, j;
3288 1047494 : bool any_changes = false;
/* The bit mask is as wide as word_mode. */
3289 1047494 : int prec = GET_MODE_BITSIZE (word_mode);
3290 1047494 : auto_vec<struct range_entry *, 64> candidates;
3291 :
3292 1473667 : for (i = first; i < length - 1; i++)
3293 : {
3294 426173 : tree lowi, highi, lowj, highj, type;
3295 :
3296 426173 : if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3297 288549 : continue;
3298 171148 : type = TREE_TYPE (ranges[i].exp);
3299 171148 : if (!INTEGRAL_TYPE_P (type))
3300 16336 : continue;
3301 154812 : lowi = ranges[i].low;
3302 154812 : if (lowi == NULL_TREE)
3303 11090 : lowi = TYPE_MIN_VALUE (type);
3304 154812 : highi = ranges[i].high;
3305 154812 : if (highi == NULL_TREE)
3306 2436 : continue;
/* Compute the mask for range I itself. Passing &lowi lets the helper replace LOWI with a different base. */
3307 152376 : wide_int mask;
3308 152376 : tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3309 : highi, &mask, &lowi);
3310 152376 : if (exp == NULL_TREE)
3311 14752 : continue;
3312 137624 : candidates.truncate (0);
3313 137624 : int end = MIN (i + 64, length);
/* Collect later ranges that test EXP, EXP & CST or (EXP + CST) & CST2 and whose bits fit into the same mask relative to LOWI. */
3314 297885 : for (j = i + 1; j < end; j++)
3315 : {
3316 160261 : tree exp2;
3317 160261 : if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3318 150596 : continue;
3319 92254 : if (ranges[j].exp == exp)
3320 : ;
3321 78838 : else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3322 : {
3323 1232 : exp2 = TREE_OPERAND (ranges[j].exp, 0);
3324 1232 : if (exp2 == exp)
3325 : ;
3326 1003 : else if (TREE_CODE (exp2) == PLUS_EXPR)
3327 : {
3328 810 : exp2 = TREE_OPERAND (exp2, 0);
3329 810 : STRIP_NOPS (exp2);
3330 810 : if (exp2 != exp)
3331 766 : continue;
3332 : }
3333 : else
3334 193 : continue;
3335 : }
3336 : else
3337 77606 : continue;
3338 13689 : lowj = ranges[j].low;
3339 13689 : if (lowj == NULL_TREE)
3340 0 : continue;
3341 13689 : highj = ranges[j].high;
3342 13689 : if (highj == NULL_TREE)
3343 76 : highj = TYPE_MAX_VALUE (TREE_TYPE (lowj));
3344 13689 : wide_int mask2;
3345 13689 : exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3346 : highj, &mask2, NULL);
3347 13689 : if (exp2 != exp)
3348 4024 : continue;
3349 9665 : mask |= mask2;
3350 9665 : candidates.safe_push (&ranges[j]);
3351 13689 : }
3352 :
3353 : /* If every possible relative value of the expression is a valid shift
3354 : amount, then we can merge the entry test in the bit test. In this
3355 : case, if we would need otherwise 2 or more comparisons, then use
3356 : the bit test; in the other cases, the threshold is 3 comparisons. */
3357 137624 : bool entry_test_needed;
3358 137624 : int_range_max r;
3359 275248 : if (TREE_CODE (exp) == SSA_NAME
3360 273844 : && get_range_query (cfun)->range_of_expr (r, exp)
3361 136922 : && !r.undefined_p ()
3362 136922 : && !r.varying_p ()
3363 316003 : && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3364 : {
/* Rebase the mask on the known minimum of EXP: shift it left when that minimum is below LOWI and right when it is above. */
3365 5026 : wide_int min = r.lower_bound ();
3366 5026 : wide_int ilowi = wi::to_wide (lowi);
3367 5026 : if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3368 : {
3369 490 : lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3370 490 : mask = wi::lshift (mask, ilowi - min);
3371 : }
3372 4536 : else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3373 : {
3374 1 : lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3375 1 : mask = wi::lrshift (mask, min - ilowi);
3376 : }
3377 5026 : entry_test_needed = false;
3378 5026 : }
3379 : else
3380 : entry_test_needed = true;
3381 280274 : if (candidates.length () >= (entry_test_needed ? 2 : 1))
3382 : {
/* HIGH is the value that corresponds to the highest bit set in MASK. */
3383 920 : tree high = wide_int_to_tree (TREE_TYPE (lowi),
3384 460 : wi::to_widest (lowi)
3385 1380 : + prec - 1 - wi::clz (mask));
3386 460 : operand_entry *oe = (*ops)[ranges[i].idx];
3387 460 : tree op = oe->op;
3388 460 : gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3389 52 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3390 408 : (cfun, oe->id));
3391 460 : location_t loc = gimple_location (stmt);
3392 460 : tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3393 :
3394 : /* See if it isn't cheaper to pretend the minimum value of the
3395 : range is 0, if maximum value is small enough.
3396 : We can avoid then subtraction of the minimum value, but the
3397 : mask constant could be perhaps more expensive. */
3398 460 : if (compare_tree_int (lowi, 0) > 0
3399 374 : && compare_tree_int (high, prec) < 0
3400 934 : && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
3401 : {
/* COST_DIFF is the cost of (reg + -M) plus an AND with the current mask, minus the cost of an AND with the mask shifted left by M. Note that the rtx R declared below shadows the int_range_max R of the enclosing scope. */
3402 157 : int cost_diff;
3403 157 : HOST_WIDE_INT m = tree_to_uhwi (lowi);
3404 157 : rtx reg = gen_raw_REG (word_mode, 10000);
3405 157 : bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3406 157 : cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3407 : GEN_INT (-m)),
3408 : word_mode, speed_p);
3409 157 : rtx r = immed_wide_int_const (mask, word_mode);
3410 157 : cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3411 : word_mode, speed_p);
3412 157 : r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3413 157 : cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3414 : word_mode, speed_p);
3415 157 : if (cost_diff > 0)
3416 : {
3417 58 : mask = wi::lshift (mask, m);
3418 58 : lowi = build_zero_cst (TREE_TYPE (lowi));
3419 : }
3420 : }
3421 :
/* TEM is the entry test, a range check of EXP against [LOWI, HIGH] built with in_p false; it stays NULL_TREE when no entry test is needed. */
3422 460 : tree tem;
3423 460 : if (entry_test_needed)
3424 : {
3425 399 : tem = build_range_check (loc, optype, unshare_expr (exp),
3426 : false, lowi, high);
3427 399 : if (tem == NULL_TREE || is_gimple_val (tem))
3428 0 : continue;
3429 : }
3430 : else
3431 61 : tem = NULL_TREE;
/* Build (((word) 1 << (int) ((unsigned) EXP - LOWI)) & MASK) == 0. */
3432 460 : tree etype = unsigned_type_for (TREE_TYPE (exp));
3433 460 : exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3434 : fold_convert_loc (loc, etype, exp),
3435 : fold_convert_loc (loc, etype, lowi));
3436 460 : exp = fold_convert_loc (loc, integer_type_node, exp);
3437 460 : tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3438 460 : exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3439 : build_int_cst (word_type, 1), exp);
3440 920 : exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3441 460 : wide_int_to_tree (word_type, mask));
3442 460 : exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3443 : build_zero_cst (word_type));
3444 460 : if (is_gimple_val (exp))
3445 0 : continue;
3446 :
3447 : /* The shift might have undefined behavior if TEM is true,
3448 : but reassociate_bb isn't prepared to have basic blocks
3449 : split when it is running. So, temporarily emit a code
3450 : with BIT_IOR_EXPR instead of &&, and fix it up in
3451 : branch_fixup. */
3452 460 : gimple_seq seq = NULL;
3453 460 : if (tem)
3454 : {
3455 399 : tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3456 399 : gcc_assert (TREE_CODE (tem) == SSA_NAME);
3457 399 : gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3458 : }
3459 460 : gimple_seq seq2;
3460 460 : exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3461 460 : gimple_seq_add_seq_without_update (&seq, seq2);
3462 460 : gcc_assert (TREE_CODE (exp) == SSA_NAME);
3463 460 : gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3464 460 : if (tem)
3465 : {
3466 399 : gimple *g = gimple_build_assign (make_ssa_name (optype),
3467 : BIT_IOR_EXPR, tem, exp);
3468 399 : gimple_set_location (g, loc);
3469 399 : gimple_seq_add_stmt_without_update (&seq, g);
3470 399 : exp = gimple_assign_lhs (g);
3471 : }
/* Replace range I and all collected candidates by the test EXP == 0, emitting SEQ alongside it. On success TEM is queued in reassoc_branch_fixups; on failure the unused SEQ is discarded. */
3472 460 : tree val = build_zero_cst (optype);
3473 1380 : if (update_range_test (&ranges[i], NULL, candidates.address (),
3474 : candidates.length (), opcode, ops, exp,
3475 : seq, false, val, val))
3476 : {
3477 460 : any_changes = true;
3478 460 : if (tem)
3479 399 : reassoc_branch_fixups.safe_push (tem);
3480 : }
3481 : else
3482 0 : gimple_seq_discard (seq);
3483 : }
3484 152376 : }
3485 1047494 : return any_changes;
3486 1047494 : }
3487 :
3488 : /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3489 : and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3490 : Also, handle x < C && y < C && z < C where C is power of two as
3491 : (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3492 : as (x | y | z) < 0. */
3493 :
3494 : static bool
3495 1047494 : optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3496 : vec<operand_entry *> *ops,
3497 : struct range_entry *ranges)
3498 : {
3499 1047494 : int i;
3500 1047494 : unsigned int b;
3501 1047494 : bool any_changes = false;
3502 1047494 : auto_vec<int, 128> buckets;
3503 1047494 : auto_vec<int, 32> chains;
3504 1047494 : auto_vec<struct range_entry *, 32> candidates;
3505 :
3506 1834147 : for (i = first; i < length; i++)
3507 : {
3508 786653 : int idx;
3509 :
3510 1142554 : if (ranges[i].exp == NULL_TREE
3511 767036 : || TREE_CODE (ranges[i].exp) != SSA_NAME
3512 762497 : || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3513 1217405 : || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3514 355901 : continue;
3515 :
3516 430752 : if (ranges[i].low != NULL_TREE
3517 403860 : && ranges[i].high != NULL_TREE
3518 348785 : && ranges[i].in_p
3519 628874 : && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3520 : {
3521 170030 : idx = !integer_zerop (ranges[i].low);
3522 170030 : if (idx && !integer_all_onesp (ranges[i].low))
3523 95543 : continue;
3524 : }
3525 260722 : else if (ranges[i].high != NULL_TREE
3526 205612 : && TREE_CODE (ranges[i].high) == INTEGER_CST
3527 205612 : && ranges[i].in_p)
3528 : {
3529 37267 : wide_int w = wi::to_wide (ranges[i].high);
3530 37267 : int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3531 37267 : int l = wi::clz (w);
3532 37267 : idx = 2;
3533 101436 : if (l <= 0
3534 37267 : || l >= prec
3535 68484 : || w != wi::mask (prec - l, false, prec))
3536 26902 : continue;
3537 10365 : if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3538 6398 : && ranges[i].low == NULL_TREE)
3539 10365 : || (ranges[i].low
3540 8441 : && integer_zerop (ranges[i].low))))
3541 3542 : continue;
3542 37267 : }
3543 422750 : else if (ranges[i].high == NULL_TREE
3544 55110 : && ranges[i].low != NULL_TREE
3545 : /* Perform this optimization only in the last
3546 : reassoc pass, as it interferes with the reassociation
3547 : itself or could also with VRP etc. which might not
3548 : be able to virtually undo the optimization. */
3549 55075 : && !reassoc_insert_powi_p
3550 27732 : && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3551 250821 : && integer_zerop (ranges[i].low))
3552 : idx = 3;
3553 : else
3554 199295 : continue;
3555 :
3556 105470 : b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3557 105470 : if (buckets.length () <= b)
3558 91777 : buckets.safe_grow_cleared (b + 1, true);
3559 105470 : if (chains.length () <= (unsigned) i)
3560 105470 : chains.safe_grow (i + 1, true);
3561 105470 : chains[i] = buckets[b];
3562 105470 : buckets[b] = i + 1;
3563 : }
3564 :
3565 16778178 : FOR_EACH_VEC_ELT (buckets, b, i)
3566 15730684 : if (i && chains[i - 1])
3567 : {
3568 6743 : int j, k = i;
3569 6743 : if ((b % 4) == 2)
3570 : {
3571 : /* When ranges[X - 1].high + 1 is a power of two,
3572 : we need to process the same bucket up to
3573 : precision - 1 times, each time split the entries
3574 : with the same high bound into one chain and the
3575 : rest into another one to be processed later. */
3576 : int this_prev = i;
3577 : int other_prev = 0;
3578 142 : for (j = chains[i - 1]; j; j = chains[j - 1])
3579 : {
3580 77 : if (tree_int_cst_equal (ranges[i - 1].high,
3581 77 : ranges[j - 1].high))
3582 : {
3583 68 : chains[this_prev - 1] = j;
3584 68 : this_prev = j;
3585 : }
3586 9 : else if (other_prev == 0)
3587 : {
3588 7 : buckets[b] = j;
3589 7 : other_prev = j;
3590 : }
3591 : else
3592 : {
3593 2 : chains[other_prev - 1] = j;
3594 2 : other_prev = j;
3595 : }
3596 : }
3597 65 : chains[this_prev - 1] = 0;
3598 65 : if (other_prev)
3599 7 : chains[other_prev - 1] = 0;
3600 65 : if (chains[i - 1] == 0)
3601 : {
3602 5 : if (other_prev)
3603 5 : b--;
3604 5 : continue;
3605 : }
3606 : }
3607 16381 : for (j = chains[i - 1]; j; j = chains[j - 1])
3608 : {
3609 9643 : gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3610 9643 : gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3611 9643 : if (reassoc_stmt_dominates_stmt_p (gk, gj))
3612 2660 : k = j;
3613 : }
3614 6738 : tree type1 = TREE_TYPE (ranges[k - 1].exp);
3615 6738 : tree type2 = NULL_TREE;
3616 6738 : candidates.truncate (0);
3617 6738 : if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3618 677 : type1 = pointer_sized_int_node;
3619 23119 : for (j = i; j; j = chains[j - 1])
3620 : {
3621 16381 : tree type = TREE_TYPE (ranges[j - 1].exp);
3622 16381 : if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3623 1369 : type = pointer_sized_int_node;
3624 16381 : if ((b % 4) == 3)
3625 : {
3626 : /* For the signed < 0 cases, the types should be
3627 : really compatible (all signed with the same precision,
3628 : instead put ranges that have different in_p from
3629 : k first. */
3630 3595 : if (!useless_type_conversion_p (type1, type))
3631 0 : continue;
3632 3595 : if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3633 1009 : candidates.safe_push (&ranges[j - 1]);
3634 3595 : type2 = type1;
3635 3595 : continue;
3636 : }
3637 12786 : if (j == k
3638 12786 : || useless_type_conversion_p (type1, type))
3639 : ;
3640 430 : else if (type2 == NULL_TREE
3641 430 : || useless_type_conversion_p (type2, type))
3642 : {
3643 430 : if (type2 == NULL_TREE)
3644 419 : type2 = type;
3645 430 : candidates.safe_push (&ranges[j - 1]);
3646 : }
3647 : }
3648 6738 : unsigned l = candidates.length ();
3649 23119 : for (j = i; j; j = chains[j - 1])
3650 : {
3651 16381 : tree type = TREE_TYPE (ranges[j - 1].exp);
3652 16381 : if (j == k)
3653 6738 : continue;
3654 9643 : if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3655 692 : type = pointer_sized_int_node;
3656 9643 : if ((b % 4) == 3)
3657 : {
3658 1988 : if (!useless_type_conversion_p (type1, type))
3659 0 : continue;
3660 1988 : if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3661 979 : candidates.safe_push (&ranges[j - 1]);
3662 1988 : continue;
3663 : }
3664 7655 : if (useless_type_conversion_p (type1, type))
3665 : ;
3666 860 : else if (type2 == NULL_TREE
3667 430 : || useless_type_conversion_p (type2, type))
3668 430 : continue;
3669 7225 : candidates.safe_push (&ranges[j - 1]);
3670 : }
3671 6738 : gimple_seq seq = NULL;
3672 6738 : tree op = NULL_TREE;
3673 6738 : unsigned int id;
3674 6738 : struct range_entry *r;
3675 6738 : candidates.safe_push (&ranges[k - 1]);
3676 23119 : FOR_EACH_VEC_ELT (candidates, id, r)
3677 : {
3678 16381 : gimple *g;
3679 16381 : enum tree_code code;
3680 16381 : if (id == 0)
3681 : {
3682 6738 : op = r->exp;
3683 6738 : continue;
3684 : }
3685 9643 : if (id == l
3686 8215 : || POINTER_TYPE_P (TREE_TYPE (op))
3687 17287 : || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3688 : {
3689 2004 : code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3690 2004 : tree type3 = id >= l ? type1 : pointer_sized_int_node;
3691 2004 : if (code == BIT_NOT_EXPR
3692 2004 : && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3693 : {
3694 0 : g = gimple_build_assign (make_ssa_name (type3),
3695 : NOP_EXPR, op);
3696 0 : gimple_seq_add_stmt_without_update (&seq, g);
3697 0 : op = gimple_assign_lhs (g);
3698 : }
3699 2004 : g = gimple_build_assign (make_ssa_name (type3), code, op);
3700 2004 : gimple_seq_add_stmt_without_update (&seq, g);
3701 2004 : op = gimple_assign_lhs (g);
3702 : }
3703 9643 : tree type = TREE_TYPE (r->exp);
3704 9643 : tree exp = r->exp;
3705 9643 : if (POINTER_TYPE_P (type)
3706 8929 : || TREE_CODE (type) == OFFSET_TYPE
3707 18567 : || (id >= l && !useless_type_conversion_p (type1, type)))
3708 : {
3709 719 : tree type3 = id >= l ? type1 : pointer_sized_int_node;
3710 719 : g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3711 719 : gimple_seq_add_stmt_without_update (&seq, g);
3712 719 : exp = gimple_assign_lhs (g);
3713 : }
3714 9643 : if ((b % 4) == 3)
3715 3262 : code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3716 : else
3717 7655 : code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3718 19286 : g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3719 : code, op, exp);
3720 9643 : gimple_seq_add_stmt_without_update (&seq, g);
3721 9643 : op = gimple_assign_lhs (g);
3722 : }
3723 6738 : type1 = TREE_TYPE (ranges[k - 1].exp);
3724 6738 : if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3725 : {
3726 677 : gimple *g
3727 677 : = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3728 677 : gimple_seq_add_stmt_without_update (&seq, g);
3729 677 : op = gimple_assign_lhs (g);
3730 : }
3731 6738 : candidates.pop ();
3732 6738 : if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3733 : candidates.length (), opcode, ops, op,
3734 6738 : seq, ranges[k - 1].in_p, ranges[k - 1].low,
3735 : ranges[k - 1].high))
3736 : any_changes = true;
3737 : else
3738 0 : gimple_seq_discard (seq);
3739 6798 : if ((b % 4) == 2 && buckets[b] != i)
3740 : /* There is more work to do for this bucket. */
3741 2 : b--;
3742 : }
3743 :
3744 1047494 : return any_changes;
3745 1047494 : }
3746 :
3747 : /* Attempt to optimize for signed a and b where b is known to be >= 0:
3748 : a >= 0 && a < b into (unsigned) a < (unsigned) b
3749 : a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3750 :
3751 : static bool
3752 1047494 : optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3753 : vec<operand_entry *> *ops,
3754 : struct range_entry *ranges,
3755 : basic_block first_bb)
3756 : {
3757 1047494 : int i;
3758 1047494 : bool any_changes = false;
3759 1047494 : hash_map<tree, int> *map = NULL;
3760 :
3761 1834147 : for (i = first; i < length; i++)
3762 : {
3763 786653 : if (ranges[i].exp == NULL_TREE
3764 767561 : || TREE_CODE (ranges[i].exp) != SSA_NAME
3765 763022 : || !ranges[i].in_p)
3766 317010 : continue;
3767 :
3768 469643 : tree type = TREE_TYPE (ranges[i].exp);
3769 893293 : if (!INTEGRAL_TYPE_P (type)
3770 461007 : || TYPE_UNSIGNED (type)
3771 176325 : || ranges[i].low == NULL_TREE
3772 167150 : || !integer_zerop (ranges[i].low)
3773 543515 : || ranges[i].high != NULL_TREE)
3774 423650 : continue;
3775 : /* EXP >= 0 here. */
3776 45993 : if (map == NULL)
3777 44417 : map = new hash_map <tree, int>;
3778 45993 : map->put (ranges[i].exp, i);
3779 : }
3780 :
3781 1047494 : if (map == NULL)
3782 : return false;
3783 :
3784 136903 : for (i = 0; i < length; i++)
3785 : {
3786 92486 : bool in_p = ranges[i].in_p;
3787 92486 : if (ranges[i].low == NULL_TREE
3788 91785 : || ranges[i].high == NULL_TREE)
3789 91961 : continue;
3790 43998 : if (!integer_zerop (ranges[i].low)
3791 43998 : || !integer_zerop (ranges[i].high))
3792 : {
3793 8892 : if (ranges[i].exp
3794 4446 : && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3795 0 : && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3796 0 : && integer_onep (ranges[i].low)
3797 4446 : && integer_onep (ranges[i].high))
3798 0 : in_p = !in_p;
3799 : else
3800 4446 : continue;
3801 : }
3802 :
3803 39552 : gimple *stmt;
3804 39552 : tree_code ccode;
3805 39552 : tree rhs1, rhs2;
3806 39552 : if (ranges[i].exp)
3807 : {
3808 38545 : if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3809 5 : continue;
3810 38540 : stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3811 38540 : if (!is_gimple_assign (stmt))
3812 858 : continue;
3813 37682 : ccode = gimple_assign_rhs_code (stmt);
3814 37682 : rhs1 = gimple_assign_rhs1 (stmt);
3815 37682 : rhs2 = gimple_assign_rhs2 (stmt);
3816 : }
3817 : else
3818 : {
3819 1007 : operand_entry *oe = (*ops)[ranges[i].idx];
3820 1007 : stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3821 1007 : if (gimple_code (stmt) != GIMPLE_COND)
3822 0 : continue;
3823 1007 : ccode = gimple_cond_code (stmt);
3824 1007 : rhs1 = gimple_cond_lhs (stmt);
3825 1007 : rhs2 = gimple_cond_rhs (stmt);
3826 : }
3827 :
3828 38689 : if (TREE_CODE (rhs1) != SSA_NAME
3829 38147 : || rhs2 == NULL_TREE
3830 38063 : || TREE_CODE (rhs2) != SSA_NAME)
3831 703 : continue;
3832 :
3833 37986 : switch (ccode)
3834 : {
3835 36892 : case GT_EXPR:
3836 36892 : case GE_EXPR:
3837 36892 : case LT_EXPR:
3838 36892 : case LE_EXPR:
3839 36892 : break;
3840 1094 : default:
3841 1094 : continue;
3842 : }
3843 36892 : if (in_p)
3844 801 : ccode = invert_tree_comparison (ccode, false);
3845 36892 : switch (ccode)
3846 : {
3847 14980 : case GT_EXPR:
3848 14980 : case GE_EXPR:
3849 14980 : std::swap (rhs1, rhs2);
3850 14980 : ccode = swap_tree_comparison (ccode);
3851 14980 : break;
3852 : case LT_EXPR:
3853 : case LE_EXPR:
3854 : break;
3855 0 : default:
3856 0 : gcc_unreachable ();
3857 : }
3858 :
3859 36892 : int *idx = map->get (rhs1);
3860 36892 : if (idx == NULL)
3861 930 : continue;
3862 :
3863 : /* maybe_optimize_range_tests allows statements without side-effects
3864 : in the basic blocks as long as they are consumed in the same bb.
3865 : Make sure rhs2's def stmt is not among them, otherwise we can't
3866 : use safely get_nonzero_bits on it. E.g. in:
3867 : # RANGE [-83, 1] NONZERO 173
3868 : # k_32 = PHI <k_47(13), k_12(9)>
3869 : ...
3870 : if (k_32 >= 0)
3871 : goto <bb 5>; [26.46%]
3872 : else
3873 : goto <bb 9>; [73.54%]
3874 :
3875 : <bb 5> [local count: 140323371]:
3876 : # RANGE [0, 1] NONZERO 1
3877 : _5 = (int) k_32;
3878 : # RANGE [0, 4] NONZERO 4
3879 : _21 = _5 << 2;
3880 : # RANGE [0, 4] NONZERO 4
3881 : iftmp.0_44 = (char) _21;
3882 : if (k_32 < iftmp.0_44)
3883 : goto <bb 6>; [84.48%]
3884 : else
3885 : goto <bb 9>; [15.52%]
3886 : the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3887 : k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3888 : to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3889 : those stmts even for negative k_32 and the value ranges would be no
3890 : longer guaranteed and so the optimization would be invalid. */
3891 35962 : while (opcode == ERROR_MARK)
3892 : {
3893 605 : gimple *g = SSA_NAME_DEF_STMT (rhs2);
3894 605 : basic_block bb2 = gimple_bb (g);
3895 605 : if (bb2
3896 605 : && bb2 != first_bb
3897 605 : && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3898 : {
3899 : /* As an exception, handle a few common cases. */
3900 512 : if (gimple_assign_cast_p (g)
3901 512 : && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3902 : {
3903 40 : tree op0 = gimple_assign_rhs1 (g);
3904 40 : if (TYPE_UNSIGNED (TREE_TYPE (op0))
3905 40 : && (TYPE_PRECISION (TREE_TYPE (rhs2))
3906 10 : > TYPE_PRECISION (TREE_TYPE (op0))))
3907 : /* Zero-extension is always ok. */
3908 : break;
3909 30 : else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3910 30 : == TYPE_PRECISION (TREE_TYPE (op0))
3911 30 : && TREE_CODE (op0) == SSA_NAME)
3912 : {
3913 : /* Cast from signed to unsigned or vice versa. Retry
3914 : with the op0 as new rhs2. */
3915 0 : rhs2 = op0;
3916 0 : continue;
3917 : }
3918 : }
3919 472 : else if (is_gimple_assign (g)
3920 472 : && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3921 0 : && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3922 944 : && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3923 : /* Masking with INTEGER_CST with MSB clear is always ok
3924 : too. */
3925 : break;
3926 : rhs2 = NULL_TREE;
3927 : }
3928 : break;
3929 : }
3930 35460 : if (rhs2 == NULL_TREE)
3931 502 : continue;
3932 :
3933 35985 : wide_int nz = get_nonzero_bits (rhs2);
3934 35460 : if (wi::neg_p (nz))
3935 34935 : continue;
3936 :
3937 : /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3938 : and RHS2 is known to be RHS2 >= 0. */
3939 525 : tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3940 :
3941 525 : if (dump_file && (dump_flags & TDF_DETAILS))
3942 : {
3943 7 : struct range_entry *r = &ranges[*idx];
3944 7 : fprintf (dump_file, "Optimizing range test ");
3945 7 : print_generic_expr (dump_file, r->exp);
3946 7 : fprintf (dump_file, " +[");
3947 7 : print_generic_expr (dump_file, r->low);
3948 7 : fprintf (dump_file, ", ");
3949 7 : print_generic_expr (dump_file, r->high);
3950 7 : fprintf (dump_file, "] and comparison ");
3951 7 : print_generic_expr (dump_file, rhs1);
3952 7 : fprintf (dump_file, " %s ", op_symbol_code (ccode));
3953 7 : print_generic_expr (dump_file, rhs2);
3954 7 : fprintf (dump_file, "\n into (");
3955 7 : print_generic_expr (dump_file, utype);
3956 7 : fprintf (dump_file, ") ");
3957 7 : print_generic_expr (dump_file, rhs1);
3958 7 : fprintf (dump_file, " %s (", op_symbol_code (ccode));
3959 7 : print_generic_expr (dump_file, utype);
3960 7 : fprintf (dump_file, ") ");
3961 7 : print_generic_expr (dump_file, rhs2);
3962 7 : fprintf (dump_file, "\n");
3963 : }
3964 :
3965 525 : operand_entry *oe = (*ops)[ranges[i].idx];
3966 525 : ranges[i].in_p = 0;
3967 525 : if (opcode == BIT_IOR_EXPR
3968 486 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3969 : {
3970 40 : ranges[i].in_p = 1;
3971 40 : ccode = invert_tree_comparison (ccode, false);
3972 : }
3973 :
3974 525 : unsigned int uid = gimple_uid (stmt);
3975 525 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3976 525 : gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3977 525 : gimple_set_uid (g, uid);
3978 525 : rhs1 = gimple_assign_lhs (g);
3979 525 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3980 525 : if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3981 : {
3982 525 : g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3983 525 : gimple_set_uid (g, uid);
3984 525 : rhs2 = gimple_assign_lhs (g);
3985 525 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3986 : }
3987 525 : if (tree_swap_operands_p (rhs1, rhs2))
3988 : {
3989 486 : std::swap (rhs1, rhs2);
3990 486 : ccode = swap_tree_comparison (ccode);
3991 : }
3992 525 : if (gimple_code (stmt) == GIMPLE_COND)
3993 : {
3994 8 : gcond *c = as_a <gcond *> (stmt);
3995 8 : gimple_cond_set_code (c, ccode);
3996 8 : gimple_cond_set_lhs (c, rhs1);
3997 8 : gimple_cond_set_rhs (c, rhs2);
3998 8 : update_stmt (stmt);
3999 : }
4000 : else
4001 : {
4002 517 : tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4003 517 : if (!INTEGRAL_TYPE_P (ctype)
4004 517 : || (TREE_CODE (ctype) != BOOLEAN_TYPE
4005 3 : && TYPE_PRECISION (ctype) != 1))
4006 3 : ctype = boolean_type_node;
4007 517 : g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4008 517 : gimple_set_uid (g, uid);
4009 517 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4010 517 : if (oe->op && ctype != TREE_TYPE (oe->op))
4011 : {
4012 3 : g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4013 : NOP_EXPR, gimple_assign_lhs (g));
4014 3 : gimple_set_uid (g, uid);
4015 3 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4016 : }
4017 517 : ranges[i].exp = gimple_assign_lhs (g);
4018 517 : oe->op = ranges[i].exp;
4019 517 : ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4020 517 : ranges[i].high = ranges[i].low;
4021 : }
4022 525 : oe = (*ops)[ranges[*idx].idx];
4023 : /* Now change all the other range test immediate uses, so that
4024 : those tests will be optimized away. */
4025 525 : if (opcode == ERROR_MARK)
4026 : {
4027 11 : if (oe->op)
4028 3 : oe->op = build_int_cst (TREE_TYPE (oe->op),
4029 3 : oe->rank == BIT_IOR_EXPR ? 0 : 1);
4030 : else
4031 8 : oe->op = (oe->rank == BIT_IOR_EXPR
4032 8 : ? boolean_false_node : boolean_true_node);
4033 : }
4034 : else
4035 514 : oe->op = error_mark_node;
4036 525 : ranges[*idx].exp = NULL_TREE;
4037 525 : ranges[*idx].low = NULL_TREE;
4038 525 : ranges[*idx].high = NULL_TREE;
4039 525 : any_changes = true;
4040 : }
4041 :
4042 44417 : delete map;
4043 44417 : return any_changes;
4044 : }
4045 :
4046 : /* Optimize range tests, similarly how fold_range_test optimizes
4047 : it on trees. The tree code for the binary
4048 : operation between all the operands is OPCODE.
4049 : If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4050 : maybe_optimize_range_tests for inter-bb range optimization.
4051 : In that case if oe->op is NULL, oe->id is bb->index whose
4052 : GIMPLE_COND is && or ||ed into the test, and oe->rank says
4053 : the actual opcode.
4054 : FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
4055 :
4056 : static bool
4057 1047626 : optimize_range_tests (enum tree_code opcode,
4058 : vec<operand_entry *> *ops, basic_block first_bb)
4059 : {
4060 1047626 : unsigned int length = ops->length (), i, j, first;
4061 1047626 : operand_entry *oe;
4062 1047626 : struct range_entry *ranges;
4063 2095120 : bool any_changes = false;
4064 :
4065 1047626 : if (length == 1)
4066 : return false;
4067 :
4068 1047494 : ranges = XNEWVEC (struct range_entry, length);
4069 4317468 : for (i = 0; i < length; i++)
4070 : {
4071 2222480 : oe = (*ops)[i];
4072 2222480 : ranges[i].idx = i;
4073 2222480 : init_range_entry (ranges + i, oe->op,
4074 2222480 : oe->op
4075 : ? NULL
4076 265363 : : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4077 : /* For | invert it now, we will invert it again before emitting
4078 : the optimized expression. */
4079 2222480 : if (opcode == BIT_IOR_EXPR
4080 1543750 : || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4081 873629 : ranges[i].in_p = !ranges[i].in_p;
4082 : }
4083 :
4084 1047494 : qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4085 3530815 : for (i = 0; i < length; i++)
4086 1796307 : if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4087 : break;
4088 :
4089 : /* Try to merge ranges. */
4090 1820710 : for (first = i; i < length; i++)
4091 : {
4092 773216 : tree low = ranges[i].low;
4093 773216 : tree high = ranges[i].high;
4094 773216 : int in_p = ranges[i].in_p;
4095 773216 : int update_fail_count = 0;
4096 :
4097 786653 : for (j = i + 1; j < length; j++)
4098 : {
4099 426173 : if (ranges[i].exp != ranges[j].exp)
4100 : break;
4101 34305 : if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4102 34305 : ranges[j].in_p, ranges[j].low, ranges[j].high))
4103 : break;
4104 : }
4105 :
4106 773216 : if (j == i + 1)
4107 760344 : continue;
4108 :
4109 12872 : if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4110 : opcode, ops, ranges[i].exp, NULL, in_p,
4111 : low, high))
4112 : {
4113 12872 : i = j - 1;
4114 12872 : any_changes = true;
4115 : }
4116 : /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4117 : while update_range_test would fail. */
4118 : else if (update_fail_count == 64)
4119 : i = j - 1;
4120 : else
4121 12872 : ++update_fail_count;
4122 : }
4123 :
4124 1047494 : any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4125 : ops, ranges);
4126 :
4127 1047494 : if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4128 1047477 : any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4129 : ops, ranges);
4130 1047494 : if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4131 1047494 : any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4132 : ops, ranges);
4133 1047494 : any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4134 : ranges, first_bb);
4135 1047494 : any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4136 : ops, ranges);
4137 :
4138 1047494 : if (any_changes && opcode != ERROR_MARK)
4139 : {
4140 : j = 0;
4141 35658 : FOR_EACH_VEC_ELT (*ops, i, oe)
4142 : {
4143 24840 : if (oe->op == error_mark_node)
4144 12176 : continue;
4145 12664 : else if (i != j)
4146 5160 : (*ops)[j] = oe;
4147 12664 : j++;
4148 : }
4149 10818 : ops->truncate (j);
4150 : }
4151 :
4152 1047494 : XDELETEVEC (ranges);
4153 1047494 : return any_changes;
4154 : }
4155 :
4156 : /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4157 : the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4158 : otherwise the comparison code. TYPE is a return value that is set
4159 : to type of comparison. */
4160 :
4161 : static tree_code
4162 51121 : ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4163 : tree *lhs, tree *rhs, gassign **vcond)
4164 : {
4165 51121 : if (TREE_CODE (var) != SSA_NAME)
4166 : return ERROR_MARK;
4167 :
4168 45779 : gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4169 32394 : if (stmt == NULL)
4170 : return ERROR_MARK;
4171 32394 : if (vcond)
4172 32394 : *vcond = stmt;
4173 :
4174 : /* ??? If we start creating more COND_EXPR, we could perform
4175 : this same optimization with them. For now, simplify. */
4176 43250 : if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4177 : return ERROR_MARK;
4178 :
4179 1256 : tree cond = gimple_assign_rhs1 (stmt);
4180 1256 : tree_code cmp = TREE_CODE (cond);
4181 1256 : if (cmp != SSA_NAME)
4182 : return ERROR_MARK;
4183 :
4184 52321 : gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4185 1230 : if (assign == NULL
4186 1230 : || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4187 : return ERROR_MARK;
4188 :
4189 1117 : cmp = gimple_assign_rhs_code (assign);
4190 1117 : if (lhs)
4191 1117 : *lhs = gimple_assign_rhs1 (assign);
4192 1117 : if (rhs)
4193 2234 : *rhs = gimple_assign_rhs2 (assign);
4194 :
4195 : /* ??? For now, allow only canonical true and false result vectors.
4196 : We could expand this to other constants should the need arise,
4197 : but at the moment we don't create them. */
4198 1117 : tree t = gimple_assign_rhs2 (stmt);
4199 1117 : tree f = gimple_assign_rhs3 (stmt);
4200 1117 : bool inv;
4201 1117 : if (integer_all_onesp (t))
4202 : inv = false;
4203 1069 : else if (integer_all_onesp (f))
4204 : {
4205 1 : cmp = invert_tree_comparison (cmp, false);
4206 1 : inv = true;
4207 : }
4208 : else
4209 : return ERROR_MARK;
4210 49 : if (!integer_zerop (inv ? t : f))
4211 : return ERROR_MARK;
4212 :
4213 : /* Success! */
4214 30 : if (rets)
4215 30 : *rets = assign;
4216 30 : if (reti)
4217 24 : *reti = inv;
4218 30 : if (type)
4219 24 : *type = TREE_TYPE (cond);
4220 : return cmp;
4221 : }
4222 :
4223 : /* Optimize the condition of VEC_COND_EXPRs which have been combined
4224 : with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4225 :
4226 : static bool
4227 23964 : optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4228 : {
4229 23964 : unsigned int length = ops->length (), i, j;
4230 23964 : bool any_changes = false;
4231 :
4232 23964 : if (length == 1)
4233 : return false;
4234 :
4235 75058 : for (i = 0; i < length; ++i)
4236 : {
4237 51097 : tree &elt0 = (*ops)[i]->op;
4238 :
4239 51097 : gassign *stmt0, *vcond0;
4240 51097 : bool invert;
4241 51097 : tree type, lhs0, rhs0;
4242 51097 : tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4243 : &rhs0, &vcond0);
4244 51097 : if (cmp0 == ERROR_MARK)
4245 51073 : continue;
4246 :
4247 48 : for (j = i + 1; j < length; ++j)
4248 : {
4249 24 : tree &elt1 = (*ops)[j]->op;
4250 :
4251 24 : gassign *stmt1, *vcond1;
4252 24 : tree lhs1, rhs1;
4253 24 : tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4254 : &rhs1, &vcond1);
4255 24 : if (cmp1 == ERROR_MARK)
4256 18 : continue;
4257 :
4258 6 : tree comb;
4259 6 : if (opcode == BIT_AND_EXPR)
4260 0 : comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4261 : cmp1, lhs1, rhs1);
4262 6 : else if (opcode == BIT_IOR_EXPR)
4263 6 : comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4264 : cmp1, lhs1, rhs1);
4265 : else
4266 0 : gcc_unreachable ();
4267 6 : if (comb == NULL)
4268 0 : continue;
4269 :
4270 : /* Success! */
4271 6 : if (dump_file && (dump_flags & TDF_DETAILS))
4272 : {
4273 0 : fprintf (dump_file, "Transforming ");
4274 0 : print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4275 0 : fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4276 0 : print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4277 0 : fprintf (dump_file, " into ");
4278 0 : print_generic_expr (dump_file, comb);
4279 0 : fputc ('\n', dump_file);
4280 : }
4281 :
4282 6 : gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4283 6 : tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4284 : true, GSI_SAME_STMT);
4285 18 : tree res = gimple_build (&gsi, true, GSI_SAME_STMT, UNKNOWN_LOCATION,
4286 6 : VEC_COND_EXPR, TREE_TYPE (elt0), exp,
4287 : constant_boolean_node (true,
4288 6 : TREE_TYPE (elt0)),
4289 : constant_boolean_node (false,
4290 6 : TREE_TYPE (elt0)));
4291 6 : elt0 = res;
4292 :
4293 6 : elt1 = error_mark_node;
4294 6 : any_changes = true;
4295 : }
4296 : }
4297 :
4298 23961 : if (any_changes)
4299 : {
4300 : operand_entry *oe;
4301 : j = 0;
4302 18 : FOR_EACH_VEC_ELT (*ops, i, oe)
4303 : {
4304 12 : if (oe->op == error_mark_node)
4305 6 : continue;
4306 6 : else if (i != j)
4307 0 : (*ops)[j] = oe;
4308 6 : j++;
4309 : }
4310 6 : ops->truncate (j);
4311 : }
4312 :
4313 : return any_changes;
4314 : }
4315 :
4316 : /* Return true if STMT is a cast like:
4317 : <bb N>:
4318 : ...
4319 : _123 = (int) _234;
4320 :
4321 : <bb M>:
4322 : # _345 = PHI <_123(N), 1(...), 1(...)>
4323 : where _234 has bool type, _123 has single use and
4324 : bb N has a single successor M. This is commonly used in
4325 : the last block of a range test.
4326 :
4327 : Also Return true if STMT is tcc_compare like:
4328 : <bb N>:
4329 : ...
4330 : _234 = a_2(D) == 2;
4331 :
4332 : <bb M>:
4333 : # _345 = PHI <_234(N), 1(...), 1(...)>
4334 : _346 = (int) _345;
4335 : where _234 has booltype, single use and
4336 : bb N has a single successor M. This is commonly used in
4337 : the last block of a range test. */
4338 :
4339 : static bool
4340 15286908 : final_range_test_p (gimple *stmt)
4341 : {
4342 15286908 : basic_block bb, rhs_bb, lhs_bb;
4343 15286908 : edge e;
4344 15286908 : tree lhs, rhs;
4345 15286908 : use_operand_p use_p;
4346 15286908 : gimple *use_stmt;
4347 :
4348 15286908 : if (!gimple_assign_cast_p (stmt)
4349 15286908 : && (!is_gimple_assign (stmt)
4350 4814936 : || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4351 : != tcc_comparison)))
4352 : return false;
4353 554735 : bb = gimple_bb (stmt);
4354 15639098 : if (!single_succ_p (bb))
4355 : return false;
4356 554452 : e = single_succ_edge (bb);
4357 554452 : if (e->flags & EDGE_COMPLEX)
4358 : return false;
4359 :
4360 554452 : lhs = gimple_assign_lhs (stmt);
4361 554452 : rhs = gimple_assign_rhs1 (stmt);
4362 554452 : if (gimple_assign_cast_p (stmt)
4363 554452 : && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4364 386172 : || TREE_CODE (rhs) != SSA_NAME
4365 369840 : || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4366 : return false;
4367 :
4368 209374 : if (!gimple_assign_cast_p (stmt)
4369 209374 : && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4370 : return false;
4371 :
4372 : /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4373 209200 : if (!single_imm_use (lhs, &use_p, &use_stmt))
4374 : return false;
4375 :
4376 203897 : if (gimple_code (use_stmt) != GIMPLE_PHI
4377 203897 : || gimple_bb (use_stmt) != e->dest)
4378 : return false;
4379 :
4380 : /* And that the rhs is defined in the same loop. */
4381 202305 : if (gimple_assign_cast_p (stmt))
4382 : {
4383 72831 : if (TREE_CODE (rhs) != SSA_NAME
4384 72831 : || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4385 145656 : || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4386 43 : return false;
4387 : }
4388 : else
4389 : {
4390 129474 : if (TREE_CODE (lhs) != SSA_NAME
4391 129474 : || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4392 258948 : || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4393 0 : return false;
4394 : }
4395 :
4396 : return true;
4397 : }
4398 :
4399 : /* Return true if BB is suitable basic block for inter-bb range test
4400 : optimization. If BACKWARD is true, BB should be the only predecessor
4401 : of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4402 : or compared with to find a common basic block to which all conditions
4403 : branch to if true resp. false. If BACKWARD is false, TEST_BB should
4404 : be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4405 : TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4406 : block points to an empty block that falls through into *OTHER_BB and
4407 : the phi args match that path. */
4408 :
4409 : static bool
4410 11243636 : suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4411 : bool *test_swapped_p, bool backward)
4412 : {
4413 11243636 : edge_iterator ei, ei2;
4414 11243636 : edge e, e2;
4415 11243636 : gimple *stmt;
4416 11243636 : gphi_iterator gsi;
4417 11243636 : bool other_edge_seen = false;
4418 11243636 : bool is_cond;
4419 :
4420 11243636 : if (test_bb == bb)
4421 : return false;
4422 : /* Check last stmt first. */
4423 11243636 : stmt = last_nondebug_stmt (bb);
4424 11243636 : if (stmt == NULL
4425 10385104 : || (gimple_code (stmt) != GIMPLE_COND
4426 506407 : && (backward || !final_range_test_p (stmt)))
4427 9918788 : || gimple_visited_p (stmt)
4428 9859146 : || stmt_could_throw_p (cfun, stmt)
4429 21102656 : || *other_bb == bb)
4430 1384619 : return false;
4431 9859017 : is_cond = gimple_code (stmt) == GIMPLE_COND;
4432 9859017 : if (is_cond)
4433 : {
4434 : /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4435 : goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4436 : to *OTHER_BB (if not set yet, try to find it out). */
4437 18710943 : if (EDGE_COUNT (bb->succs) != 2)
4438 : return false;
4439 19382774 : FOR_EACH_EDGE (e, ei, bb->succs)
4440 : {
4441 15977586 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4442 : return false;
4443 15977586 : if (e->dest == test_bb)
4444 : {
4445 5304330 : if (backward)
4446 5302052 : continue;
4447 : else
4448 : return false;
4449 : }
4450 10673256 : if (e->dest == bb)
4451 : return false;
4452 10519017 : if (*other_bb == NULL)
4453 : {
4454 25763022 : FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4455 17175348 : if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4456 : return false;
4457 17175348 : else if (e->dest == e2->dest)
4458 2369565 : *other_bb = e->dest;
4459 8587674 : if (*other_bb == NULL)
4460 : return false;
4461 : }
4462 4300908 : if (e->dest == *other_bb)
4463 : other_edge_seen = true;
4464 934920 : else if (backward)
4465 : return false;
4466 : }
4467 3405188 : if (*other_bb == NULL || !other_edge_seen)
4468 : return false;
4469 : }
4470 39965 : else if (single_succ (bb) != *other_bb)
4471 : return false;
4472 :
4473 : /* Now check all PHIs of *OTHER_BB. */
4474 3405241 : e = find_edge (bb, *other_bb);
4475 3405241 : e2 = find_edge (test_bb, *other_bb);
4476 3411573 : retry:;
4477 4968378 : for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4478 : {
4479 2616633 : gphi *phi = gsi.phi ();
4480 : /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4481 : corresponding to BB and TEST_BB predecessor must be the same. */
4482 2616633 : if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4483 2616633 : gimple_phi_arg_def (phi, e2->dest_idx), 0))
4484 : {
4485 : /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4486 : one of the PHIs should have the lhs of the last stmt in
4487 : that block as PHI arg and that PHI should have 0 or 1
4488 : corresponding to it in all other range test basic blocks
4489 : considered. */
4490 1128770 : if (!is_cond)
4491 : {
4492 41928 : if (gimple_phi_arg_def (phi, e->dest_idx)
4493 41928 : == gimple_assign_lhs (stmt)
4494 41928 : && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4495 18221 : || integer_onep (gimple_phi_arg_def (phi,
4496 18221 : e2->dest_idx))))
4497 37334 : continue;
4498 : }
4499 : else
4500 : {
4501 1086842 : gimple *test_last = last_nondebug_stmt (test_bb);
4502 1086842 : if (gimple_code (test_last) == GIMPLE_COND)
4503 : {
4504 1050683 : if (backward ? e2->src != test_bb : e->src != bb)
4505 : return false;
4506 :
4507 : /* For last_bb, handle also:
4508 : if (x_3(D) == 3)
4509 : goto <bb 6>; [34.00%]
4510 : else
4511 : goto <bb 7>; [66.00%]
4512 :
4513 : <bb 6> [local count: 79512730]:
4514 :
4515 : <bb 7> [local count: 1073741824]:
4516 : # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4517 : where bb 7 is *OTHER_BB, but the PHI values from the
4518 : earlier bbs match the path through the empty bb
4519 : in between. */
4520 1046277 : edge e3;
4521 1046277 : if (backward)
4522 1386349 : e3 = EDGE_SUCC (test_bb,
4523 : e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4524 : else
4525 20967 : e3 = EDGE_SUCC (bb,
4526 : e == EDGE_SUCC (bb, 0) ? 1 : 0);
4527 1046277 : if (empty_block_p (e3->dest)
4528 34703 : && single_succ_p (e3->dest)
4529 34703 : && single_succ (e3->dest) == *other_bb
4530 1082687 : && single_pred_p (e3->dest)
4531 1079874 : && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4532 : {
4533 6332 : if (backward)
4534 5677 : e2 = single_succ_edge (e3->dest);
4535 : else
4536 655 : e = single_succ_edge (e3->dest);
4537 6332 : if (test_swapped_p)
4538 293 : *test_swapped_p = true;
4539 6332 : goto retry;
4540 : }
4541 : }
4542 36159 : else if (gimple_phi_arg_def (phi, e2->dest_idx)
4543 36159 : == gimple_assign_lhs (test_last)
4544 68983 : && (integer_zerop (gimple_phi_arg_def (phi,
4545 32824 : e->dest_idx))
4546 15022 : || integer_onep (gimple_phi_arg_def (phi,
4547 15022 : e->dest_idx))))
4548 31608 : continue;
4549 : }
4550 :
4551 1049090 : return false;
4552 : }
4553 : }
4554 : return true;
4555 : }
4556 :
4557 : /* Return true if BB doesn't have side-effects that would disallow
4558 : range test optimization, all SSA_NAMEs set in the bb are consumed
4559 : in the bb and there are no PHIs. */
4560 :
4561 : bool
4562 5294839 : no_side_effect_bb (basic_block bb)
4563 : {
4564 5294839 : gimple_stmt_iterator gsi;
4565 5294839 : gimple *last;
4566 :
4567 5294839 : if (!gimple_seq_empty_p (phi_nodes (bb)))
4568 : return false;
4569 4202339 : last = last_nondebug_stmt (bb);
4570 14287957 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4571 : {
4572 10085618 : gimple *stmt = gsi_stmt (gsi);
4573 10085618 : tree lhs;
4574 10085618 : imm_use_iterator imm_iter;
4575 10085618 : use_operand_p use_p;
4576 :
4577 10085618 : if (is_gimple_debug (stmt))
4578 4188034 : continue;
4579 5897584 : if (gimple_has_side_effects (stmt))
4580 4202339 : return false;
4581 4989838 : if (stmt == last)
4582 : return true;
4583 3980656 : if (!is_gimple_assign (stmt))
4584 : return false;
4585 3316201 : lhs = gimple_assign_lhs (stmt);
4586 3316201 : if (TREE_CODE (lhs) != SSA_NAME)
4587 : return false;
4588 3079895 : if (gimple_assign_rhs_could_trap_p (stmt))
4589 : return false;
4590 6285203 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4591 : {
4592 2429987 : gimple *use_stmt = USE_STMT (use_p);
4593 2429987 : if (is_gimple_debug (use_stmt))
4594 166419 : continue;
4595 2263568 : if (gimple_bb (use_stmt) != bb)
4596 464726 : return false;
4597 2159971 : }
4598 : }
4599 : return false;
4600 : }
4601 :
4602 : /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4603 : return true and fill in *OPS recursively. */
4604 :
4605 : static bool
4606 102571 : get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4607 : class loop *loop)
4608 : {
4609 102571 : gimple *stmt = SSA_NAME_DEF_STMT (var);
4610 102571 : tree rhs[2];
4611 102571 : int i;
4612 :
4613 102571 : if (!is_reassociable_op (stmt, code, loop))
4614 : return false;
4615 :
4616 23964 : rhs[0] = gimple_assign_rhs1 (stmt);
4617 23964 : rhs[1] = gimple_assign_rhs2 (stmt);
4618 23964 : gimple_set_visited (stmt, true);
4619 71892 : for (i = 0; i < 2; i++)
4620 47928 : if (TREE_CODE (rhs[i]) == SSA_NAME
4621 47928 : && !get_ops (rhs[i], code, ops, loop)
4622 87279 : && has_single_use (rhs[i]))
4623 : {
4624 38635 : operand_entry *oe = operand_entry_pool.allocate ();
4625 :
4626 38635 : oe->op = rhs[i];
4627 38635 : oe->rank = code;
4628 38635 : oe->id = 0;
4629 38635 : oe->count = 1;
4630 38635 : oe->stmt_to_insert = NULL;
4631 38635 : ops->safe_push (oe);
4632 : }
4633 : return true;
4634 : }
4635 :
4636 : /* Find the ops that were added by get_ops starting from VAR, see if
4637 : they were changed during update_range_test and if yes, create new
4638 : stmts. */
4639 :
4640 : static tree
4641 10176 : update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4642 : unsigned int *pidx, class loop *loop)
4643 : {
4644 10176 : gimple *stmt = SSA_NAME_DEF_STMT (var);
4645 10176 : tree rhs[4];
4646 10176 : int i;
4647 :
4648 10176 : if (!is_reassociable_op (stmt, code, loop))
4649 : return NULL;
4650 :
4651 3368 : rhs[0] = gimple_assign_rhs1 (stmt);
4652 3368 : rhs[1] = gimple_assign_rhs2 (stmt);
4653 3368 : rhs[2] = rhs[0];
4654 3368 : rhs[3] = rhs[1];
4655 10104 : for (i = 0; i < 2; i++)
4656 6736 : if (TREE_CODE (rhs[i]) == SSA_NAME)
4657 : {
4658 6736 : rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4659 6736 : if (rhs[2 + i] == NULL_TREE)
4660 : {
4661 6466 : if (has_single_use (rhs[i]))
4662 6440 : rhs[2 + i] = ops[(*pidx)++]->op;
4663 : else
4664 26 : rhs[2 + i] = rhs[i];
4665 : }
4666 : }
4667 3368 : if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4668 3057 : && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4669 : {
4670 3057 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4671 3057 : var = make_ssa_name (TREE_TYPE (var));
4672 3057 : gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4673 : rhs[2], rhs[3]);
4674 3057 : gimple_set_uid (g, gimple_uid (stmt));
4675 3057 : gimple_set_visited (g, true);
4676 3057 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4677 3057 : gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4678 3057 : if (fold_stmt_inplace (&gsi2))
4679 2014 : update_stmt (g);
4680 : }
4681 : return var;
4682 : }
4683 :
4684 : /* Structure to track the initial value passed to get_ops and
4685 : the range in the ops vector for each basic block. */
4686 :
4687 : struct inter_bb_range_test_entry
4688 : {
4689 : tree op;
4690 : unsigned int first_idx, last_idx;
4691 : };
4692 :
4693 : /* Inter-bb range test optimization.
4694 :
4695 : Returns TRUE if a gimple conditional is optimized to a true/false,
4696 : otherwise return FALSE.
4697 :
4698 : This indicates to the caller that it should run a CFG cleanup pass
4699 : once reassociation is completed. */
4700 :
4701 : static bool
4702 18602982 : maybe_optimize_range_tests (gimple *stmt)
4703 : {
4704 18602982 : basic_block first_bb = gimple_bb (stmt);
4705 18602982 : basic_block last_bb = first_bb;
4706 18602982 : basic_block other_bb = NULL;
4707 18602982 : basic_block bb;
4708 18602982 : edge_iterator ei;
4709 18602982 : edge e;
4710 18602982 : auto_vec<operand_entry *> ops;
4711 18602982 : auto_vec<inter_bb_range_test_entry> bbinfo;
4712 18602982 : bool any_changes = false;
4713 18602982 : bool cfg_cleanup_needed = false;
4714 :
4715 : /* Consider only basic blocks that end with GIMPLE_COND or
4716 : a cast statement satisfying final_range_test_p. All
4717 : but the last bb in the first_bb .. last_bb range
4718 : should end with GIMPLE_COND. */
4719 18602982 : if (gimple_code (stmt) == GIMPLE_COND)
4720 : {
4721 27207558 : if (EDGE_COUNT (first_bb->succs) != 2)
4722 : return cfg_cleanup_needed;
4723 : }
4724 9984612 : else if (final_range_test_p (stmt))
4725 84870 : other_bb = single_succ (first_bb);
4726 : else
4727 : return cfg_cleanup_needed;
4728 :
4729 8703240 : if (stmt_could_throw_p (cfun, stmt))
4730 : return cfg_cleanup_needed;
4731 :
4732 : /* As relative ordering of post-dominator sons isn't fixed,
4733 : maybe_optimize_range_tests can be called first on any
4734 : bb in the range we want to optimize. So, start searching
4735 : backwards, if first_bb can be set to a predecessor. */
4736 8705381 : while (single_pred_p (first_bb))
4737 : {
4738 5750267 : basic_block pred_bb = single_pred (first_bb);
4739 5750267 : if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4740 : break;
4741 628092 : if (!no_side_effect_bb (first_bb))
4742 : break;
4743 : first_bb = pred_bb;
4744 : }
4745 : /* If first_bb is last_bb, other_bb hasn't been computed yet.
4746 : Before starting forward search in last_bb successors, find
4747 : out the other_bb. */
4748 8703066 : if (first_bb == last_bb)
4749 : {
4750 8700967 : other_bb = NULL;
4751 : /* As non-GIMPLE_COND last stmt always terminates the range,
4752 : if forward search didn't discover anything, just give up. */
4753 8700967 : if (gimple_code (stmt) != GIMPLE_COND)
4754 : return cfg_cleanup_needed;
4755 : /* Look at both successors. Either it ends with a GIMPLE_COND
4756 : and satisfies suitable_cond_bb, or ends with a cast and
4757 : other_bb is that cast's successor. */
4758 23960513 : FOR_EACH_EDGE (e, ei, first_bb->succs)
4759 16597940 : if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4760 16597940 : || e->dest == first_bb)
4761 : return cfg_cleanup_needed;
4762 25146197 : else if (single_pred_p (e->dest))
4763 : {
4764 9802022 : stmt = last_nondebug_stmt (e->dest);
4765 9802022 : if (stmt
4766 9619464 : && gimple_code (stmt) == GIMPLE_COND
4767 14203690 : && EDGE_COUNT (e->dest->succs) == 2)
4768 : {
4769 4401668 : if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4770 : NULL, true))
4771 : break;
4772 : else
4773 3661117 : other_bb = NULL;
4774 : }
4775 5400354 : else if (stmt
4776 5217796 : && final_range_test_p (stmt)
4777 5477655 : && find_edge (first_bb, single_succ (e->dest)))
4778 : {
4779 38474 : other_bb = single_succ (e->dest);
4780 38474 : if (other_bb == first_bb)
4781 0 : other_bb = NULL;
4782 : }
4783 : }
4784 8103124 : if (other_bb == NULL)
4785 : return cfg_cleanup_needed;
4786 : }
4787 : /* Now do the forward search, moving last_bb to successor bbs
4788 : that aren't other_bb. */
4789 1764163 : while (EDGE_COUNT (last_bb->succs) == 2)
4790 : {
4791 1654323 : FOR_EACH_EDGE (e, ei, last_bb->succs)
4792 1654323 : if (e->dest != other_bb)
4793 : break;
4794 983039 : if (e == NULL)
4795 : break;
4796 983039 : if (!single_pred_p (e->dest))
4797 : break;
4798 946355 : if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4799 : break;
4800 837756 : if (!no_side_effect_bb (e->dest))
4801 : break;
4802 207094 : last_bb = e->dest;
4803 : }
4804 781124 : if (first_bb == last_bb)
4805 : return cfg_cleanup_needed;
4806 : /* Here basic blocks first_bb through last_bb's predecessor
4807 : end with GIMPLE_COND, all of them have one of the edges to
4808 : other_bb and another to another block in the range,
4809 : all blocks except first_bb don't have side-effects and
4810 : last_bb ends with either GIMPLE_COND, or cast satisfying
4811 : final_range_test_p. */
4812 209409 : for (bb = last_bb; ; bb = single_pred (bb))
4813 : {
4814 359934 : enum tree_code code;
4815 359934 : tree lhs, rhs;
4816 359934 : inter_bb_range_test_entry bb_ent;
4817 :
4818 359934 : bb_ent.op = NULL_TREE;
4819 359934 : bb_ent.first_idx = ops.length ();
4820 359934 : bb_ent.last_idx = bb_ent.first_idx;
4821 359934 : e = find_edge (bb, other_bb);
4822 359934 : stmt = last_nondebug_stmt (bb);
4823 359934 : gimple_set_visited (stmt, true);
4824 359934 : if (gimple_code (stmt) != GIMPLE_COND)
4825 : {
4826 5179 : use_operand_p use_p;
4827 5179 : gimple *phi;
4828 5179 : edge e2;
4829 5179 : unsigned int d;
4830 :
4831 5179 : lhs = gimple_assign_lhs (stmt);
4832 5179 : rhs = gimple_assign_rhs1 (stmt);
4833 5179 : gcc_assert (bb == last_bb);
4834 :
4835 : /* stmt is
4836 : _123 = (int) _234;
4837 : OR
4838 : _234 = a_2(D) == 2;
4839 :
4840 : followed by:
4841 : <bb M>:
4842 : # _345 = PHI <_123(N), 1(...), 1(...)>
4843 :
4844 : or 0 instead of 1. If it is 0, the _234
4845 : range test is anded together with all the
4846 : other range tests, if it is 1, it is ored with
4847 : them. */
4848 5179 : single_imm_use (lhs, &use_p, &phi);
4849 5179 : gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4850 5179 : e2 = find_edge (first_bb, other_bb);
4851 5179 : d = e2->dest_idx;
4852 5179 : gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4853 5179 : if (integer_zerop (gimple_phi_arg_def (phi, d)))
4854 : code = BIT_AND_EXPR;
4855 : else
4856 : {
4857 2777 : gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4858 : code = BIT_IOR_EXPR;
4859 : }
4860 :
4861 : /* If _234 SSA_NAME_DEF_STMT is
4862 : _234 = _567 | _789;
4863 : (or &, corresponding to 1/0 in the phi arguments,
4864 : push into ops the individual range test arguments
4865 : of the bitwise or resp. and, recursively. */
4866 5179 : if (TREE_CODE (rhs) == SSA_NAME
4867 5179 : && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4868 : != tcc_comparison)
4869 2732 : && !get_ops (rhs, code, &ops,
4870 : loop_containing_stmt (stmt))
4871 7704 : && has_single_use (rhs))
4872 : {
4873 : /* Otherwise, push the _234 range test itself. */
4874 2515 : operand_entry *oe = operand_entry_pool.allocate ();
4875 :
4876 2515 : oe->op = rhs;
4877 2515 : oe->rank = code;
4878 2515 : oe->id = 0;
4879 2515 : oe->count = 1;
4880 2515 : oe->stmt_to_insert = NULL;
4881 2515 : ops.safe_push (oe);
4882 2515 : bb_ent.last_idx++;
4883 2515 : bb_ent.op = rhs;
4884 : }
4885 2664 : else if (is_gimple_assign (stmt)
4886 2664 : && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4887 : == tcc_comparison)
4888 2447 : && !get_ops (lhs, code, &ops,
4889 : loop_containing_stmt (stmt))
4890 5111 : && has_single_use (lhs))
4891 : {
4892 2447 : operand_entry *oe = operand_entry_pool.allocate ();
4893 2447 : oe->op = lhs;
4894 2447 : oe->rank = code;
4895 2447 : oe->id = 0;
4896 2447 : oe->count = 1;
4897 2447 : ops.safe_push (oe);
4898 2447 : bb_ent.last_idx++;
4899 2447 : bb_ent.op = lhs;
4900 : }
4901 : else
4902 : {
4903 217 : bb_ent.last_idx = ops.length ();
4904 217 : bb_ent.op = rhs;
4905 : }
4906 5179 : bbinfo.safe_push (bb_ent);
4907 10617 : for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4908 5438 : ops[i]->id = bb->index;
4909 5179 : continue;
4910 5179 : }
4911 354755 : else if (bb == last_bb)
4912 : {
4913 : /* For last_bb, handle also:
4914 : if (x_3(D) == 3)
4915 : goto <bb 6>; [34.00%]
4916 : else
4917 : goto <bb 7>; [66.00%]
4918 :
4919 : <bb 6> [local count: 79512730]:
4920 :
4921 : <bb 7> [local count: 1073741824]:
4922 : # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4923 : where bb 7 is OTHER_BB, but the PHI values from the
4924 : earlier bbs match the path through the empty bb
4925 : in between. */
4926 145346 : bool test_swapped_p = false;
4927 145346 : bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4928 : &other_bb, &test_swapped_p, true);
4929 145346 : gcc_assert (ok);
4930 145346 : if (test_swapped_p)
4931 503 : e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4932 : }
4933 : /* Otherwise stmt is GIMPLE_COND. */
4934 354755 : code = gimple_cond_code (stmt);
4935 354755 : lhs = gimple_cond_lhs (stmt);
4936 354755 : rhs = gimple_cond_rhs (stmt);
4937 354755 : if (TREE_CODE (lhs) == SSA_NAME
4938 354665 : && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4939 650772 : && ((code != EQ_EXPR && code != NE_EXPR)
4940 222732 : || rhs != boolean_false_node
4941 : /* Either push into ops the individual bitwise
4942 : or resp. and operands, depending on which
4943 : edge is other_bb. */
4944 49464 : || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4945 49464 : ^ (code == EQ_EXPR))
4946 : ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4947 : loop_containing_stmt (stmt))))
4948 : {
4949 : /* Or push the GIMPLE_COND stmt itself. */
4950 280837 : operand_entry *oe = operand_entry_pool.allocate ();
4951 :
4952 280837 : oe->op = NULL;
4953 561674 : oe->rank = (e->flags & EDGE_TRUE_VALUE)
4954 280837 : ? BIT_IOR_EXPR : BIT_AND_EXPR;
4955 : /* oe->op = NULL signs that there is no SSA_NAME
4956 : for the range test, and oe->id instead is the
4957 : basic block number, at which's end the GIMPLE_COND
4958 : is. */
4959 280837 : oe->id = bb->index;
4960 280837 : oe->count = 1;
4961 280837 : oe->stmt_to_insert = NULL;
4962 280837 : ops.safe_push (oe);
4963 280837 : bb_ent.op = NULL;
4964 280837 : bb_ent.last_idx++;
4965 : }
4966 73918 : else if (ops.length () > bb_ent.first_idx)
4967 : {
4968 15108 : bb_ent.op = lhs;
4969 15108 : bb_ent.last_idx = ops.length ();
4970 : }
4971 354755 : bbinfo.safe_push (bb_ent);
4972 673751 : for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4973 318996 : ops[i]->id = bb->index;
4974 354755 : if (bb == first_bb)
4975 : break;
4976 209409 : }
4977 18753507 : if (ops.length () > 1)
4978 118872 : any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4979 118872 : if (any_changes)
4980 : {
4981 : unsigned int idx, max_idx = 0;
4982 : /* update_ops relies on has_single_use predicates returning the
4983 : same values as it did during get_ops earlier. Additionally it
4984 : never removes statements, only adds new ones and it should walk
4985 : from the single imm use and check the predicate already before
4986 : making those changes.
4987 : On the other side, the handling of GIMPLE_COND directly can turn
4988 : previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4989 : it needs to be done in a separate loop afterwards. */
4990 21161 : for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4991 : {
4992 34955 : if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4993 34955 : && bbinfo[idx].op != NULL_TREE)
4994 : {
4995 3440 : tree new_op;
4996 :
4997 3440 : max_idx = idx;
4998 3440 : stmt = last_nondebug_stmt (bb);
4999 6880 : new_op = update_ops (bbinfo[idx].op,
5000 : (enum tree_code)
5001 3440 : ops[bbinfo[idx].first_idx]->rank,
5002 3440 : ops, &bbinfo[idx].first_idx,
5003 : loop_containing_stmt (stmt));
5004 3440 : if (new_op == NULL_TREE)
5005 : {
5006 342 : gcc_assert (bb == last_bb);
5007 342 : new_op = ops[bbinfo[idx].first_idx++]->op;
5008 : }
5009 3440 : if (bbinfo[idx].op != new_op)
5010 : {
5011 3165 : imm_use_iterator iter;
5012 3165 : use_operand_p use_p;
5013 3165 : gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5014 :
5015 9505 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5016 3175 : if (is_gimple_debug (use_stmt))
5017 10 : continue;
5018 3165 : else if (gimple_code (use_stmt) == GIMPLE_COND
5019 3165 : || gimple_code (use_stmt) == GIMPLE_PHI)
5020 8580 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5021 2860 : SET_USE (use_p, new_op);
5022 305 : else if ((is_gimple_assign (use_stmt)
5023 305 : && (TREE_CODE_CLASS
5024 : (gimple_assign_rhs_code (use_stmt))
5025 : == tcc_comparison)))
5026 : cast_or_tcc_cmp_stmt = use_stmt;
5027 305 : else if (gimple_assign_cast_p (use_stmt))
5028 : cast_or_tcc_cmp_stmt = use_stmt;
5029 : else
5030 0 : gcc_unreachable ();
5031 :
5032 3165 : if (cast_or_tcc_cmp_stmt)
5033 : {
5034 305 : gcc_assert (bb == last_bb);
5035 305 : tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5036 305 : tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5037 305 : enum tree_code rhs_code
5038 305 : = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5039 305 : ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5040 : : CONVERT_EXPR;
5041 305 : gassign *g;
5042 305 : if (is_gimple_min_invariant (new_op))
5043 : {
5044 91 : new_op = fold_convert (TREE_TYPE (lhs), new_op);
5045 91 : g = gimple_build_assign (new_lhs, new_op);
5046 : }
5047 : else
5048 214 : g = gimple_build_assign (new_lhs, rhs_code, new_op);
5049 305 : gimple_stmt_iterator gsi
5050 305 : = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5051 305 : gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5052 305 : gimple_set_visited (g, true);
5053 305 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5054 927 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5055 317 : if (is_gimple_debug (use_stmt))
5056 12 : continue;
5057 305 : else if (gimple_code (use_stmt) == GIMPLE_COND
5058 305 : || gimple_code (use_stmt) == GIMPLE_PHI)
5059 915 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5060 305 : SET_USE (use_p, new_lhs);
5061 : else
5062 305 : gcc_unreachable ();
5063 : }
5064 : }
5065 : }
5066 34955 : if (bb == first_bb)
5067 : break;
5068 21161 : }
5069 21161 : for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5070 : {
5071 34955 : if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5072 31180 : && bbinfo[idx].op == NULL_TREE
5073 66135 : && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5074 : {
5075 54224 : gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5076 :
5077 27112 : if (idx > max_idx)
5078 : max_idx = idx;
5079 :
5080 : /* If we collapse the conditional to a true/false
5081 : condition, then bubble that knowledge up to our caller. */
5082 27112 : if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5083 : {
5084 11329 : gimple_cond_make_false (cond_stmt);
5085 11329 : cfg_cleanup_needed = true;
5086 : }
5087 15783 : else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5088 : {
5089 3621 : gimple_cond_make_true (cond_stmt);
5090 3621 : cfg_cleanup_needed = true;
5091 : }
5092 : else
5093 : {
5094 12162 : gimple_cond_set_code (cond_stmt, NE_EXPR);
5095 12162 : gimple_cond_set_lhs (cond_stmt,
5096 12162 : ops[bbinfo[idx].first_idx]->op);
5097 12162 : gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5098 : }
5099 27112 : update_stmt (cond_stmt);
5100 : }
5101 34955 : if (bb == first_bb)
5102 : break;
5103 21161 : }
5104 :
5105 : /* The above changes could result in basic blocks after the first
5106 : modified one, up to and including last_bb, to be executed even if
5107 : they would not be in the original program. If the value ranges of
5108 : assignment lhs' in those bbs were dependent on the conditions
5109 : guarding those basic blocks which now can change, the VRs might
5110 : be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5111 : are only used within the same bb, it should be not a big deal if
5112 : we just reset all the VRs in those bbs. See PR68671. */
5113 33707 : for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5114 19913 : reset_flow_sensitive_info_in_bb (bb);
5115 : }
5116 : return cfg_cleanup_needed;
5117 18602982 : }
5118 :
5119 : /* Remove def stmt of VAR if VAR has zero uses and recurse
5120 : on rhs1 operand if so. */
5121 :
5122 : static void
5123 70910 : remove_visited_stmt_chain (tree var)
5124 : {
5125 95678 : gimple *stmt;
5126 95678 : gimple_stmt_iterator gsi;
5127 :
5128 120446 : while (1)
5129 : {
5130 95678 : if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5131 : return;
5132 36198 : stmt = SSA_NAME_DEF_STMT (var);
5133 36198 : if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5134 : {
5135 24768 : var = gimple_assign_rhs1 (stmt);
5136 24768 : gsi = gsi_for_stmt (stmt);
5137 24768 : reassoc_remove_stmt (&gsi);
5138 24768 : release_defs (stmt);
5139 : }
5140 : else
5141 : return;
5142 : }
5143 : }
5144 :
5145 : /* This function checks three consecutive operands in
5146 : passed operands vector OPS starting from OPINDEX and
5147 : swaps two operands if it is profitable for binary operation
5148 : consuming OPINDEX + 1 and OPINDEX + 2 operands.
5149 :
5150 : We pair ops with the same rank if possible. */
5151 :
5152 : static void
5153 130030 : swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5154 : unsigned int opindex)
5155 : {
5156 130030 : operand_entry *oe1, *oe2, *oe3;
5157 :
5158 130030 : oe1 = ops[opindex];
5159 130030 : oe2 = ops[opindex + 1];
5160 130030 : oe3 = ops[opindex + 2];
5161 :
5162 130030 : if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5163 15976 : std::swap (*oe1, *oe3);
5164 114054 : else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5165 375 : std::swap (*oe1, *oe2);
5166 130030 : }
5167 :
5168 : /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5169 : two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5170 : whether RHS1 op RHS2 can be inserted before or needs to be inserted
5171 : after the returned stmt. */
5172 :
5173 : static inline gimple *
5174 369286 : find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5175 : {
5176 369286 : insert_before = true;
5177 369286 : if (TREE_CODE (rhs1) == SSA_NAME
5178 369286 : && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5179 : {
5180 10229 : stmt = SSA_NAME_DEF_STMT (rhs1);
5181 10229 : insert_before = false;
5182 : }
5183 369286 : if (TREE_CODE (rhs2) == SSA_NAME
5184 369286 : && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5185 : {
5186 24258 : stmt = SSA_NAME_DEF_STMT (rhs2);
5187 24258 : insert_before = false;
5188 : }
5189 369286 : return stmt;
5190 : }
5191 :
5192 : /* If the stmt that defines operand has to be inserted, insert it
5193 : before the use. */
5194 : static void
5195 85 : insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5196 : {
5197 85 : gcc_assert (is_gimple_assign (stmt_to_insert));
5198 85 : tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5199 85 : tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5200 85 : bool insert_before;
5201 85 : gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5202 85 : gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5203 85 : gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5204 :
5205 : /* If the insert point is not stmt, then insert_point would be
5206 : the point where operand rhs1 or rhs2 is defined. In this case,
5207 : stmt_to_insert has to be inserted afterwards. This would
5208 : only happen when the stmt insertion point is flexible. */
5209 85 : if (insert_before)
5210 85 : gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5211 : else
5212 0 : insert_stmt_after (stmt_to_insert, insert_point);
5213 85 : }
5214 :
5215 :
5216 : /* Recursively rewrite our linearized statements so that the operators
5217 : match those in OPS[OPINDEX], putting the computation in rank
5218 : order. Return new lhs.
5219 : CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5220 : the current stmt and during recursive invocations.
5221 : NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5222 : recursive invocations. */
5223 :
5224 : static tree
5225 4794911 : rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5226 : const vec<operand_entry *> &ops, bool changed,
5227 : bool next_changed)
5228 : {
5229 4794911 : tree rhs1 = gimple_assign_rhs1 (stmt);
5230 4794911 : tree rhs2 = gimple_assign_rhs2 (stmt);
5231 4794911 : tree lhs = gimple_assign_lhs (stmt);
5232 4794911 : operand_entry *oe;
5233 :
5234 : /* The final recursion case for this function is that you have
5235 : exactly two operations left.
5236 : If we had exactly one op in the entire list to start with, we
5237 : would have never called this function, and the tail recursion
5238 : rewrites them one at a time. */
5239 9589822 : if (opindex + 2 == ops.length ())
5240 : {
5241 4543495 : operand_entry *oe1, *oe2;
5242 :
5243 4543495 : oe1 = ops[opindex];
5244 4543495 : oe2 = ops[opindex + 1];
5245 4543495 : if (commutative_tree_code (rhs_code)
5246 4543495 : && tree_swap_operands_p (oe1->op, oe2->op))
5247 : std::swap (oe1, oe2);
5248 :
5249 4543495 : if (rhs1 != oe1->op || rhs2 != oe2->op)
5250 : {
5251 236274 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5252 236274 : unsigned int uid = gimple_uid (stmt);
5253 :
5254 236274 : if (dump_file && (dump_flags & TDF_DETAILS))
5255 : {
5256 31 : fprintf (dump_file, "Transforming ");
5257 31 : print_gimple_stmt (dump_file, stmt, 0);
5258 : }
5259 :
5260 : /* If the stmt that defines operand has to be inserted, insert it
5261 : before the use. */
5262 236274 : if (oe1->stmt_to_insert)
5263 36 : insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5264 236274 : if (oe2->stmt_to_insert)
5265 49 : insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5266 : /* Even when changed is false, reassociation could have e.g. removed
5267 : some redundant operations, so unless we are just swapping the
5268 : arguments or unless there is no change at all (then we just
5269 : return lhs), force creation of a new SSA_NAME. */
5270 236274 : if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5271 : {
5272 105786 : bool insert_before;
5273 105786 : gimple *insert_point
5274 105786 : = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5275 105786 : lhs = make_ssa_name (TREE_TYPE (lhs));
5276 105786 : stmt
5277 105786 : = gimple_build_assign (lhs, rhs_code,
5278 : oe1->op, oe2->op);
5279 105786 : gimple_set_uid (stmt, uid);
5280 105786 : gimple_set_visited (stmt, true);
5281 105786 : if (insert_before)
5282 85466 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5283 : else
5284 20320 : insert_stmt_after (stmt, insert_point);
5285 105786 : }
5286 : else
5287 : {
5288 130488 : bool insert_before;
5289 130488 : gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5290 : insert_before)
5291 : == stmt);
5292 130488 : gimple_assign_set_rhs1 (stmt, oe1->op);
5293 130488 : gimple_assign_set_rhs2 (stmt, oe2->op);
5294 130488 : update_stmt (stmt);
5295 : }
5296 :
5297 236274 : if (rhs1 != oe1->op && rhs1 != oe2->op)
5298 54543 : remove_visited_stmt_chain (rhs1);
5299 :
5300 236274 : if (dump_file && (dump_flags & TDF_DETAILS))
5301 : {
5302 31 : fprintf (dump_file, " into ");
5303 31 : print_gimple_stmt (dump_file, stmt, 0);
5304 : }
5305 : }
5306 4543495 : return lhs;
5307 : }
5308 :
5309 : /* If we hit here, we should have 3 or more ops left. */
5310 251416 : gcc_assert (opindex + 2 < ops.length ());
5311 :
5312 : /* Rewrite the next operator. */
5313 251416 : oe = ops[opindex];
5314 :
5315 : /* If the stmt that defines operand has to be inserted, insert it
5316 : before the use. */
5317 251416 : if (oe->stmt_to_insert)
5318 0 : insert_stmt_before_use (stmt, oe->stmt_to_insert);
5319 :
5320 : /* Recurse on the LHS of the binary operator, which is guaranteed to
5321 : be the non-leaf side. */
5322 251416 : tree new_rhs1
5323 251416 : = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5324 251416 : changed || oe->op != rhs2 || next_changed,
5325 : false);
5326 :
5327 251416 : if (oe->op != rhs2 || new_rhs1 != rhs1)
5328 : {
5329 132927 : if (dump_file && (dump_flags & TDF_DETAILS))
5330 : {
5331 6 : fprintf (dump_file, "Transforming ");
5332 6 : print_gimple_stmt (dump_file, stmt, 0);
5333 : }
5334 :
5335 : /* If changed is false, this is either opindex == 0
5336 : or all outer rhs2's were equal to corresponding oe->op,
5337 : and powi_result is NULL.
5338 : That means lhs is equivalent before and after reassociation.
5339 : Otherwise ensure the old lhs SSA_NAME is not reused and
5340 : create a new stmt as well, so that any debug stmts will be
5341 : properly adjusted. */
5342 132927 : if (changed)
5343 : {
5344 25502 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5345 25502 : unsigned int uid = gimple_uid (stmt);
5346 25502 : bool insert_before;
5347 25502 : gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5348 : insert_before);
5349 :
5350 25502 : lhs = make_ssa_name (TREE_TYPE (lhs));
5351 25502 : stmt = gimple_build_assign (lhs, rhs_code,
5352 : new_rhs1, oe->op);
5353 25502 : gimple_set_uid (stmt, uid);
5354 25502 : gimple_set_visited (stmt, true);
5355 25502 : if (insert_before)
5356 12373 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5357 : else
5358 13129 : insert_stmt_after (stmt, insert_point);
5359 : }
5360 : else
5361 : {
5362 107425 : bool insert_before;
5363 107425 : gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5364 : insert_before)
5365 : == stmt);
5366 107425 : gimple_assign_set_rhs1 (stmt, new_rhs1);
5367 107425 : gimple_assign_set_rhs2 (stmt, oe->op);
5368 107425 : update_stmt (stmt);
5369 : }
5370 :
5371 132927 : if (dump_file && (dump_flags & TDF_DETAILS))
5372 : {
5373 6 : fprintf (dump_file, " into ");
5374 6 : print_gimple_stmt (dump_file, stmt, 0);
5375 : }
5376 : }
5377 : return lhs;
5378 : }
5379 :
5380 : /* Find out how many cycles we need to compute statements chain.
5381 : OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
5382 : maximum number of independent statements we may execute per cycle. */
5383 :
5384 : static int
5385 19008 : get_required_cycles (int ops_num, int cpu_width)
5386 : {
5387 19008 : int res;
5388 19008 : int elog;
5389 19008 : unsigned int rest;
5390 :
5391 : /* While we have more than 2 * cpu_width operands
5392 : we may reduce number of operands by cpu_width
5393 : per cycle. */
5394 19008 : res = ops_num / (2 * cpu_width);
5395 :
5396 : /* Remained operands count may be reduced twice per cycle
5397 : until we have only one operand. */
5398 19008 : rest = (unsigned)(ops_num - res * cpu_width);
5399 19008 : elog = exact_log2 (rest);
5400 8793 : if (elog >= 0)
5401 8793 : res += elog;
5402 : else
5403 20430 : res += floor_log2 (rest) + 1;
5404 :
5405 19008 : return res;
5406 : }
5407 :
5408 : /* Given that the target fully pipelines FMA instructions, return the latency
5409 : of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of
5410 : pipes. */
5411 :
5412 : static inline int
5413 0 : get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
5414 : {
5415 0 : gcc_checking_assert (mult_num && mult_num <= ops_num);
5416 :
5417 : /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
5418 : e.g:
5419 :
5420 : A * B + C * D
5421 : =>
5422 : _1 = A * B;
5423 : _2 = .FMA (C, D, _1);
5424 :
5425 : Otherwise there's latency(MULT)*1 in the first FMA. */
5426 0 : return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5427 : }
5428 :
5429 : /* Returns an optimal number of registers to use for computation of
5430 : given statements.
5431 :
5432 : LHS is the result ssa name of OPS. MULT_NUM is number of sub-expressions
5433 : that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5434 :
5435 : static int
5436 22814 : get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
5437 : enum tree_code opc, machine_mode mode)
5438 : {
5439 22814 : int param_width = param_tree_reassoc_width;
5440 22814 : int width;
5441 22814 : int width_min;
5442 22814 : int cycles_best;
5443 22814 : int ops_num = ops->length ();
5444 :
5445 22814 : if (param_width > 0)
5446 : width = param_width;
5447 : else
5448 22769 : width = targetm.sched.reassociation_width (opc, mode);
5449 :
5450 22814 : if (width == 1)
5451 : return width;
5452 :
5453 : /* Get the minimal time required for sequence computation. */
5454 7235 : cycles_best = get_required_cycles (ops_num, width);
5455 :
5456 : /* Check if we may use less width and still compute sequence for
5457 : the same time. It will allow us to reduce registers usage.
5458 : get_required_cycles is monotonically increasing with lower width
5459 : so we can perform a binary search for the minimal width that still
5460 : results in the optimal cycle count. */
5461 7235 : width_min = 1;
5462 :
5463 : /* If the target fully pipelines FMA instruction, the multiply part can start
5464 : already if its operands are ready. Assuming symmetric pipes are used for
5465 : FMUL/FADD/FMA, then for a sequence of FMA like:
5466 :
5467 : _8 = .FMA (_2, _3, _1);
5468 : _9 = .FMA (_5, _4, _8);
5469 : _10 = .FMA (_7, _6, _9);
5470 :
5471 : , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5472 : While with width=2:
5473 :
5474 : _8 = _4 * _5;
5475 : _9 = .FMA (_2, _3, _1);
5476 : _10 = .FMA (_6, _7, _8);
5477 : _11 = _9 + _10;
5478 :
5479 : , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5480 : latency(ADD), the first variant is preferred.
5481 :
5482 : Find out if we can get a smaller width considering FMA.
5483 : Assume FMUL and FMA use the same units that can also do FADD.
5484 : For other scenarios, such as when FMUL and FADD are using separated units,
5485 : the following code may not apply. */
5486 :
5487 7235 : int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
5488 7235 : if (width > 1 && mult_num && param_fully_pipelined_fma
5489 0 : && width_mult <= width)
5490 : {
5491 : /* Latency of MULT_EXPRs. */
5492 0 : int lat_mul
5493 0 : = get_mult_latency_consider_fma (ops_num, mult_num, width_mult);
5494 :
5495 : /* Quick search might not apply. So start from 1. */
5496 0 : for (int i = 1; i < width_mult; i++)
5497 : {
5498 0 : int lat_mul_new
5499 0 : = get_mult_latency_consider_fma (ops_num, mult_num, i);
5500 0 : int lat_add_new = get_required_cycles (ops_num, i);
5501 :
5502 : /* Assume latency(MULT) >= latency(ADD). */
5503 0 : if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)
5504 : {
5505 : width = i;
5506 : break;
5507 : }
5508 : }
5509 : }
5510 : else
5511 : {
5512 17193 : while (width > width_min)
5513 : {
5514 11773 : int width_mid = (width + width_min) / 2;
5515 :
5516 11773 : if (get_required_cycles (ops_num, width_mid) == cycles_best)
5517 : width = width_mid;
5518 2009 : else if (width_min < width_mid)
5519 : width_min = width_mid;
5520 : else
5521 : break;
5522 : }
5523 : }
5524 :
5525 : /* If there's loop dependent FMA result, return width=2 to avoid it. This is
5526 : better than skipping these FMA candidates in widening_mul. */
5527 7235 : if (width == 1
5528 7235 : && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
5529 : param_avoid_fma_max_bits))
5530 : {
5531 : /* Look for cross backedge dependency:
5532 : 1. LHS is a phi argument in the same basic block it is defined.
5533 : 2. And the result of the phi node is used in OPS. */
5534 5000 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));
5535 :
5536 5000 : use_operand_p use_p;
5537 5000 : imm_use_iterator iter;
5538 15877 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5539 8108 : if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
5540 : {
5541 4367 : if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
5542 : != bb)
5543 0 : continue;
5544 4367 : tree phi_result = gimple_phi_result (phi);
5545 4367 : operand_entry *oe;
5546 4367 : unsigned int j;
5547 21114 : FOR_EACH_VEC_ELT (*ops, j, oe)
5548 : {
5549 13101 : if (TREE_CODE (oe->op) != SSA_NAME)
5550 0 : continue;
5551 :
5552 : /* Result of phi is operand of PLUS_EXPR. */
5553 13101 : if (oe->op == phi_result)
5554 2231 : return 2;
5555 :
5556 : /* Check is result of phi is operand of MULT_EXPR. */
5557 10870 : gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5558 10870 : if (is_gimple_assign (def_stmt)
5559 10870 : && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
5560 : {
5561 2214 : tree rhs = gimple_assign_rhs1 (def_stmt);
5562 2214 : if (TREE_CODE (rhs) == SSA_NAME)
5563 : {
5564 2214 : if (rhs == phi_result)
5565 : return 2;
5566 2214 : def_stmt = SSA_NAME_DEF_STMT (rhs);
5567 : }
5568 : }
5569 10870 : if (is_gimple_assign (def_stmt)
5570 10870 : && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
5571 : {
5572 8734 : if (gimple_assign_rhs1 (def_stmt) == phi_result
5573 8734 : || gimple_assign_rhs2 (def_stmt) == phi_result)
5574 : return 2;
5575 : }
5576 : }
5577 5000 : }
5578 : }
5579 :
5580 : return width;
5581 : }
5582 :
5583 : #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5584 : #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5585 : #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5586 :
5587 : /* Rewrite statements with dependency chain with regard the chance to generate
5588 : FMA.
5589 : For the chain with FMA: Try to keep fma opportunity as much as possible.
5590 : For the chain without FMA: Putting the computation in rank order and trying
5591 : to allow operations to be executed in parallel.
5592 : E.g.
5593 : e + f + a * b + c * d;
5594 :
5595 : ssa1 = e + a * b;
5596 : ssa2 = f + c * d;
5597 : ssa3 = ssa1 + ssa2;
5598 :
5599 : This reassociation approach preserves the chance of fma generation as much
5600 : as possible.
5601 :
5602 : Another thing is to avoid adding loop-carried ops to long chains, otherwise
5603 : the whole chain will have dependencies across the loop iteration. Just keep
5604 : loop-carried ops in a separate chain.
5605 : E.g.
5606 : x_1 = phi (x_0, x_2)
5607 : y_1 = phi (y_0, y_2)
5608 :
5609 : a + b + c + d + e + x1 + y1
5610 :
5611 : SSA1 = a + b;
5612 : SSA2 = c + d;
5613 : SSA3 = SSA1 + e;
5614 : SSA4 = SSA3 + SSA2;
5615 : SSA5 = x1 + y1;
5616 : SSA6 = SSA4 + SSA5;
5617 : */
5618 : static void
5619 1815 : rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5620 : const vec<operand_entry *> &ops)
5621 : {
5622 1815 : enum tree_code opcode = gimple_assign_rhs_code (stmt);
5623 1815 : int op_num = ops.length ();
5624 1815 : int op_normal_num = op_num;
5625 1815 : gcc_assert (op_num > 0);
5626 1815 : int stmt_num = op_num - 1;
5627 1815 : gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5628 1815 : int i = 0, j = 0;
5629 1815 : tree tmp_op[2], op1;
5630 1815 : operand_entry *oe;
5631 1815 : gimple *stmt1 = NULL;
5632 1815 : tree last_rhs1 = gimple_assign_rhs1 (stmt);
5633 1815 : int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5634 1815 : int width_active = 0, width_count = 0;
5635 1815 : bool has_biased = false, ops_changed = false;
5636 1815 : auto_vec<operand_entry *> ops_normal;
5637 1815 : auto_vec<operand_entry *> ops_biased;
5638 1815 : vec<operand_entry *> *ops1;
5639 :
5640 : /* We start expression rewriting from the top statements.
5641 : So, in this loop we create a full list of statements
5642 : we will work with. */
5643 1815 : stmts[stmt_num - 1] = stmt;
5644 8936 : for (i = stmt_num - 2; i >= 0; i--)
5645 7121 : stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5646 :
5647 : /* Avoid adding loop-carried ops to long chains, first filter out the
5648 : loop-carried. But we need to make sure that the length of the remainder
5649 : is not less than 4, which is the smallest ops length we can break the
5650 : dependency. */
5651 12566 : FOR_EACH_VEC_ELT (ops, i, oe)
5652 : {
5653 10751 : if (TREE_CODE (oe->op) == SSA_NAME
5654 10591 : && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5655 11005 : && op_normal_num > 4)
5656 : {
5657 225 : ops_biased.safe_push (oe);
5658 225 : has_biased = true;
5659 225 : op_normal_num --;
5660 : }
5661 : else
5662 10526 : ops_normal.safe_push (oe);
5663 : }
5664 :
5665 : /* Width should not be larger than ops length / 2, since we can not create
5666 : more parallel dependency chains that exceeds such value. */
5667 1815 : int width_normal = op_normal_num / 2;
5668 1815 : int width_biased = (op_num - op_normal_num) / 2;
5669 1815 : width_normal = width <= width_normal ? width : width_normal;
5670 1815 : width_biased = width <= width_biased ? width : width_biased;
5671 :
5672 1815 : ops1 = &ops_normal;
5673 1815 : width_count = width_active = width_normal;
5674 :
5675 : /* Build parallel dependency chain according to width. */
5676 10751 : for (i = 0; i < stmt_num; i++)
5677 : {
5678 8936 : if (dump_file && (dump_flags & TDF_DETAILS))
5679 : {
5680 6 : fprintf (dump_file, "Transforming ");
5681 6 : print_gimple_stmt (dump_file, stmts[i], 0);
5682 : }
5683 :
5684 : /* When the work of normal ops is over, but the loop is not over,
5685 : continue to do biased ops. */
5686 8936 : if (width_count == 0 && ops1 == &ops_normal)
5687 : {
5688 221 : ops1 = &ops_biased;
5689 221 : width_count = width_active = width_biased;
5690 221 : ops_changed = true;
5691 : }
5692 :
5693 : /* Swap the operands if no FMA in the chain. */
5694 8936 : if (ops1->length () > 2 && !has_fma)
5695 4376 : swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5696 :
5697 8936 : if (i < width_active
5698 5113 : || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5699 : {
5700 11478 : for (j = 0; j < 2; j++)
5701 : {
5702 7652 : oe = ops1->pop ();
5703 7652 : tmp_op[j] = oe->op;
5704 : /* If the stmt that defines operand has to be inserted, insert it
5705 : before the use. */
5706 7652 : stmt1 = oe->stmt_to_insert;
5707 7652 : if (stmt1)
5708 0 : insert_stmt_before_use (stmts[i], stmt1);
5709 7652 : stmt1 = NULL;
5710 : }
5711 3826 : stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5712 : tmp_op[1],
5713 : tmp_op[0],
5714 : opcode);
5715 3826 : gimple_set_visited (stmts[i], true);
5716 :
5717 : }
5718 : else
5719 : {
5720 : /* We keep original statement only for the last one. All others are
5721 : recreated. */
5722 5110 : if (!ops1->length ())
5723 : {
5724 : /* For biased length equal to 2. */
5725 2011 : if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5726 1 : last_rhs2_stmt_index = i - 1;
5727 :
5728 : /* When width_count == 2 and there is no biased, just finish. */
5729 2011 : if (width_count == NORMAL_END_STMT && !has_biased)
5730 : {
5731 1594 : last_rhs1_stmt_index = i - 1;
5732 1594 : last_rhs2_stmt_index = i - 2;
5733 : }
5734 2011 : if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5735 : {
5736 : /* We keep original statement only for the last one. All
5737 : others are recreated. */
5738 1596 : gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5739 1596 : (stmts[last_rhs1_stmt_index]));
5740 1596 : gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5741 1596 : (stmts[last_rhs2_stmt_index]));
5742 1596 : update_stmt (stmts[i]);
5743 : }
5744 : else
5745 : {
5746 1245 : stmts[i] =
5747 415 : build_and_add_sum (TREE_TYPE (last_rhs1),
5748 415 : gimple_assign_lhs (stmts[i-width_count]),
5749 : gimple_assign_lhs
5750 415 : (stmts[i-width_count+1]),
5751 : opcode);
5752 415 : gimple_set_visited (stmts[i], true);
5753 415 : width_count--;
5754 :
5755 : /* It is the end of normal or biased ops.
5756 : last_rhs1_stmt_index used to record the last stmt index
5757 : for normal ops. last_rhs2_stmt_index used to record the
5758 : last stmt index for biased ops. */
5759 415 : if (width_count == BIASED_END_STMT)
5760 : {
5761 222 : gcc_assert (has_biased);
5762 222 : if (ops_biased.length ())
5763 : last_rhs1_stmt_index = i;
5764 : else
5765 1 : last_rhs2_stmt_index = i;
5766 : width_count--;
5767 : }
5768 : }
5769 : }
5770 : else
5771 : {
5772 : /* Attach the rest ops to the parallel dependency chain. */
5773 3099 : oe = ops1->pop ();
5774 3099 : op1 = oe->op;
5775 3099 : stmt1 = oe->stmt_to_insert;
5776 3099 : if (stmt1)
5777 0 : insert_stmt_before_use (stmts[i], stmt1);
5778 3099 : stmt1 = NULL;
5779 :
5780 : /* For only one biased ops. */
5781 3099 : if (width_count == SPECIAL_BIASED_END_STMT)
5782 : {
5783 : /* We keep original statement only for the last one. All
5784 : others are recreated. */
5785 219 : gcc_assert (has_biased);
5786 219 : gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5787 219 : (stmts[last_rhs1_stmt_index]));
5788 219 : gimple_assign_set_rhs2 (stmts[i], op1);
5789 219 : update_stmt (stmts[i]);
5790 : }
5791 : else
5792 : {
5793 2880 : stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5794 : gimple_assign_lhs
5795 2880 : (stmts[i-width_active]),
5796 : op1,
5797 : opcode);
5798 2880 : gimple_set_visited (stmts[i], true);
5799 : }
5800 : }
5801 : }
5802 :
5803 8936 : if (dump_file && (dump_flags & TDF_DETAILS))
5804 : {
5805 6 : fprintf (dump_file, " into ");
5806 6 : print_gimple_stmt (dump_file, stmts[i], 0);
5807 : }
5808 : }
5809 :
5810 1815 : remove_visited_stmt_chain (last_rhs1);
5811 1815 : }
5812 :
5813 : /* Transform STMT, which is really (A +B) + (C + D) into the left
5814 : linear form, ((A+B)+C)+D.
5815 : Recurse on D if necessary. */
5816 :
5817 : static void
5818 2315 : linearize_expr (gimple *stmt)
5819 : {
5820 2315 : gimple_stmt_iterator gsi;
5821 2315 : gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5822 2315 : gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5823 2315 : gimple *oldbinrhs = binrhs;
5824 2315 : enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5825 2315 : gimple *newbinrhs = NULL;
5826 2315 : class loop *loop = loop_containing_stmt (stmt);
5827 2315 : tree lhs = gimple_assign_lhs (stmt);
5828 :
5829 2315 : gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5830 : && is_reassociable_op (binrhs, rhscode, loop));
5831 :
5832 2315 : gsi = gsi_for_stmt (stmt);
5833 :
5834 2315 : gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5835 2315 : binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5836 : gimple_assign_rhs_code (binrhs),
5837 : gimple_assign_lhs (binlhs),
5838 : gimple_assign_rhs2 (binrhs));
5839 2315 : gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5840 2315 : gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5841 2315 : gimple_set_uid (binrhs, gimple_uid (stmt));
5842 :
5843 2315 : if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5844 2311 : newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5845 :
5846 2315 : if (dump_file && (dump_flags & TDF_DETAILS))
5847 : {
5848 0 : fprintf (dump_file, "Linearized: ");
5849 0 : print_gimple_stmt (dump_file, stmt, 0);
5850 : }
5851 :
5852 2315 : reassociate_stats.linearized++;
5853 2315 : update_stmt (stmt);
5854 :
5855 2315 : gsi = gsi_for_stmt (oldbinrhs);
5856 2315 : reassoc_remove_stmt (&gsi);
5857 2315 : release_defs (oldbinrhs);
5858 :
5859 2315 : gimple_set_visited (stmt, true);
5860 2315 : gimple_set_visited (binlhs, true);
5861 2315 : gimple_set_visited (binrhs, true);
5862 :
5863 : /* Tail recurse on the new rhs if it still needs reassociation. */
5864 2315 : if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5865 : /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5866 : want to change the algorithm while converting to tuples. */
5867 542 : linearize_expr (stmt);
5868 2315 : }
5869 :
5870 : /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5871 : it. Otherwise, return NULL. */
5872 :
5873 : static gimple *
5874 408853 : get_single_immediate_use (tree lhs)
5875 : {
5876 408853 : use_operand_p immuse;
5877 408853 : gimple *immusestmt;
5878 :
5879 408853 : if (TREE_CODE (lhs) == SSA_NAME
5880 408853 : && single_imm_use (lhs, &immuse, &immusestmt)
5881 724244 : && is_gimple_assign (immusestmt))
5882 : return immusestmt;
5883 :
5884 : return NULL;
5885 : }
5886 :
5887 : /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5888 : representing the negated value. Insertions of any necessary
5889 : instructions go before GSI.
5890 : This function is recursive in that, if you hand it "a_5" as the
5891 : value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5892 : transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5893 :
5894 : static tree
5895 78329 : negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5896 : {
5897 78329 : gimple *negatedefstmt = NULL;
5898 78329 : tree resultofnegate;
5899 78329 : gimple_stmt_iterator gsi;
5900 78329 : unsigned int uid;
5901 :
5902 : /* If we are trying to negate a name, defined by an add, negate the
5903 : add operands instead. */
5904 78329 : if (TREE_CODE (tonegate) == SSA_NAME)
5905 76540 : negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5906 78329 : if (TREE_CODE (tonegate) == SSA_NAME
5907 76540 : && is_gimple_assign (negatedefstmt)
5908 67812 : && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5909 67812 : && has_single_use (gimple_assign_lhs (negatedefstmt))
5910 128951 : && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5911 : {
5912 1064 : tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5913 1064 : tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5914 1064 : tree lhs = gimple_assign_lhs (negatedefstmt);
5915 1064 : gimple *g;
5916 :
5917 1064 : gsi = gsi_for_stmt (negatedefstmt);
5918 1064 : rhs1 = negate_value (rhs1, &gsi);
5919 :
5920 1064 : gsi = gsi_for_stmt (negatedefstmt);
5921 1064 : rhs2 = negate_value (rhs2, &gsi);
5922 :
5923 1064 : gsi = gsi_for_stmt (negatedefstmt);
5924 1064 : lhs = make_ssa_name (TREE_TYPE (lhs));
5925 1064 : gimple_set_visited (negatedefstmt, true);
5926 1064 : g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5927 1064 : gimple_set_uid (g, gimple_uid (negatedefstmt));
5928 1064 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5929 1064 : return lhs;
5930 : }
5931 :
5932 77265 : tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5933 77265 : resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5934 : NULL_TREE, true, GSI_SAME_STMT);
5935 77265 : gsi = *gsip;
5936 77265 : uid = gimple_uid (gsi_stmt (gsi));
5937 305482 : for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5938 : {
5939 148430 : gimple *stmt = gsi_stmt (gsi);
5940 148430 : if (gimple_uid (stmt) != 0)
5941 : break;
5942 75476 : gimple_set_uid (stmt, uid);
5943 : }
5944 : return resultofnegate;
5945 : }
5946 :
5947 : /* Return true if we should break up the subtract in STMT into an add
5948 : with negate. This is true when we the subtract operands are really
5949 : adds, or the subtract itself is used in an add expression. In
5950 : either case, breaking up the subtract into an add with negate
5951 : exposes the adds to reassociation. */
5952 :
5953 : static bool
5954 292653 : should_break_up_subtract (gimple *stmt)
5955 : {
5956 292653 : tree lhs = gimple_assign_lhs (stmt);
5957 292653 : tree binlhs = gimple_assign_rhs1 (stmt);
5958 292653 : tree binrhs = gimple_assign_rhs2 (stmt);
5959 292653 : gimple *immusestmt;
5960 292653 : class loop *loop = loop_containing_stmt (stmt);
5961 :
5962 292653 : if (TREE_CODE (binlhs) == SSA_NAME
5963 292653 : && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5964 : return true;
5965 :
5966 274065 : if (TREE_CODE (binrhs) == SSA_NAME
5967 274065 : && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5968 : return true;
5969 :
5970 273186 : if (TREE_CODE (lhs) == SSA_NAME
5971 273186 : && (immusestmt = get_single_immediate_use (lhs))
5972 132659 : && is_gimple_assign (immusestmt)
5973 405845 : && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5974 89138 : || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5975 2287 : && gimple_assign_rhs1 (immusestmt) == lhs)
5976 86861 : || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5977 : return true;
5978 : return false;
5979 : }
5980 :
5981 : /* Transform STMT from A - B into A + -B. */
5982 :
5983 : static void
5984 76201 : break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5985 : {
5986 76201 : tree rhs1 = gimple_assign_rhs1 (stmt);
5987 76201 : tree rhs2 = gimple_assign_rhs2 (stmt);
5988 :
5989 76201 : if (dump_file && (dump_flags & TDF_DETAILS))
5990 : {
5991 0 : fprintf (dump_file, "Breaking up subtract ");
5992 0 : print_gimple_stmt (dump_file, stmt, 0);
5993 : }
5994 :
5995 76201 : rhs2 = negate_value (rhs2, gsip);
5996 76201 : gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5997 76201 : update_stmt (stmt);
5998 76201 : }
5999 :
6000 : /* Determine whether STMT is a builtin call that raises an SSA name
6001 : to an integer power and has only one use. If so, and this is early
6002 : reassociation and unsafe math optimizations are permitted, place
6003 : the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6004 : If any of these conditions does not hold, return FALSE. */
6005 :
6006 : static bool
6007 127 : acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
6008 : {
6009 127 : tree arg1;
6010 127 : REAL_VALUE_TYPE c, cint;
6011 :
6012 127 : switch (gimple_call_combined_fn (stmt))
6013 : {
6014 19 : CASE_CFN_POW:
6015 19 : if (flag_errno_math)
6016 : return false;
6017 :
6018 19 : *base = gimple_call_arg (stmt, 0);
6019 19 : arg1 = gimple_call_arg (stmt, 1);
6020 :
6021 19 : if (TREE_CODE (arg1) != REAL_CST)
6022 : return false;
6023 :
6024 16 : c = TREE_REAL_CST (arg1);
6025 :
6026 16 : if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
6027 : return false;
6028 :
6029 16 : *exponent = real_to_integer (&c);
6030 16 : real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
6031 16 : if (!real_identical (&c, &cint))
6032 : return false;
6033 :
6034 : break;
6035 :
6036 10 : CASE_CFN_POWI:
6037 10 : *base = gimple_call_arg (stmt, 0);
6038 10 : arg1 = gimple_call_arg (stmt, 1);
6039 :
6040 10 : if (!tree_fits_shwi_p (arg1))
6041 : return false;
6042 :
6043 10 : *exponent = tree_to_shwi (arg1);
6044 10 : break;
6045 :
6046 : default:
6047 : return false;
6048 : }
6049 :
6050 : /* Expanding negative exponents is generally unproductive, so we don't
6051 : complicate matters with those. Exponents of zero and one should
6052 : have been handled by expression folding. */
6053 18 : if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
6054 : return false;
6055 :
6056 : return true;
6057 : }
6058 :
6059 : /* Try to derive and add operand entry for OP to *OPS. Return false if
6060 : unsuccessful. */
6061 :
6062 : static bool
6063 9478853 : try_special_add_to_ops (vec<operand_entry *> *ops,
6064 : enum tree_code code,
6065 : tree op, gimple* def_stmt)
6066 : {
6067 9478853 : tree base = NULL_TREE;
6068 9478853 : HOST_WIDE_INT exponent = 0;
6069 :
6070 9478853 : if (TREE_CODE (op) != SSA_NAME
6071 9478853 : || ! has_single_use (op))
6072 : return false;
6073 :
6074 3542404 : if (code == MULT_EXPR
6075 759525 : && reassoc_insert_powi_p
6076 360750 : && flag_unsafe_math_optimizations
6077 30116 : && is_gimple_call (def_stmt)
6078 3542531 : && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
6079 : {
6080 18 : add_repeat_to_ops_vec (ops, base, exponent);
6081 18 : gimple_set_visited (def_stmt, true);
6082 18 : return true;
6083 : }
6084 3542386 : else if (code == MULT_EXPR
6085 759507 : && is_gimple_assign (def_stmt)
6086 719989 : && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6087 264 : && !HONOR_SNANS (TREE_TYPE (op))
6088 264 : && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
6089 0 : || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
6090 3542650 : && (!FLOAT_TYPE_P (TREE_TYPE (op))
6091 51 : || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
6092 : {
6093 257 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
6094 257 : tree cst = build_minus_one_cst (TREE_TYPE (op));
6095 257 : add_to_ops_vec (ops, rhs1);
6096 257 : add_to_ops_vec (ops, cst);
6097 257 : gimple_set_visited (def_stmt, true);
6098 257 : return true;
6099 : }
6100 :
6101 : return false;
6102 : }
6103 :
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.

   IS_ASSOCIATIVE says whether the operation may have its operands
   swapped; when it is false and the first operand is not reassociable,
   only the second operand is recorded.  SET_VISITED requests that every
   statement walked is marked as visited.  Both flags are passed
   unchanged to the recursive call.  */

static void
linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
                     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *binlhsdef = NULL, *binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  class loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  /* An operand counts as reassociable only if its defining statement is
     a reassociable op with the same code and cannot throw.  */
  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
                         && !stmt_could_throw_p (cfun, binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
                         && !stmt_could_throw_p (cfun, binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      /* If this is not a associative operation like division, give up.  */
      if (!is_associative)
        {
          add_to_ops_vec (ops, binrhs);
          return;
        }

      if (!binrhsisreassoc)
        {
          /* Neither operand is reassociable: record both operands (or
             their special decomposition) and stop, unless the RHS was
             decomposed, in which case the operands are swapped first.  */
          bool swap = false;
          if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
            /* If we add ops for the rhs we expect to be able to recurse
               to it via the lhs during expression rewrite so swap
               operands.  */
            swap = true;
          else
            add_to_ops_vec (ops, binrhs);

          if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
            add_to_ops_vec (ops, binlhs);

          if (!swap)
            return;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "swapping operands of ");
          print_gimple_stmt (dump_file, stmt, 0);
        }

      swap_ssa_operands (stmt,
                         gimple_assign_rhs1_ptr (stmt),
                         gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " is now ");
          print_gimple_stmt (dump_file, stmt, 0);
        }
      /* The swap was only needed because the RHS was decomposed; both
         operands are already recorded.  */
      if (!binrhsisreassoc)
        return;

      /* We want to make it so the lhs is always the reassociative op,
         so swap.  */
      std::swap (binlhs, binrhs);
      /* NOTE(review): binlhsdef/binrhsdef are not swapped here, so
         binrhsdef no longer matches binrhs in the final
         try_special_add_to_ops call below -- confirm this is
         intended.  */
    }
  else if (binrhsisreassoc)
    {
      /* Both operands are reassociable: flatten STMT first, then reload
         its operands.  */
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
      /* NOTE(review): binrhsdef is not refreshed after linearize_expr,
         which removes the statement that originally defined the RHS --
         confirm the stale pointer passed below is harmless.  */
    }

  /* At this point the LHS is the reassociable operand and the RHS must
     not be.  */
  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
              || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
                                      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
                       is_associative, set_visited);

  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
    add_to_ops_vec (ops, binrhs);
}
6208 :
6209 : /* Repropagate the negates back into subtracts, since no other pass
6210 : currently does it. */
6211 :
6212 : static void
6213 2079310 : repropagate_negates (void)
6214 : {
6215 2079310 : unsigned int i = 0;
6216 2079310 : tree negate;
6217 :
6218 2214977 : FOR_EACH_VEC_ELT (plus_negates, i, negate)
6219 : {
6220 135667 : gimple *user = get_single_immediate_use (negate);
6221 135667 : if (!user || !is_gimple_assign (user))
6222 22273 : continue;
6223 :
6224 113394 : tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6225 113404 : if (TREE_CODE (negateop) == SSA_NAME
6226 113394 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6227 10 : continue;
6228 :
6229 : /* The negate operand can be either operand of a PLUS_EXPR
6230 : (it can be the LHS if the RHS is a constant for example).
6231 :
6232 : Force the negate operand to the RHS of the PLUS_EXPR, then
6233 : transform the PLUS_EXPR into a MINUS_EXPR. */
6234 113384 : if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6235 : {
6236 : /* If the negated operand appears on the LHS of the
6237 : PLUS_EXPR, exchange the operands of the PLUS_EXPR
6238 : to force the negated operand to the RHS of the PLUS_EXPR. */
6239 84502 : if (gimple_assign_rhs1 (user) == negate)
6240 : {
6241 25807 : swap_ssa_operands (user,
6242 : gimple_assign_rhs1_ptr (user),
6243 : gimple_assign_rhs2_ptr (user));
6244 : }
6245 :
6246 : /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6247 : the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6248 84502 : if (gimple_assign_rhs2 (user) == negate)
6249 : {
6250 84502 : tree rhs1 = gimple_assign_rhs1 (user);
6251 84502 : gimple_stmt_iterator gsi = gsi_for_stmt (user);
6252 84502 : gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6253 : negateop);
6254 84502 : update_stmt (user);
6255 : }
6256 : }
6257 28882 : else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6258 : {
6259 1942 : if (gimple_assign_rhs1 (user) == negate)
6260 : {
6261 : /* We have
6262 : x = -negateop
6263 : y = x - b
6264 : which we transform into
6265 : x = negateop + b
6266 : y = -x .
6267 : This pushes down the negate which we possibly can merge
6268 : into some other operation, hence insert it into the
6269 : plus_negates vector. */
6270 1942 : gimple *feed = SSA_NAME_DEF_STMT (negate);
6271 1942 : tree b = gimple_assign_rhs2 (user);
6272 1942 : gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6273 1942 : gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6274 1942 : tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6275 1942 : gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6276 1942 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6277 1942 : gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6278 1942 : user = gsi_stmt (gsi2);
6279 1942 : update_stmt (user);
6280 1942 : reassoc_remove_stmt (&gsi);
6281 1942 : release_defs (feed);
6282 1942 : plus_negates.safe_push (gimple_assign_lhs (user));
6283 : }
6284 : else
6285 : {
6286 : /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6287 : getting rid of one operation. */
6288 0 : tree rhs1 = gimple_assign_rhs1 (user);
6289 0 : gimple_stmt_iterator gsi = gsi_for_stmt (user);
6290 0 : gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6291 0 : update_stmt (gsi_stmt (gsi));
6292 : }
6293 : }
6294 : }
6295 2079310 : }
6296 :
6297 : /* Break up subtract operations in block BB.
6298 :
6299 : We do this top down because we don't know whether the subtract is
6300 : part of a possible chain of reassociation except at the top.
6301 :
6302 : I.e., given
6303 : d = f + g
6304 : c = a + e
6305 : b = c - d
6306 : q = b - r
6307 : k = t - q
6308 :
6309 : we want to break up k = t - q, but we won't until we've transformed q
6310 : = b - r, which won't be broken up until we transform b = c - d.
6311 :
6312 : Along the way, clear the GIMPLE visited flag on every statement
6313 : and number the statements of BB with UIDs starting at 1. */
6314 :
6315 : static void
6316 19224378 : break_up_subtract_bb (basic_block bb)
6317 : {
6318 19224378 : gimple_stmt_iterator gsi;
6319 19224378 : unsigned int uid = 1;
6320 :
6321 205241236 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6322 : {
6323 166792480 : gimple *stmt = gsi_stmt (gsi);
6324 166792480 : gimple_set_visited (stmt, false);
6325 166792480 : gimple_set_uid (stmt, uid++);
6326 :
6327 166792480 : if (!is_gimple_assign (stmt)
6328 46580253 : || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6329 183976061 : || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6330 149609229 : continue;
6331 :
6332 : /* Look for simple gimple subtract operations; the result of a negate with a reassociable operand is recorded in plus_negates instead. */
6333 17183251 : if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6334 : {
6335 292927 : if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6336 292927 : || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6337 274 : continue;
6338 :
6339 : /* Check for a subtract used only in an addition. If this
6340 : is the case, transform it into add of a negate for better
6341 : reassociation. I.e., transform C = A-B into C = A + -B if C
6342 : is only used in an addition. */
6343 292653 : if (should_break_up_subtract (stmt))
6344 76201 : break_up_subtract (stmt, &gsi);
6345 : }
6346 16890324 : else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6347 16890324 : && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6348 45787 : plus_negates.safe_push (gimple_assign_lhs (stmt));
6349 : }
6350 19224378 : }
6351 :
6352 : /* Used for repeated factor analysis: one entry per distinct factor of a multiply chain. */
6353 : struct repeat_factor
6354 : {
6355 : /* An SSA name that occurs in a multiply chain. */
6356 : tree factor;
6357 :
6358 : /* Cached rank of the factor, copied from its operand entry. */
6359 : unsigned rank;
6360 :
6361 : /* Number of occurrences of the factor in the chain. */
6362 : HOST_WIDE_INT count;
6363 :
6364 : /* An SSA name representing the product of this factor and
6365 : all factors appearing later in the repeated factor vector; NULL_TREE until it has been computed. */
6366 : tree repr;
6367 : };
6368 :
6369 :
6370 : static vec<repeat_factor> repeat_factor_vec; /* The repeated factor vector; created and released by attempt_builtin_powi. */
6371 :
6372 : /* qsort comparator for the repeat factor vector; X1 and X2 point to repeat_factor entries. Sort primarily by
6373 : ascending occurrence count, secondarily by descending rank. Entries equal in both compare as 0. */
6374 :
6375 : static int
6376 247035 : compare_repeat_factors (const void *x1, const void *x2)
6377 : {
6378 247035 : const repeat_factor *rf1 = (const repeat_factor *) x1;
6379 247035 : const repeat_factor *rf2 = (const repeat_factor *) x2;
6380 :
6381 247035 : if (rf1->count < rf2->count)
6382 : return -1;
6383 246401 : else if (rf1->count > rf2->count)
6384 : return 1;
6385 :
6386 245908 : if (rf1->rank < rf2->rank)
6387 : return 1;
6388 134075 : else if (rf1->rank > rf2->rank)
6389 110706 : return -1;
6390 :
6391 : return 0;
6392 : }
6393 :
6394 : /* Look for repeated operands in OPS in the multiply tree rooted at
6395 : STMT. Replace them with an optimal sequence of multiplies and powi
6396 : builtin calls, and remove the used operands from OPS. Return an
6397 : SSA name representing the value of the replacement sequence, or NULL_TREE if none was built. */
6398 :
6399 : static tree
6400 486593 : attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6401 : {
6402 486593 : unsigned i, j, vec_len;
6403 486593 : int ii;
6404 486593 : operand_entry *oe;
6405 486593 : repeat_factor *rf1, *rf2;
6406 486593 : repeat_factor rfnew;
6407 486593 : tree result = NULL_TREE;
6408 486593 : tree target_ssa, iter_result;
6409 486593 : tree type = TREE_TYPE (gimple_get_lhs (stmt));
6410 486593 : tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6411 486593 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6412 486593 : gimple *mul_stmt, *pow_stmt;
6413 :
6414 : /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6415 : target, unless type is integral. */
6416 486593 : if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6417 : return NULL_TREE;
6418 :
6419 : /* Allocate the repeated factor vector. */
6420 482937 : repeat_factor_vec.create (10);
6421 :
6422 : /* Scan the OPS vector for all SSA names in the product and build
6423 : up a vector of occurrence counts for each factor. */
6424 1936049 : FOR_EACH_VEC_ELT (*ops, i, oe)
6425 : {
6426 970175 : if (TREE_CODE (oe->op) == SSA_NAME)
6427 : {
6428 612277 : FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6429 : {
6430 67096 : if (rf1->factor == oe->op)
6431 : {
6432 2760 : rf1->count += oe->count;
6433 2760 : break;
6434 : }
6435 : }
6436 :
6437 547941 : if (j >= repeat_factor_vec.length ())
6438 : {
6439 545181 : rfnew.factor = oe->op;
6440 545181 : rfnew.rank = oe->rank;
6441 545181 : rfnew.count = oe->count;
6442 545181 : rfnew.repr = NULL_TREE;
6443 545181 : repeat_factor_vec.safe_push (rfnew);
6444 : }
6445 : }
6446 : }
6447 :
6448 : /* Sort the repeated factor vector by (a) increasing occurrence count,
6449 : and (b) decreasing rank. */
6450 482937 : repeat_factor_vec.qsort (compare_repeat_factors);
6451 :
6452 : /* It is generally best to combine as many base factors as possible
6453 : into a product before applying __builtin_powi to the result.
6454 : However, the sort order chosen for the repeated factor vector
6455 : allows us to cache partial results for the product of the base
6456 : factors for subsequent use. When we already have a cached partial
6457 : result from a previous iteration, it is best to make use of it
6458 : before looking for another __builtin_powi opportunity.
6459 :
6460 : As an example, consider x * x * y * y * y * z * z * z * z.
6461 : We want to first compose the product x * y * z, raise it to the
6462 : second power, then multiply this by y * z, and finally multiply
6463 : by z. This can be done in 5 multiplies provided we cache y * z
6464 : for use in both expressions:
6465 :
6466 : t1 = y * z
6467 : t2 = t1 * x
6468 : t3 = t2 * t2
6469 : t4 = t1 * t3
6470 : result = t4 * z
6471 :
6472 : If we instead ignored the cached y * z and first multiplied by
6473 : the __builtin_powi opportunity z * z, we would get the inferior:
6474 :
6475 : t1 = y * z
6476 : t2 = t1 * x
6477 : t3 = t2 * t2
6478 : t4 = z * z
6479 : t5 = t3 * t4
6480 : result = t5 * y */
6481 :
6482 965874 : vec_len = repeat_factor_vec.length ();
6483 :
6484 : /* Repeatedly look for opportunities to create a builtin_powi call. */
6485 484669 : while (true)
6486 : {
6487 484669 : HOST_WIDE_INT power;
6488 :
6489 : /* First look for the largest cached product of factors from
6490 : preceding iterations. If found, create a builtin_powi for
6491 : it if the minimum occurrence count for its factors is at
6492 : least 2, or just use this cached product as our next
6493 : multiplicand if the minimum occurrence count is 1. */
6494 1031883 : FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6495 : {
6496 547223 : if (rf1->repr && rf1->count > 0)
6497 : break;
6498 : }
6499 :
6500 484669 : if (j < vec_len)
6501 : {
6502 9 : power = rf1->count;
6503 :
6504 9 : if (power == 1)
6505 : {
6506 7 : iter_result = rf1->repr;
6507 :
6508 7 : if (dump_file && (dump_flags & TDF_DETAILS))
6509 : {
6510 0 : unsigned elt;
6511 0 : repeat_factor *rf;
6512 0 : fputs ("Multiplying by cached product ", dump_file);
6513 0 : for (elt = j; elt < vec_len; elt++)
6514 : {
6515 0 : rf = &repeat_factor_vec[elt];
6516 0 : print_generic_expr (dump_file, rf->factor);
6517 0 : if (elt < vec_len - 1)
6518 0 : fputs (" * ", dump_file);
6519 : }
6520 0 : fputs ("\n", dump_file);
6521 : }
6522 : }
6523 : else
6524 : {
6525 2 : if (INTEGRAL_TYPE_P (type))
6526 : {
6527 0 : gcc_assert (power > 1);
6528 0 : gimple_stmt_iterator gsip = gsi;
6529 0 : gsi_prev (&gsip);
6530 0 : iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6531 : rf1->repr, power);
6532 0 : gimple_stmt_iterator gsic = gsi;
6533 0 : while (gsi_stmt (gsic) != gsi_stmt (gsip))
6534 : {
6535 0 : gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6536 0 : gimple_set_visited (gsi_stmt (gsic), true);
6537 0 : gsi_prev (&gsic);
6538 : }
6539 : }
6540 : else
6541 : {
6542 2 : iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6543 2 : pow_stmt
6544 2 : = gimple_build_call (powi_fndecl, 2, rf1->repr,
6545 : build_int_cst (integer_type_node,
6546 2 : power));
6547 2 : gimple_call_set_lhs (pow_stmt, iter_result);
6548 2 : gimple_set_location (pow_stmt, gimple_location (stmt));
6549 2 : gimple_set_uid (pow_stmt, gimple_uid (stmt));
6550 2 : gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6551 : }
6552 :
6553 2 : if (dump_file && (dump_flags & TDF_DETAILS))
6554 : {
6555 0 : unsigned elt;
6556 0 : repeat_factor *rf;
6557 0 : fputs ("Building __builtin_pow call for cached product (",
6558 : dump_file);
6559 0 : for (elt = j; elt < vec_len; elt++)
6560 : {
6561 0 : rf = &repeat_factor_vec[elt];
6562 0 : print_generic_expr (dump_file, rf->factor);
6563 0 : if (elt < vec_len - 1)
6564 0 : fputs (" * ", dump_file);
6565 : }
6566 0 : fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6567 : power);
6568 : }
6569 : }
6570 : }
6571 : else
6572 : {
6573 : /* Otherwise, find the first factor in the repeated factor
6574 : vector whose occurrence count is at least 2. If no such
6575 : factor exists, there are no builtin_powi opportunities
6576 : remaining. */
6577 1030107 : FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6578 : {
6579 547170 : if (rf1->count >= 2)
6580 : break;
6581 : }
6582 :
6583 484660 : if (j >= vec_len)
6584 : break;
6585 :
6586 1723 : power = rf1->count;
6587 :
6588 1723 : if (dump_file && (dump_flags & TDF_DETAILS))
6589 : {
6590 0 : unsigned elt;
6591 0 : repeat_factor *rf;
6592 0 : fputs ("Building __builtin_pow call for (", dump_file);
6593 0 : for (elt = j; elt < vec_len; elt++)
6594 : {
6595 0 : rf = &repeat_factor_vec[elt];
6596 0 : print_generic_expr (dump_file, rf->factor);
6597 0 : if (elt < vec_len - 1)
6598 0 : fputs (" * ", dump_file);
6599 : }
6600 0 : fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6601 : }
6602 :
6603 1723 : reassociate_stats.pows_created++;
6604 :
6605 : /* Visit each element of the vector in reverse order (so that
6606 : high-occurrence elements are visited first, and within the
6607 : same occurrence count, lower-ranked elements are visited
6608 : first). Form a linear product of all elements in this order
6609 : whose occurrence count is at least that of element J.
6610 : Record the SSA name representing the product of each element
6611 : with all subsequent elements in the vector. */
6612 1723 : if (j == vec_len - 1)
6613 1702 : rf1->repr = rf1->factor;
6614 : else
6615 : {
6616 50 : for (ii = vec_len - 2; ii >= (int)j; ii--)
6617 : {
6618 29 : tree op1, op2;
6619 :
6620 29 : rf1 = &repeat_factor_vec[ii];
6621 29 : rf2 = &repeat_factor_vec[ii + 1];
6622 :
6623 : /* Init the last factor's representative to be itself. */
6624 29 : if (!rf2->repr)
6625 21 : rf2->repr = rf2->factor;
6626 :
6627 29 : op1 = rf1->factor;
6628 29 : op2 = rf2->repr;
6629 :
6630 29 : target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6631 29 : mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6632 : op1, op2);
6633 29 : gimple_set_location (mul_stmt, gimple_location (stmt));
6634 29 : gimple_set_uid (mul_stmt, gimple_uid (stmt));
6635 29 : gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6636 29 : rf1->repr = target_ssa;
6637 :
6638 : /* Don't reprocess the multiply we just introduced. */
6639 29 : gimple_set_visited (mul_stmt, true);
6640 : }
6641 : }
6642 :
6643 : /* Form a call to __builtin_powi for the maximum product
6644 : just formed, raised to the power obtained earlier. For integral types the power is instead expanded by powi_as_mults. */
6645 1723 : rf1 = &repeat_factor_vec[j];
6646 1723 : if (INTEGRAL_TYPE_P (type))
6647 : {
6648 1161 : gcc_assert (power > 1);
6649 1161 : gimple_stmt_iterator gsip = gsi;
6650 1161 : gsi_prev (&gsip);
6651 1161 : iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6652 : rf1->repr, power);
6653 1161 : gimple_stmt_iterator gsic = gsi;
6654 1161 : while (gsi_stmt (gsic) != gsi_stmt (gsip))
6655 : {
6656 2357 : gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6657 2357 : gimple_set_visited (gsi_stmt (gsic), true);
6658 3518 : gsi_prev (&gsic);
6659 : }
6660 : }
6661 : else
6662 : {
6663 562 : iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6664 562 : pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6665 : build_int_cst (integer_type_node,
6666 562 : power));
6667 562 : gimple_call_set_lhs (pow_stmt, iter_result);
6668 562 : gimple_set_location (pow_stmt, gimple_location (stmt));
6669 562 : gimple_set_uid (pow_stmt, gimple_uid (stmt));
6670 562 : gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6671 : }
6672 : }
6673 :
6674 : /* If we previously formed at least one other builtin_powi call,
6675 : form the product of this one and those others. */
6676 1732 : if (result)
6677 : {
6678 9 : tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6679 9 : mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6680 : result, iter_result);
6681 9 : gimple_set_location (mul_stmt, gimple_location (stmt));
6682 9 : gimple_set_uid (mul_stmt, gimple_uid (stmt));
6683 9 : gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6684 9 : gimple_set_visited (mul_stmt, true);
6685 9 : result = new_result;
6686 : }
6687 : else
6688 : result = iter_result;
6689 :
6690 : /* Decrement the occurrence count of each element in the product
6691 : by the count found above, and remove this many copies of each
6692 : factor from OPS. */
6693 3498 : for (i = j; i < vec_len; i++)
6694 : {
6695 1766 : unsigned k = power;
6696 1766 : unsigned n;
6697 :
6698 1766 : rf1 = &repeat_factor_vec[i];
6699 1766 : rf1->count -= power;
6700 :
6701 8465 : FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6702 : {
6703 4933 : if (oe->op == rf1->factor)
6704 : {
6705 4518 : if (oe->count <= k)
6706 : {
6707 4512 : ops->ordered_remove (n);
6708 4512 : k -= oe->count;
6709 :
6710 4512 : if (k == 0)
6711 : break;
6712 : }
6713 : else
6714 : {
6715 6 : oe->count -= k;
6716 6 : break;
6717 : }
6718 : }
6719 : }
6720 : }
6721 : }
6722 :
6723 : /* At this point all elements in the repeated factor vector have a
6724 : remaining occurrence count of 0 or 1, and those with a count of 1
6725 : don't have cached representatives. Re-sort the ops vector and
6726 : clean up. */
6727 482937 : ops->qsort (sort_by_operand_rank);
6728 482937 : repeat_factor_vec.release ();
6729 :
6730 : /* Return the final product computed herein. Note that there may
6731 : still be some elements with single occurrence count left in OPS;
6732 : those will be handled by the normal reassociation logic. */
6733 482937 : return result;
6734 : }
6735 :
6736 : /* Attempt to optimize
6737 : CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6738 : CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. CST1 is the REAL_CST found as the last entry of OPS; only single-use SSA operands are considered, and the function returns after the first rewrite. */
6739 :
6740 : static void
6741 1073911 : attempt_builtin_copysign (vec<operand_entry *> *ops)
6742 : {
6743 1073911 : operand_entry *oe;
6744 1073911 : unsigned int i;
6745 1073911 : unsigned int length = ops->length ();
6746 1073911 : tree cst = ops->last ()->op;
6747 :
6748 1073911 : if (length == 1 || TREE_CODE (cst) != REAL_CST)
6749 : return;
6750 :
6751 4125 : FOR_EACH_VEC_ELT (*ops, i, oe)
6752 : {
6753 2940 : if (TREE_CODE (oe->op) == SSA_NAME
6754 2940 : && has_single_use (oe->op))
6755 : {
6756 866 : gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6757 2978 : if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6758 : {
6759 54 : tree arg0, arg1;
6760 54 : switch (gimple_call_combined_fn (old_call))
6761 : {
6762 20 : CASE_CFN_COPYSIGN:
6763 20 : CASE_CFN_COPYSIGN_FN:
6764 20 : arg0 = gimple_call_arg (old_call, 0);
6765 20 : arg1 = gimple_call_arg (old_call, 1);
6766 : /* The first argument of copysign must be a constant,
6767 : otherwise there's nothing to do. */
6768 20 : if (TREE_CODE (arg0) == REAL_CST)
6769 : {
6770 20 : tree type = TREE_TYPE (arg0);
6771 20 : tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6772 : /* If we couldn't fold to a single constant, skip it.
6773 : That happens e.g. for inexact multiplication when
6774 : -frounding-math. */
6775 20 : if (mul == NULL_TREE)
6776 : break;
6777 : /* Instead of adjusting OLD_CALL, let's build a new
6778 : call to not leak the LHS and prevent keeping bogus
6779 : debug statements. DCE will clean up the old call. */
6780 16 : gcall *new_call;
6781 16 : if (gimple_call_internal_p (old_call))
6782 0 : new_call = gimple_build_call_internal
6783 0 : (IFN_COPYSIGN, 2, mul, arg1);
6784 : else
6785 16 : new_call = gimple_build_call
6786 16 : (gimple_call_fndecl (old_call), 2, mul, arg1);
6787 16 : tree lhs = make_ssa_name (type);
6788 16 : gimple_call_set_lhs (new_call, lhs);
6789 16 : gimple_set_location (new_call,
6790 : gimple_location (old_call));
6791 16 : insert_stmt_after (new_call, old_call);
6792 : /* We've used the constant, get rid of it. */
6793 16 : ops->pop ();
6794 16 : bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6795 : /* Handle the CST1 < 0 case by negating the result. */
6796 16 : if (cst1_neg)
6797 : {
6798 7 : tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6799 7 : gimple *negate_stmt
6800 7 : = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6801 7 : insert_stmt_after (negate_stmt, new_call);
6802 7 : oe->op = negrhs;
6803 : }
6804 : else
6805 9 : oe->op = lhs;
6806 16 : if (dump_file && (dump_flags & TDF_DETAILS))
6807 : {
6808 14 : fprintf (dump_file, "Optimizing copysign: ");
6809 14 : print_generic_expr (dump_file, cst);
6810 14 : fprintf (dump_file, " * COPYSIGN (");
6811 14 : print_generic_expr (dump_file, arg0);
6812 14 : fprintf (dump_file, ", ");
6813 14 : print_generic_expr (dump_file, arg1);
6814 23 : fprintf (dump_file, ") into %sCOPYSIGN (",
6815 : cst1_neg ? "-" : "");
6816 14 : print_generic_expr (dump_file, mul);
6817 14 : fprintf (dump_file, ", ");
6818 14 : print_generic_expr (dump_file, arg1);
6819 14 : fprintf (dump_file, "\n");
6820 : }
6821 16 : return;
6822 : }
6823 : break;
6824 : default:
6825 : break;
6826 : }
6827 : }
6828 : }
6829 : }
6830 : }
6831 :
6832 : /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS, then call remove_visited_stmt_chain on the old first operand. With detailed dumping enabled, print STMT before and after. */
6833 :
6834 : static void
6835 14358 : transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6836 : {
6837 14358 : tree rhs1;
6838 :
6839 14358 : if (dump_file && (dump_flags & TDF_DETAILS))
6840 : {
6841 28 : fprintf (dump_file, "Transforming ");
6842 28 : print_gimple_stmt (dump_file, stmt, 0);
6843 : }
6844 :
6845 14358 : rhs1 = gimple_assign_rhs1 (stmt);
6846 14358 : gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6847 14358 : update_stmt (stmt);
6848 14358 : remove_visited_stmt_chain (rhs1);
6849 :
6850 14358 : if (dump_file && (dump_flags & TDF_DETAILS))
6851 : {
6852 28 : fprintf (dump_file, " into ");
6853 28 : print_gimple_stmt (dump_file, stmt, 0);
6854 : }
6855 14358 : }
6856 :
6857 : /* Transform STMT at *GSI into a multiply of RHS1 and RHS2, then call remove_visited_stmt_chain on RHS1. With detailed dumping enabled, print STMT before and after. */
6858 :
6859 : static void
6860 194 : transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6861 : tree rhs1, tree rhs2)
6862 : {
6863 194 : if (dump_file && (dump_flags & TDF_DETAILS))
6864 : {
6865 0 : fprintf (dump_file, "Transforming ");
6866 0 : print_gimple_stmt (dump_file, stmt, 0);
6867 : }
6868 :
6869 194 : gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6870 194 : update_stmt (gsi_stmt (*gsi));
6871 194 : remove_visited_stmt_chain (rhs1);
6872 :
6873 194 : if (dump_file && (dump_flags & TDF_DETAILS))
6874 : {
6875 0 : fprintf (dump_file, " into ");
6876 0 : print_gimple_stmt (dump_file, stmt, 0);
6877 : }
6878 194 : }
6879 :
6880 : /* Rearranging OPS may yield more FMAs when at least two, but not all, of its operands are defined by a multiplication.
6881 : Put non-mult ops and mult ops alternately at the end of the queue, which is
6882 : conducive to generating more FMAs and reducing the loss of FMAs when breaking
6883 : the chain.
6884 : E.g.
6885 : a * b + c * d + e generates:
6886 :
6887 : _4 = c_9(D) * d_10(D);
6888 : _12 = .FMA (a_7(D), b_8(D), _4);
6889 : _11 = e_6(D) + _12;
6890 :
6891 : Rearranging the ops to e + a * b + c * d generates:
6892 :
6893 : _4 = .FMA (c_7(D), d_8(D), _3);
6894 : _11 = .FMA (a_5(D), b_6(D), _4);
6895 :
6896 : Return the number of operands in OPS counted as multiplications (a MULT_EXPR result, or a negate of one defined in the same block). */
6897 : static int
6898 16814 : rank_ops_for_fma (vec<operand_entry *> *ops)
6899 : {
6900 16814 : operand_entry *oe;
6901 16814 : unsigned int i;
6902 16814 : unsigned int ops_length = ops->length ();
6903 16814 : auto_vec<operand_entry *> ops_mult;
6904 16814 : auto_vec<operand_entry *> ops_others;
6905 :
6906 56124 : FOR_EACH_VEC_ELT (*ops, i, oe)
6907 : {
6908 39310 : if (TREE_CODE (oe->op) == SSA_NAME)
6909 : {
6910 39294 : gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6911 39294 : if (is_gimple_assign (def_stmt))
6912 : {
6913 28123 : if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6914 13468 : ops_mult.safe_push (oe);
6915 : /* A negate on the multiplication leads to FNMA. */
6916 14655 : else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6917 14655 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
6918 : {
6919 2807 : gimple *neg_def_stmt
6920 2807 : = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
6921 2807 : if (is_gimple_assign (neg_def_stmt)
6922 2804 : && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
6923 5606 : && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
6924 2760 : ops_mult.safe_push (oe);
6925 : else
6926 47 : ops_others.safe_push (oe);
6927 : }
6928 : else
6929 11848 : ops_others.safe_push (oe);
6930 : }
6931 : else
6932 11171 : ops_others.safe_push (oe);
6933 : }
6934 : else
6935 16 : ops_others.safe_push (oe);
6936 : }
6937 : /* 1. When ops_mult.length >= 2 and some op is not a mult, like the following case,
6938 :
6939 : a * b + c * d + e.
6940 :
6941 : we need to rearrange the ops.
6942 :
6943 : Putting ops that are not defined by a mult in front can generate more FMAs.
6944 :
6945 : 2. If all ops are defined with mult, we don't need to rearrange them. */
6946 16814 : unsigned mult_num = ops_mult.length ();
6947 16814 : if (mult_num >= 2 && mult_num != ops_length)
6948 : {
6949 : /* Put non-mult ops and mult ops alternately at the end of the
6950 : queue, which is conducive to generating more FMAs and reducing the
6951 : loss of FMAs when breaking the chain. */
6952 5424 : ops->truncate (0);
6953 5424 : ops->splice (ops_mult);
6954 5424 : int j, opindex = ops->length ();
6955 5424 : int others_length = ops_others.length ();
6956 10853 : for (j = 0; j < others_length; j++)
6957 : {
6958 5429 : oe = ops_others.pop ();
6959 5429 : ops->quick_insert (opindex, oe);
6960 5429 : if (opindex > 0)
6961 5428 : opindex--;
6962 : }
6963 : }
6964 16814 : return mult_num;
6965 16814 : }
6966 : /* Reassociate expressions in basic block BB, visiting its statements
6967 : from last to first.
6968 :
6969 : Bubble up return status from maybe_optimize_range_tests (false if it was not called). */
6970 :
6971 : static bool
6972 19224314 : reassociate_bb (basic_block bb)
6973 : {
6974 19224314 : gimple_stmt_iterator gsi;
6975 19224314 : gimple *stmt = last_nondebug_stmt (bb);
6976 19224314 : bool cfg_cleanup_needed = false;
6977 :
6978 19224314 : if (stmt && !gimple_visited_p (stmt))
6979 18602982 : cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6980 :
6981 19224314 : bool do_prev = false;
6982 38448628 : for (gsi = gsi_last_bb (bb);
6983 186284512 : !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6984 : {
6985 167060198 : do_prev = true;
6986 167060198 : stmt = gsi_stmt (gsi);
6987 :
6988 167060198 : if (is_gimple_assign (stmt)
6989 167060198 : && !stmt_could_throw_p (cfun, stmt))
6990 : {
6991 44816656 : tree lhs, rhs1, rhs2;
6992 44816656 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6993 :
6994 : /* If this was part of an already processed statement,
6995 : we don't need to touch it again. */
6996 44816656 : if (gimple_visited_p (stmt))
6997 : {
6998 : /* This statement might have become dead because of previous
6999 : reassociations. */
7000 427409 : if (has_zero_uses (gimple_get_lhs (stmt)))
7001 : {
7002 139400 : reassoc_remove_stmt (&gsi);
7003 139400 : release_defs (stmt);
7004 : /* We might end up removing the last stmt above which
7005 : places the iterator to the end of the sequence.
7006 : Reset it to the last stmt in this case and make sure
7007 : we don't do gsi_prev in that case. */
7008 139400 : if (gsi_end_p (gsi))
7009 : {
7010 403 : gsi = gsi_last_bb (bb);
7011 403 : do_prev = false;
7012 : }
7013 : }
7014 427409 : continue;
7015 : }
7016 :
7017 : /* If this is not a gimple binary expression, there is
7018 : nothing for us to do with it. */
7019 44389247 : if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
7020 32823611 : continue;
7021 :
7022 11565636 : lhs = gimple_assign_lhs (stmt);
7023 11565636 : rhs1 = gimple_assign_rhs1 (stmt);
7024 11565636 : rhs2 = gimple_assign_rhs2 (stmt);
7025 :
7026 : /* For non-bit or min/max operations we can't associate
7027 : all types. Verify that here. */
7028 16988234 : if ((rhs_code != BIT_IOR_EXPR
7029 11565636 : && rhs_code != BIT_AND_EXPR
7030 10598647 : && rhs_code != BIT_XOR_EXPR
7031 10598647 : && rhs_code != MIN_EXPR
7032 10462972 : && rhs_code != MAX_EXPR
7033 10348531 : && !can_reassociate_type_p (TREE_TYPE (lhs)))
7034 6147651 : || !can_reassociate_op_p (rhs1)
7035 17710198 : || !can_reassociate_op_p (rhs2))
7036 5422598 : continue;
7037 :
7038 6143038 : if (associative_tree_code (rhs_code))
7039 : {
7040 4606186 : auto_vec<operand_entry *> ops;
7041 4606186 : tree powi_result = NULL_TREE;
7042 4606186 : bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
7043 :
7044 : /* There may be no immediate uses left by the time we
7045 : get here because we may have eliminated them all. */
7046 4606186 : if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
7047 46324 : continue;
7048 :
7049 4559862 : gimple_set_visited (stmt, true);
7050 4559862 : linearize_expr_tree (&ops, stmt, true, true);
7051 4559862 : ops.qsort (sort_by_operand_rank);
7052 4559862 : int orig_len = ops.length ();
7053 4559862 : optimize_ops_list (rhs_code, &ops);
7054 9119724 : if (undistribute_ops_list (rhs_code, &ops,
7055 : loop_containing_stmt (stmt)))
7056 : {
7057 266 : ops.qsort (sort_by_operand_rank);
7058 266 : optimize_ops_list (rhs_code, &ops);
7059 : }
7060 9119724 : if (undistribute_bitref_for_vector (rhs_code, &ops,
7061 : loop_containing_stmt (stmt)))
7062 : {
7063 40 : ops.qsort (sort_by_operand_rank);
7064 40 : optimize_ops_list (rhs_code, &ops);
7065 : }
7066 4559862 : if (rhs_code == PLUS_EXPR
7067 4559862 : && transform_add_to_multiply (&ops))
7068 73 : ops.qsort (sort_by_operand_rank);
7069 :
7070 4559862 : if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7071 : {
7072 952718 : if (is_vector)
7073 23964 : optimize_vec_cond_expr (rhs_code, &ops);
7074 : else
7075 928754 : optimize_range_tests (rhs_code, &ops, NULL);
7076 : }
7077 :
7078 4559862 : if (rhs_code == MULT_EXPR && !is_vector)
7079 : {
7080 1073911 : attempt_builtin_copysign (&ops);
7081 :
7082 1073911 : if (reassoc_insert_powi_p
7083 1073911 : && (flag_unsafe_math_optimizations
7084 443285 : || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7085 486593 : powi_result = attempt_builtin_powi (stmt, &ops);
7086 : }
7087 :
7088 4559862 : operand_entry *last;
7089 4559862 : bool negate_result = false;
7090 4559862 : if (ops.length () > 1
7091 4559862 : && rhs_code == MULT_EXPR)
7092 : {
7093 1096955 : last = ops.last ();
7094 1096955 : if ((integer_minus_onep (last->op)
7095 1096746 : || real_minus_onep (last->op))
7096 237 : && !HONOR_SNANS (TREE_TYPE (lhs))
7097 1097192 : && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7098 0 : || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7099 : {
7100 237 : ops.pop ();
7101 237 : negate_result = true;
7102 : }
7103 : }
7104 :
7105 4559862 : tree new_lhs = lhs;
7106 : /* If the operand vector is now empty, all operands were
7107 : consumed by the __builtin_powi optimization. */
7108 4559862 : if (ops.length () == 0)
7109 1400 : transform_stmt_to_copy (&gsi, stmt, powi_result);
7110 4558462 : else if (ops.length () == 1)
7111 : {
7112 13152 : tree last_op = ops.last ()->op;
7113 :
7114 : /* If the stmt that defines operand has to be inserted, insert it
7115 : before the use. */
7116 13152 : if (ops.last ()->stmt_to_insert)
7117 0 : insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7118 13152 : if (powi_result)
7119 194 : transform_stmt_to_multiply (&gsi, stmt, last_op,
7120 : powi_result);
7121 : else
7122 12958 : transform_stmt_to_copy (&gsi, stmt, last_op);
7123 : }
7124 : else
7125 : {
7126 4545310 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7127 4545310 : int ops_num = ops.length ();
7128 4545310 : int width = 0;
7129 4545310 : int mult_num = 0;
7130 :
7131 : /* For binary bit operations, if there are at least 3
7132 : operands and the last operand in OPS is a constant,
7133 : move it to the front. This helps ensure that we generate
7134 : (X & Y) & C rather than (X & C) & Y. The former will
7135 : often match a canonical bit test when we get to RTL. */
7136 4545310 : if (ops.length () > 2
7137 184135 : && (rhs_code == BIT_AND_EXPR
7138 : || rhs_code == BIT_IOR_EXPR
7139 163250 : || rhs_code == BIT_XOR_EXPR)
7140 4570218 : && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7141 1606 : std::swap (*ops[0], *ops[ops_num - 1]);
7142 :
7143 4545310 : optimization_type opt_type = bb_optimization_type (bb);
7144 :
7145 : /* If the target supports FMA, rank_ops_for_fma will detect if
7146 : the chain has fmas and rearrange the ops if so. */
7147 4545310 : if (!reassoc_insert_powi_p
7148 2788433 : && direct_internal_fn_supported_p (IFN_FMA,
7149 2788433 : TREE_TYPE (lhs),
7150 : opt_type)
7151 4580016 : && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7152 : {
7153 16814 : mult_num = rank_ops_for_fma (&ops);
7154 : }
7155 :
7156 : /* Only rewrite the expression tree to parallel in the
7157 : last reassoc pass to avoid useless work back-and-forth
7158 : with initial linearization. */
7159 4545310 : bool has_fma = mult_num >= 2 && mult_num != ops_num;
7160 4545310 : if (!reassoc_insert_powi_p
7161 2788433 : && ops.length () > 3
7162 4562704 : && (width = get_reassociation_width (&ops, mult_num, lhs,
7163 : rhs_code, mode))
7164 : > 1)
7165 : {
7166 1815 : if (dump_file && (dump_flags & TDF_DETAILS))
7167 2 : fprintf (dump_file,
7168 : "Width = %d was chosen for reassociation\n",
7169 : width);
7170 1815 : rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7171 : width,
7172 : has_fma,
7173 : ops);
7174 : }
7175 : else
7176 : {
7177 : /* When there are three operands left, we want
7178 : to make sure the ones that get the double
7179 : binary op are chosen wisely. */
7180 4543495 : int len = ops.length ();
7181 4543495 : if (!reassoc_insert_powi_p
7182 2786618 : && len >= 3
7183 4672338 : && (!has_fma
7184 : /* width > 1 means ranking ops results in better
7185 : parallelism. Check current value to avoid
7186 : calling get_reassociation_width again. */
7187 5420 : || (width != 1
7188 5420 : && get_reassociation_width (
7189 : &ops, mult_num, lhs, rhs_code, mode)
7190 : > 1)))
7191 125654 : swap_ops_for_binary_stmt (ops, len - 3);
7192 :
7193 4543495 : new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7194 4543495 : powi_result != NULL
7195 4543495 : || negate_result,
7196 : len != orig_len);
7197 : }
7198 :
7199 : /* If we combined some repeated factors into a
7200 : __builtin_powi call, multiply that result by the
7201 : reassociated operands. */
7202 4545310 : if (powi_result)
7203 : {
7204 129 : gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7205 129 : tree type = TREE_TYPE (lhs);
7206 129 : tree target_ssa = make_temp_ssa_name (type, NULL,
7207 : "reassocpow");
7208 129 : gimple_set_lhs (lhs_stmt, target_ssa);
7209 129 : update_stmt (lhs_stmt);
7210 129 : if (lhs != new_lhs)
7211 : {
7212 129 : target_ssa = new_lhs;
7213 129 : new_lhs = lhs;
7214 : }
7215 129 : mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7216 : powi_result, target_ssa);
7217 129 : gimple_set_location (mul_stmt, gimple_location (stmt));
7218 129 : gimple_set_uid (mul_stmt, gimple_uid (stmt));
7219 129 : gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7220 : }
7221 : }
7222 :
7223 4559862 : if (negate_result)
7224 : {
7225 237 : stmt = SSA_NAME_DEF_STMT (lhs);
7226 237 : tree tmp = make_ssa_name (TREE_TYPE (lhs));
7227 237 : gimple_set_lhs (stmt, tmp);
7228 237 : if (lhs != new_lhs)
7229 227 : tmp = new_lhs;
7230 237 : gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7231 : tmp);
7232 237 : gimple_set_uid (neg_stmt, gimple_uid (stmt));
7233 237 : gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7234 237 : update_stmt (stmt);
7235 : }
7236 4606186 : }
7237 : }
7238 : }
7239 :
7240 19224314 : return cfg_cleanup_needed;
7241 : }
7242 :
7243 : /* Add jumps around shifts for range tests turned into bit tests. Handles every SSA_NAME recorded in reassoc_branch_fixups and then releases that vector.
7244 : For each SSA_NAME VAR we have code like:
7245 : VAR = ...; // final stmt of range comparison
7246 : // bit test here...;
7247 : OTHERVAR = ...; // final stmt of the bit test sequence
7248 : RES = VAR | OTHERVAR;
7249 : Turn the above into:
7250 : VAR = ...;
7251 : if (VAR != 0)
7252 : goto <l3>;
7253 : else
7254 : goto <l2>;
7255 : <l2>:
7256 : // bit test here...;
7257 : OTHERVAR = ...;
7258 : <l3>:
7259 : # RES = PHI<1(l1), OTHERVAR(l2)>; (l1 denotes the block that now ends in the test of VAR.) */
7260 :
7261 : static void
7262 2079310 : branch_fixup (void)
7263 : {
7264 2079310 : tree var;
7265 2079310 : unsigned int i;
7266 :
7267 2079709 : FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7268 : {
7269 399 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7270 399 : gimple *use_stmt;
7271 399 : use_operand_p use;
7272 399 : bool ok = single_imm_use (var, &use, &use_stmt); /* The assertion below requires this to succeed and the use to be a BIT_IOR_EXPR assignment in the same block as DEF_STMT. */
7273 399 : gcc_assert (ok
7274 : && is_gimple_assign (use_stmt)
7275 : && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7276 : && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7277 :
7278 399 : basic_block cond_bb = gimple_bb (def_stmt); /* Block holding VAR's definition; it will end in the new condition. */
7279 399 : basic_block then_bb = split_block (cond_bb, def_stmt)->dest; /* Destination of the split at DEF_STMT (<l2> in the header comment). */
7280 399 : basic_block merge_bb = split_block (then_bb, use_stmt)->dest; /* Destination of the split at USE_STMT (<l3> in the header comment). */
7281 :
7282 399 : gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7283 399 : gimple *g = gimple_build_cond (NE_EXPR, var, /* Build "if (VAR != 0)" with no explicit labels. */
7284 399 : build_zero_cst (TREE_TYPE (var)),
7285 : NULL_TREE, NULL_TREE);
7286 399 : location_t loc = gimple_location (use_stmt); /* The condition and both PHI arguments reuse the location of the BIT_IOR statement. */
7287 399 : gimple_set_location (g, loc);
7288 399 : gsi_insert_after (&gsi, g, GSI_NEW_STMT); /* Place the condition right after DEF_STMT. */
7289 :
7290 399 : edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE); /* New true edge that jumps around the bit test. */
7291 399 : etrue->probability = profile_probability::even ();
7292 399 : edge efalse = find_edge (cond_bb, then_bb); /* The edge left by the first split becomes the false edge. */
7293 399 : efalse->flags = EDGE_FALSE_VALUE;
7294 399 : efalse->probability -= etrue->probability; /* Take the true edge's share out of the false edge ... */
7295 399 : then_bb->count -= etrue->count (); /* ... and out of the count of the block it leads to. */
7296 :
7297 399 : tree othervar = NULL_TREE; /* The operand of the BIT_IOR that is not VAR. */
7298 399 : if (gimple_assign_rhs1 (use_stmt) == var)
7299 280 : othervar = gimple_assign_rhs2 (use_stmt);
7300 119 : else if (gimple_assign_rhs2 (use_stmt) == var)
7301 : othervar = gimple_assign_rhs1 (use_stmt);
7302 : else
7303 0 : gcc_unreachable ();
7304 399 : tree lhs = gimple_assign_lhs (use_stmt);
7305 399 : gphi *phi = create_phi_node (lhs, merge_bb); /* The PHI in MERGE_BB defines the same LHS the BIT_IOR statement defined. */
7306 399 : add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc); /* Constant one when arriving over the true edge. */
7307 399 : add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc); /* OTHERVAR when arriving from THEN_BB. */
7308 399 : gsi = gsi_for_stmt (use_stmt);
7309 399 : gsi_remove (&gsi, true); /* Remove the BIT_IOR statement; the PHI created above now defines LHS. */
7310 :
7311 399 : set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb); /* Record the new dominator and post-dominator relations between COND_BB and MERGE_BB. */
7312 399 : set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7313 : }
7314 2079310 : reassoc_branch_fixups.release (); /* All queued fixups have been applied. */
7315 2079310 : }
7316 :
7317 : void dump_ops_vector (FILE *file, vec<operand_entry *> ops); /* Prototypes for the two non-static dump helpers defined below. */
7318 : void debug_ops_vector (vec<operand_entry *> ops);
7319 :
7320 : /* Dump the operand entry vector OPS to FILE, one line per entry giving its index, its rank and its operand tree. */
7321 :
7322 : void
7323 0 : dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7324 : {
7325 0 : operand_entry *oe;
7326 0 : unsigned int i;
7327 :
7328 0 : FOR_EACH_VEC_ELT (ops, i, oe)
7329 : {
7330 0 : fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); /* Index and rank first ... */
7331 0 : print_generic_expr (file, oe->op); /* ... then the operand itself ... */
7332 0 : fprintf (file, "\n"); /* ... and a newline to end the entry. */
7333 : }
7334 0 : }
7335 :
7336 : /* Dump the operand entry vector OPS to STDERR by forwarding to dump_ops_vector. Marked DEBUG_FUNCTION. */
7337 :
7338 : DEBUG_FUNCTION void
7339 0 : debug_ops_vector (vec<operand_entry *> ops)
7340 : {
7341 0 : dump_ops_vector (stderr, ops);
7342 0 : }
7343 :
7344 : /* Run break_up_subtract_bb on each block reached by walking the dominator tree from the entry block, then reassociate_bb on each block reached by walking the post-dominator tree from the exit block. Bubble up return status from reassociate_bb: the result is true if any of those calls returned true. */
7345 :
7346 : static bool
7347 2079310 : do_reassoc ()
7348 : {
7349 2079310 : bool cfg_cleanup_needed = false;
7350 2079310 : basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); /* Explicit stack shared by both walks; freed before returning. */
7351 :
7352 2079310 : unsigned sp = 0; /* Number of blocks currently on the stack. */
7353 2079310 : for (auto son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun)); /* First walk: seed the stack with the dominator sons of the entry block. */
7354 4158620 : son; son = next_dom_son (CDI_DOMINATORS, son))
7355 2079310 : worklist[sp++] = son;
7356 21303688 : while (sp) /* Pop a block, process it, then push its dominator sons. */
7357 : {
7358 19224378 : basic_block bb = worklist[--sp];
7359 19224378 : break_up_subtract_bb (bb);
7360 19224378 : for (auto son = first_dom_son (CDI_DOMINATORS, bb);
7361 36369446 : son; son = next_dom_son (CDI_DOMINATORS, son))
7362 17145068 : worklist[sp++] = son;
7363 : }
7364 :
7365 10703868 : for (auto son = first_dom_son (CDI_POST_DOMINATORS, /* Second walk: the stack is empty again, so seed it with the post-dominator sons of the exit block. */
7366 2079310 : EXIT_BLOCK_PTR_FOR_FN (cfun));
7367 10703868 : son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7368 8624558 : worklist[sp++] = son;
7369 21303624 : while (sp) /* Pop a block, reassociate it, then push its post-dominator sons. */
7370 : {
7371 19224314 : basic_block bb = worklist[--sp];
7372 19224314 : cfg_cleanup_needed |= reassociate_bb (bb); /* Remember whether any block asked for a CFG cleanup. */
7373 19224314 : for (auto son = first_dom_son (CDI_POST_DOMINATORS, bb);
7374 29824070 : son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7375 10599756 : worklist[sp++] = son;
7376 : }
7377 :
7378 2079310 : free (worklist);
7379 2079310 : return cfg_cleanup_needed;
7380 : }
7381 :
7382 : /* Initialize the reassociation pass: set up loop information, clear the statistics and the operand entry id counter, assign ranks to default definitions and to basic blocks, compute post-dominators, reset plus_negates and call mark_ssa_maybe_undefs. */
7383 :
7384 : static void
7385 2079310 : init_reassoc (void)
7386 : {
7387 2079310 : int i;
7388 2079310 : int64_t rank = 2; /* Running rank counter; it is pre-incremented before every use below, so the first rank handed out is 3. */
7389 2079310 : int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); /* One slot per basic block other than the NUM_FIXED_BLOCKS fixed ones; freed once block ranks are set. */
7390 :
7391 : /* Find the loops, so that we can prevent moving calculations in
7392 : them. */
7393 2079310 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7394 :
7395 2079310 : memset (&reassociate_stats, 0, sizeof (reassociate_stats)); /* Start every statistics counter at zero. */
7396 :
7397 2079310 : next_operand_entry_id = 0;
7398 :
7399 : /* Reverse RPO (Reverse Post Order) will give us something where
7400 : deeper loops come later. */
7401 2079310 : pre_and_rev_post_order_compute (NULL, bbs, false); /* Fills BBS with the block order used for the block ranks below. */
7402 2079310 : bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun)); /* Indexed by basic block index (see the loop over BBS below). */
7403 2079310 : operand_rank = new hash_map<tree, int64_t>; /* Deleted again in fini_reassoc. */
7404 :
7405 : /* Give each default definition a distinct rank. This includes
7406 : parameters and the static chain. Walk backwards over all
7407 : SSA names so that we get proper rank ordering according
7408 : to tree_swap_operands_p. */
7409 108031119 : for (i = num_ssa_names - 1; i > 0; --i)
7410 : {
7411 103872499 : tree name = ssa_name (i);
7412 177682307 : if (name && SSA_NAME_IS_DEFAULT_DEF (name)) /* ssa_name (i) may yield NULL, hence the check. */
7413 6143011 : insert_operand_rank (name, ++rank);
7414 : }
7415 :
7416 : /* Set up rank for each BB */
7417 21303624 : for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7418 19224314 : bb_rank[bbs[i]] = ++rank << 16; /* Continues the same counter, shifted so block ranks occupy the bits above the low 16. */
7419 :
7420 2079310 : free (bbs);
7421 2079310 : calculate_dominance_info (CDI_POST_DOMINATORS); /* Released again in fini_reassoc. */
7422 2079310 : plus_negates = vNULL; /* Start with an empty plus_negates vector. */
7423 2079310 : mark_ssa_maybe_undefs ();
7424 2079310 : }
7425 :
7426 : /* Cleanup after the reassociation pass, and print stats if
7427 : requested. Reports the six reassociate_stats counters via statistics_counter_event, then releases the pass-wide data: operand_rank, biased_names, operand_entry_pool, bb_rank, plus_negates, post-dominator info and loop info. */
7428 :
7429 : static void
7430 2079310 : fini_reassoc (void)
7431 : {
7432 2079310 : statistics_counter_event (cfun, "Linearized", /* One event per counter in reassociate_stats. */
7433 : reassociate_stats.linearized);
7434 2079310 : statistics_counter_event (cfun, "Constants eliminated",
7435 : reassociate_stats.constants_eliminated);
7436 2079310 : statistics_counter_event (cfun, "Ops eliminated",
7437 : reassociate_stats.ops_eliminated);
7438 2079310 : statistics_counter_event (cfun, "Statements rewritten",
7439 : reassociate_stats.rewritten);
7440 2079310 : statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7441 : reassociate_stats.pows_encountered);
7442 2079310 : statistics_counter_event (cfun, "Built-in powi calls created",
7443 : reassociate_stats.pows_created);
7444 :
7445 4158620 : delete operand_rank; /* Allocated with new in init_reassoc. */
7446 2079310 : bitmap_clear (biased_names);
7447 2079310 : operand_entry_pool.release ();
7448 2079310 : free (bb_rank); /* Allocated with XCNEWVEC in init_reassoc. */
7449 2079310 : plus_negates.release ();
7450 2079310 : free_dominance_info (CDI_POST_DOMINATORS); /* Only post-dominators are freed here; init_reassoc is what computed them. */
7451 2079310 : loop_optimizer_finalize (); /* Counterpart of loop_optimizer_init in init_reassoc. */
7452 : }
7453 :
7454 : /* Execute function for Reassociation (the gate is pass_reassoc::gate). If INSERT_POWI_P, enable
7455 : insertion of __builtin_powi calls. BIAS_LOOP_CARRIED_PHI_RANKS_P is recorded in reassoc_bias_loop_carried_phi_ranks_p.
7456 :
7457 : Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
7458 : optimization of a gimple conditional. Otherwise returns zero. */
7459 :
7460 : static unsigned int
7461 2079310 : execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7462 : {
7463 2079310 : reassoc_insert_powi_p = insert_powi_p; /* Publish both options in the non-local flags before any work starts. */
7464 2079310 : reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7465 :
7466 2079310 : init_reassoc ();
7467 :
7468 2079310 : bool cfg_cleanup_needed;
7469 2079310 : cfg_cleanup_needed = do_reassoc (); /* True if any reassociate_bb call requested a CFG cleanup. */
7470 2079310 : repropagate_negates ();
7471 2079310 : branch_fixup (); /* Applies the fixups queued in reassoc_branch_fixups. */
7472 :
7473 2079310 : fini_reassoc ();
7474 2079310 : return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7475 : }
7476 :
7477 : namespace {
7478 :
7479 : const pass_data pass_data_reassoc = /* Static description of the reassoc pass, handed to the gimple_opt_pass constructor below. */
7480 : {
7481 : GIMPLE_PASS, /* type */
7482 : "reassoc", /* name */
7483 : OPTGROUP_NONE, /* optinfo_flags */
7484 : TV_TREE_REASSOC, /* tv_id */
7485 : ( PROP_cfg | PROP_ssa ), /* properties_required */
7486 : 0, /* properties_provided */
7487 : 0, /* properties_destroyed */
7488 : 0, /* todo_flags_start */
7489 : TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7490 : };
7491 :
7492 : class pass_reassoc : public gimple_opt_pass /* Pass object that gates on flag_tree_reassoc and forwards to execute_reassoc. */
7493 : {
7494 : public:
7495 597656 : pass_reassoc (gcc::context *ctxt)
7496 1195312 : : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false) /* NOTE(review): bias_loop_carried_phi_ranks_p is not initialized here; only set_pass_param assigns it - confirm every instance receives a parameter. */
7497 : {}
7498 :
7499 : /* opt_pass methods: */
7500 298828 : opt_pass * clone () final override { return new pass_reassoc (m_ctxt); } /* The clone starts from constructor defaults; the two flags are not copied. */
7501 597656 : void set_pass_param (unsigned int n, bool param) final override /* Only parameter 0 is accepted: PARAM enables powi insertion, and loop-carried PHI rank biasing is enabled exactly when PARAM is false. */
7502 : {
7503 597656 : gcc_assert (n == 0);
7504 597656 : insert_powi_p = param;
7505 597656 : bias_loop_carried_phi_ranks_p = !param;
7506 597656 : }
7507 2079638 : bool gate (function *) final override { return flag_tree_reassoc != 0; } /* Run only when flag_tree_reassoc is nonzero. */
7508 2079310 : unsigned int execute (function *) final override /* Returns execute_reassoc's result unchanged. */
7509 : {
7510 2079310 : return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7511 : }
7512 :
7513 : private:
7514 : /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7515 : point 3a in the pass header comment. */
7516 : bool insert_powi_p;
7517 : bool bias_loop_carried_phi_ranks_p; /* Second argument to execute_reassoc; set to the negation of insert_powi_p by set_pass_param. */
7518 : }; // class pass_reassoc
7519 :
7520 : } // anon namespace
7521 :
7522 : gimple_opt_pass * /* Allocate a new pass_reassoc for CTXT and return it through its gimple_opt_pass base type. */
7523 298828 : make_pass_reassoc (gcc::context *ctxt)
7524 : {
7525 298828 : return new pass_reassoc (ctxt);
7526 : }
|