Line data Source code
1 : /* Global, SSA-based optimizations using mathematical identities.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 : operations. These are common in sequences such as this one:
22 :
23 : modulus = sqrt(x*x + y*y + z*z);
24 : x = x / modulus;
25 : y = y / modulus;
26 : z = z / modulus;
27 :
28 : that can be optimized to
29 :
30 : modulus = sqrt(x*x + y*y + z*z);
31 : rmodulus = 1.0 / modulus;
32 : x = x * rmodulus;
33 : y = y * rmodulus;
34 : z = z * rmodulus;
35 :
36 : We do this for loop invariant divisors, and with this pass whenever
37 : we notice that a division has the same divisor multiple times.
38 :
39 : Of course, like in PRE, we don't insert a division if a dominator
40 : already has one. However, this cannot be done as an extension of
41 : PRE for several reasons.
42 :
43 : First of all, with some experiments it was found out that the
44 : transformation is not always useful if there are only two divisions
45 : by the same divisor. This is probably because modern processors
46 : can pipeline the divisions; on older, in-order processors it should
47 : still be effective to optimize two divisions by the same number.
48 : We make this a param, and it shall be called N in the remainder of
49 : this comment.
50 :
51 : Second, if trapping math is active, we have less freedom on where
52 : to insert divisions: we can only do so in basic blocks that already
53 : contain one. (If divisions don't trap, instead, we can insert
54 : divisions elsewhere, which will be in blocks that are common dominators
55 : of those that have the division).
56 :
57 : We really don't want to compute the reciprocal unless a division will
58 : be found. To do this, we won't insert the division in a basic block
59 : that has less than N divisions *post-dominating* it.
60 :
61 : The algorithm constructs a subset of the dominator tree, holding the
62 : blocks containing the divisions and the common dominators to them,
63 : and walk it twice. The first walk is in post-order, and it annotates
64 : each block with the number of divisions that post-dominate it: this
65 : gives information on where divisions can be inserted profitably.
66 : The second walk is in pre-order, and it inserts divisions as explained
67 : above, and replaces divisions by multiplications.
68 :
69 : In the best case, the cost of the pass is O(n_statements). In the
70 : worst-case, the cost is due to creating the dominator tree subset,
71 : with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 : for n_statements / n_basic_blocks statements. So, the amortized cost
73 : of creating the dominator tree subset is O(n_basic_blocks) and the
74 : worst-case cost of the pass is O(n_statements * n_basic_blocks).
75 :
76 : More practically, the cost will be small because there are few
77 : divisions, and they tend to be in the same basic block, so insert_bb
78 : is called very few times.
79 :
80 : If we did this using domwalk.cc, an efficient implementation would have
81 : to work on all the variables in a single pass, because we could not
82 : work on just a subset of the dominator tree, as we do now, and the
83 : cost would also be something like O(n_statements * n_basic_blocks).
84 : The data structures would be more complex in order to work on all the
85 : variables in a single pass. */
86 :
87 : #include "config.h"
88 : #include "system.h"
89 : #include "coretypes.h"
90 : #include "backend.h"
91 : #include "target.h"
92 : #include "rtl.h"
93 : #include "tree.h"
94 : #include "gimple.h"
95 : #include "predict.h"
96 : #include "alloc-pool.h"
97 : #include "tree-pass.h"
98 : #include "ssa.h"
99 : #include "optabs-tree.h"
100 : #include "gimple-pretty-print.h"
101 : #include "alias.h"
102 : #include "fold-const.h"
103 : #include "gimple-iterator.h"
104 : #include "gimple-fold.h"
105 : #include "gimplify.h"
106 : #include "gimplify-me.h"
107 : #include "stor-layout.h"
108 : #include "tree-cfg.h"
109 : #include "tree-dfa.h"
110 : #include "tree-ssa.h"
111 : #include "builtins.h"
112 : #include "internal-fn.h"
113 : #include "case-cfn-macros.h"
114 : #include "optabs-libfuncs.h"
115 : #include "tree-eh.h"
116 : #include "targhooks.h"
117 : #include "domwalk.h"
118 : #include "tree-ssa-math-opts.h"
119 : #include "dbgcnt.h"
120 : #include "cfghooks.h"
121 :
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic block that compute a
   division.  Instances form a subset of the dominator tree: CHILDREN
   links to dominated occurrences, NEXT chains siblings under a common
   dominator, and BB->aux points back at the occurrence while it is
   live.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb = basic_block();

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def = tree();

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def = tree();

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt = nullptr;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children = nullptr;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next = nullptr;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions = 0;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division = false;

  /* Construct a struct occurrence for basic block BB, and whose
     children list is headed by CHILDREN.  Registers itself in BB->aux
     so register_division_in can find it again from the block.  */
  occurrence (basic_block bb, struct occurrence *children)
  : bb (bb), children (children)
  {
    bb->aux = this;
  }

  /* Destroy a struct occurrence and remove it from its basic block.  */
  ~occurrence ()
  {
    bb->aux = nullptr;
  }

  /* Allocate memory for a struct occurrence from OCC_POOL.  */
  static void* operator new (size_t);

  /* Return memory for a struct occurrence to OCC_POOL.  */
  static void operator delete (void*, size_t);
};
179 :
/* Counters for this pass's dump/statistics output; rdivs_inserted is
   bumped each time insert_reciprocals emits a 1.0/X statement.  */
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;
188 :
/* Counters for the sincos mini-pass (presumably maintained by code
   later in this file; not touched in this chunk).  */
static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;

  /* Number of conversions removed.  */
  int conv_removed;

} sincos_stats;
198 :
/* Counters for the widening-multiply mini-pass (presumably maintained
   by code later in this file; not touched in this chunk).  */
static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;

  /* Number of highpart multiplication ops inserted.  */
  int highpart_mults_inserted;
} widen_mul_stats;
216 :
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree; head of the sibling list
   built by insert_bb and torn down by free_bb.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;
223 :
224 621 : void* occurrence::operator new (size_t n)
225 : {
226 621 : gcc_assert (n == sizeof(occurrence));
227 621 : return occ_pool->allocate_raw ();
228 : }
229 :
230 621 : void occurrence::operator delete (void *occ, size_t n)
231 : {
232 621 : gcc_assert (n == sizeof(occurrence));
233 621 : occ_pool->remove_raw (occ);
234 621 : }
235 :
236 : /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
237 : list of "struct occurrence"s, one per basic block, having IDOM as
238 : their common dominator.
239 :
240 : We try to insert NEW_OCC as deep as possible in the tree, and we also
241 : insert any other block that is a common dominator for BB and one
242 : block already in the tree. */
243 :
static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* Scan the sibling list under IDOM, comparing NEW_OCC's block against
     each existing occurrence's block via their nearest common
     dominator.  Note the loop does not unconditionally advance P_OCC:
     when an element is unlinked, *p_occ already names the next one.  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  /* DOM is a strict dominator of both blocks, newly discovered;
	     it must not be in the tree yet, hence the empty-aux check.  */
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = new occurrence (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
300 :
301 : /* Register that we found a division in BB.
302 : IMPORTANCE is a measure of how much weighting to give
303 : that division. Use IMPORTANCE = 2 to register a single
304 : division. If the division is going to be found multiple
305 : times use 1 (as it is with squares). */
306 :
307 : static inline void
308 710 : register_division_in (basic_block bb, int importance)
309 : {
310 710 : struct occurrence *occ;
311 :
312 710 : occ = (struct occurrence *) bb->aux;
313 710 : if (!occ)
314 : {
315 609 : occ = new occurrence (bb, NULL);
316 609 : insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
317 : }
318 :
319 710 : occ->bb_has_division = true;
320 710 : occ->num_divisions += importance;
321 710 : }
322 :
323 :
324 : /* Compute the number of divisions that postdominate each block in OCC and
325 : its children. */
326 :
327 : static void
328 31 : compute_merit (struct occurrence *occ)
329 : {
330 31 : struct occurrence *occ_child;
331 31 : basic_block dom = occ->bb;
332 :
333 60 : for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
334 : {
335 29 : basic_block bb;
336 29 : if (occ_child->children)
337 5 : compute_merit (occ_child);
338 :
339 29 : if (flag_exceptions)
340 6 : bb = single_noncomplex_succ (dom);
341 : else
342 : bb = dom;
343 :
344 29 : if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
345 12 : occ->num_divisions += occ_child->num_divisions;
346 : }
347 31 : }
348 :
349 :
350 : /* Return whether USE_STMT is a floating-point division by DEF. */
351 : static inline bool
352 347627 : is_division_by (gimple *use_stmt, tree def)
353 : {
354 347627 : return is_gimple_assign (use_stmt)
355 237740 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
356 1293 : && gimple_assign_rhs2 (use_stmt) == def
357 : /* Do not recognize x / x as valid division, as we are getting
358 : confused later by replacing all immediate uses x in such
359 : a stmt. */
360 887 : && gimple_assign_rhs1 (use_stmt) != def
361 348514 : && !stmt_can_throw_internal (cfun, use_stmt);
362 : }
363 :
364 : /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
365 : static inline bool
366 343870 : is_mult_by (gimple *use_stmt, tree def, tree a)
367 : {
368 343870 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
369 343870 : && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
370 : {
371 78084 : tree op0 = gimple_assign_rhs1 (use_stmt);
372 78084 : tree op1 = gimple_assign_rhs2 (use_stmt);
373 :
374 78084 : return (op0 == def && op1 == a)
375 78084 : || (op0 == a && op1 == def);
376 : }
377 : return 0;
378 : }
379 :
380 : /* Return whether USE_STMT is DEF * DEF. */
381 : static inline bool
382 343825 : is_square_of (gimple *use_stmt, tree def)
383 : {
384 5 : return is_mult_by (use_stmt, def, def);
385 : }
386 :
387 : /* Return whether USE_STMT is a floating-point division by
388 : DEF * DEF. */
389 : static inline bool
390 181 : is_division_by_square (gimple *use_stmt, tree def)
391 : {
392 181 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
393 174 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
394 7 : && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
395 188 : && !stmt_can_throw_internal (cfun, use_stmt))
396 : {
397 7 : tree denominator = gimple_assign_rhs2 (use_stmt);
398 7 : if (TREE_CODE (denominator) == SSA_NAME)
399 7 : return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
400 : }
401 : return 0;
402 : }
403 :
404 : /* Walk the subset of the dominator tree rooted at OCC, setting the
405 : RECIP_DEF field to a definition of 1.0 / DEF that can be used in
406 : the given basic block. The field may be left NULL, of course,
407 : if it is not possible or profitable to do the optimization.
408 :
409 : DEF_BSI is an iterator pointing at the statement defining DEF.
410 : If RECIP_DEF is set, a dominator already has a computation that can
411 : be used.
412 :
413 : If should_insert_square_recip is set, then this also inserts
414 : the square of the reciprocal immediately after the definition
415 : of the reciprocal. */
416 :
static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, tree square_recip_def,
		    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  /* Insert a fresh reciprocal only if no dominator already provides
     one (RECIP_DEF is NULL), insertion here is legal under trapping
     math, and enough divisions post-dominate this block to pay for
     it.  */
  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
	 the costing loop.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (should_insert_square_recip)
	{
	  /* Also materialize (1/DEF) * (1/DEF) for divisions by
	     DEF * DEF.  */
	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
						 recip_def, recip_def);
	}

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  Scan forward
	     from the block's first real statement to the first division
	     by DEF or by DEF * DEF.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi)
		 && (!is_division_by (gsi_stmt (gsi), def))
		 && (!is_division_by_square (gsi_stmt (gsi), def)))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      /* Remember the inserted statement so replace_reciprocal does not
	 rewrite the reciprocal computation itself.  */
      occ->recip_def_stmt = new_stmt;
    }

  /* Publish the (possibly inherited) definitions on this node, then
     propagate them down the dominator-tree subset.  */
  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
			square_recip_def, should_insert_square_recip,
			threshold);
}
490 :
491 : /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
492 : Take as argument the use for (x * x). */
493 : static inline void
494 4 : replace_reciprocal_squares (use_operand_p use_p)
495 : {
496 4 : gimple *use_stmt = USE_STMT (use_p);
497 4 : basic_block bb = gimple_bb (use_stmt);
498 4 : struct occurrence *occ = (struct occurrence *) bb->aux;
499 :
500 8 : if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
501 8 : && occ->recip_def)
502 : {
503 4 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
504 4 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
505 4 : gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
506 4 : SET_USE (use_p, occ->square_recip_def);
507 4 : fold_stmt_inplace (&gsi);
508 4 : update_stmt (use_stmt);
509 : }
510 4 : }
511 :
512 :
513 : /* Replace the division at USE_P with a multiplication by the reciprocal, if
514 : possible. */
515 :
516 : static inline void
517 115 : replace_reciprocal (use_operand_p use_p)
518 : {
519 115 : gimple *use_stmt = USE_STMT (use_p);
520 115 : basic_block bb = gimple_bb (use_stmt);
521 115 : struct occurrence *occ = (struct occurrence *) bb->aux;
522 :
523 115 : if (optimize_bb_for_speed_p (bb)
524 115 : && occ->recip_def && use_stmt != occ->recip_def_stmt)
525 : {
526 80 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
527 80 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
528 80 : SET_USE (use_p, occ->recip_def);
529 80 : fold_stmt_inplace (&gsi);
530 80 : update_stmt (use_stmt);
531 : }
532 115 : }
533 :
534 :
535 : /* Free OCC and return one more "struct occurrence" to be freed. */
536 :
537 : static struct occurrence *
538 621 : free_bb (struct occurrence *occ)
539 : {
540 621 : struct occurrence *child, *next;
541 :
542 : /* First get the two pointers hanging off OCC. */
543 621 : next = occ->next;
544 621 : child = occ->children;
545 621 : delete occ;
546 :
547 : /* Now ensure that we don't recurse unless it is necessary. */
548 621 : if (!child)
549 : return next;
550 : else
551 : {
552 24 : while (next)
553 3 : next = free_bb (next);
554 :
555 : return child;
556 : }
557 : }
558 :
559 : /* Transform sequences like
560 : t = sqrt (a)
561 : x = 1.0 / t;
562 : r1 = x * x;
563 : r2 = a * x;
564 : into:
565 : t = sqrt (a)
566 : r1 = 1.0 / a;
567 : r2 = t;
568 : x = r1 * r2;
569 : depending on the uses of x, r1, r2. This removes one multiplication and
570 : allows the sqrt and division operations to execute in parallel.
571 : DEF_GSI is the gsi of the initial division by sqrt that defines
572 : DEF (x in the example above). */
573 :
static void
optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
{
  gimple *use_stmt;
  imm_use_iterator use_iter;
  gimple *stmt = gsi_stmt (*def_gsi);
  tree x = def;
  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
  tree div_rhs1 = gimple_assign_rhs1 (stmt);

  /* Only handle the exact shape x = 1.0 / <ssa>.  */
  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
      || TREE_CODE (div_rhs1) != REAL_CST
      || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
    return;

  /* The divisor must itself be defined by a sqrt call with an lhs.  */
  gcall *sqrt_stmt
    = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));

  if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
    return;

  switch (gimple_call_combined_fn (sqrt_stmt))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      break;

    default:
      return;
    }
  tree a = gimple_call_arg (sqrt_stmt, 0);

  /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */

  /* Statements that use x in x * x.  */
  auto_vec<gimple *> sqr_stmts;
  /* Statements that use x in a * x.  */
  auto_vec<gimple *> mult_stmts;
  bool has_other_use = false;
  bool mult_on_main_path = false;

  /* Classify every non-debug use of x; anything that is neither x * x
     nor a * x counts as an "other" use.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
    {
      if (is_gimple_debug (use_stmt))
	continue;
      if (is_square_of (use_stmt, x))
	{
	  sqr_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else if (is_mult_by (use_stmt, x, a))
	{
	  mult_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else
	has_other_use = true;
    }

  /* In the x * x and a * x cases we just rewire stmt operands or
     remove multiplications.  In the has_other_use case we introduce
     a multiplication so make sure we don't introduce a multiplication
     on a path where there was none.  */
  if (has_other_use && !mult_on_main_path)
    return;

  /* Nothing to rewrite at all.  */
  if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
    return;

  /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
     to be able to compose it from the sqr and mult cases.  */
  if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
      print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
      print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
      fprintf (dump_file, "\n");
    }

  bool delete_div = !has_other_use;
  tree sqr_ssa_name = NULL_TREE;
  if (!sqr_stmts.is_empty ())
    {
      /* r1 = x * x.  Transform the original
	 x = 1.0 / t
	 into
	 tmp1 = 1.0 / a
	 r1 = tmp1.  */

      sqr_ssa_name
	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");

      if (dump_file)
	{
	  fprintf (dump_file, "Replacing original division\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
	  fprintf (dump_file, "with new division\n");
	}
      /* Rebuild the division with divisor A instead of sqrt (a), keep
	 the iterator pointing at the new statement.  */
      stmt
	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
			       gimple_assign_rhs1 (stmt), a);
      gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
      gsi_remove (def_gsi, true);
      *def_gsi = gsi_for_stmt (stmt);
      fold_stmt_inplace (def_gsi);
      update_stmt (stmt);

      if (dump_file)
	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);

      /* The original division has been replaced, not deleted.  */
      delete_div = false;
      gimple *sqr_stmt;
      unsigned int i;
      /* Each x * x statement now simply copies tmp1.  */
      FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
	  update_stmt (sqr_stmt);
	}
    }
  if (!mult_stmts.is_empty ())
    {
      /* r2 = a * x.  Transform this into:
	 r2 = t (The original sqrt (a)).  */
      unsigned int i;
      gimple *mult_stmt = NULL;
      FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);

	  if (dump_file)
	    {
	      fprintf (dump_file, "Replacing squaring multiplication\n");
	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	      fprintf (dump_file, "with assignment\n");
	    }
	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
	  fold_stmt_inplace (&gsi2);
	  update_stmt (mult_stmt);
	  if (dump_file)
	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	}
    }

  if (has_other_use)
    {
      /* Using the two temporaries tmp1, tmp2 from above
	 the original x is now:
	 x = tmp1 * tmp2.  */
      gcc_assert (orig_sqrt_ssa_name);
      gcc_assert (sqr_ssa_name);

      gimple *new_stmt
	= gimple_build_assign (x, MULT_EXPR,
			       orig_sqrt_ssa_name, sqr_ssa_name);
      gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
      update_stmt (stmt);
    }
  else if (delete_div)
    {
      /* Remove the original division.  */
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      release_defs (stmt);
    }
  else
    /* The division was replaced; its lhs x is now unused.  */
    release_ssa_name (x);
}
747 :
748 : /* Look for floating-point divisions among DEF's uses, and try to
749 : replace them by multiplications with the reciprocal. Add
750 : as many statements computing the reciprocal as needed.
751 :
752 : DEF must be a GIMPLE register of a floating-point type. */
753 :
static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  int count = 0;
  int threshold;
  int square_recip_count = 0;
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       def = x * x
       t0 = a / def
       t1 = b / def
       t2 = c / x
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);

  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_division_by (use_stmt, op0))
	    sqrt_recip_count++;
	}
    }

  /* Walk DEF's uses, registering each division by DEF (weight 2) and
     each division by DEF * DEF (weight 1, seen twice) in the
     dominator-tree subset.  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* This is executed twice for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      /* First insert the reciprocal definitions where profitable...  */
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      /* ...then rewrite the divisions into multiplications.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		   * replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt, powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

out:
  /* Tear down the dominator-tree subset (this also clears bb->aux via
     the occurrence destructor).  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
873 :
874 : /* Return an internal function that implements the reciprocal of CALL,
875 : or IFN_LAST if there is no such function that the target supports. */
876 :
877 : internal_fn
878 131 : internal_fn_reciprocal (gcall *call)
879 : {
880 131 : internal_fn ifn;
881 :
882 131 : switch (gimple_call_combined_fn (call))
883 : {
884 115 : CASE_CFN_SQRT:
885 115 : CASE_CFN_SQRT_FN:
886 115 : ifn = IFN_RSQRT;
887 115 : break;
888 :
889 : default:
890 : return IFN_LAST;
891 : }
892 :
893 115 : tree_pair types = direct_internal_fn_types (ifn, call);
894 115 : if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
895 : return IFN_LAST;
896 :
897 : return ifn;
898 : }
899 :
900 : /* Go through all the floating-point SSA_NAMEs, and call
901 : execute_cse_reciprocals_1 on each of them. */
902 : namespace {
903 :
/* Pass descriptor for the reciprocal CSE pass.  */
const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
916 :
class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing and -freciprocal-math is in effect.  */
  bool gate (function *) final override
  {
    return optimize && flag_reciprocal_math;
  }
  unsigned int execute (function *) final override;

}; // class pass_cse_reciprocals
932 :
unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  /* Allocator backing the per-BB occurrence lists built while
     processing each SSA name; freed at the end of the pass.  */
  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* The pass stashes per-BB data in bb->aux; check it starts clear.  */
  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  /* Seed with the default definitions of floating-point parameters.  */
  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      /* Floating-point PHI results are candidate divisors too.  */
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* Process every statement defining a single floating-point SSA
	 name, then, with -funsafe-math-optimizations, try the
	 sqrt-specific reciprocal transform on resulting divisions.  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
        {
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    {
	      execute_cse_reciprocals_1 (&gsi, def);
	      /* execute_cse_reciprocals_1 may have replaced the
		 statement; re-fetch it before looking at it again.  */
	      stmt = gsi_stmt (gsi);
	      if (flag_unsafe_math_optimizations
		  && is_gimple_assign (stmt)
		  && gimple_assign_lhs (stmt) == def
		  && !stmt_can_throw_internal (cfun, stmt)
		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
		optimize_recip_sqrt (&gsi, def);
	    }
	}

      /* The transform below trades a call for extra multiplies, so it
	 is skipped when this block is optimized for size.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
        {
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  /* Prefer an internal reciprocal function; fall back
		     to a target-provided reciprocal builtin.  */
		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  /* Retarget the call so ARG1 now holds rfunc(b).  */
		  gimple_replace_ssa_lhs (call, arg1);
		  reset_flow_sensitive_info (arg1);
		  /* If the kind of call (internal vs. normal) has to
		     change, build a replacement call from scratch;
		     otherwise just swap the callee in place.  */
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      gimple_move_vops (stmt2, call);
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  /* Turn every use a / arg1 into a * arg1, since arg1
		     now holds the reciprocal of the old value.  */
		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}
1107 :
1108 : } // anon namespace
1109 :
/* Factory used by the pass manager to instantiate the reciprocal CSE
   pass.  */
gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}
1115 :
1116 : /* If NAME is the result of a type conversion, look for other
1117 : equivalent dominating or dominated conversions, and replace all
1118 : uses with the earliest dominating name, removing the redundant
1119 : conversions. Return the prevailing name. */
1120 :
static tree
execute_cse_conv_1 (tree name, bool *cfg_changed)
{
  /* Default defs and names on abnormal edges cannot be moved or
     replaced; leave them alone.  */
  if (SSA_NAME_IS_DEFAULT_DEF (name)
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return name;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);

  if (!gimple_assign_cast_p (def_stmt))
    return name;

  tree src = gimple_assign_rhs1 (def_stmt);

  if (TREE_CODE (src) != SSA_NAME)
    return name;

  imm_use_iterator use_iter;
  gimple *use_stmt;

  /* Find the earliest dominating def.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      /* Only consider casts of the same source to a compatible type.  */
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      bool use_dominates;
      if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
	{
	  /* Same block: walk forward from USE_STMT; if we reach
	     DEF_STMT then USE_STMT comes first.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
	    gsi_next (&gsi);
	  use_dominates = !gsi_end_p (gsi);
	}
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
			       gimple_bb (def_stmt)))
	use_dominates = false;
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
			       gimple_bb (use_stmt)))
	use_dominates = true;
      else
	continue;

      /* Keep NAME/DEF_STMT pointing at the earliest equivalent cast
	 seen so far.  */
      if (use_dominates)
	{
	  std::swap (name, lhs);
	  std::swap (def_stmt, use_stmt);
	}
    }

  /* Now go through all uses of SRC again, replacing the equivalent
     dominated conversions.  We may replace defs that were not
     dominated by the then-prevailing defs when we first visited
     them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      basic_block use_bb = gimple_bb (use_stmt);
      if (gimple_bb (def_stmt) == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
	{
	  sincos_stats.conv_removed++;

	  /* Redundant cast: redirect its uses to NAME and delete it;
	     removing a throwing stmt may change the CFG.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  replace_uses_by (lhs, name);
	  if (gsi_remove (&gsi, true)
	      && gimple_purge_dead_eh_edges (use_bb))
	    *cfg_changed = true;
	  release_defs (use_stmt);
	}
    }

  return name;
}
1215 :
 1216             : /* Records an occurrence at statement USE_STMT in the vector of statements
1217 : STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1218 : is not yet initialized. Returns true if the occurrence was pushed on
1219 : the vector. Adjusts *TOP_BB to be the basic block dominating all
1220 : statements in the vector. */
1221 :
1222 : static bool
1223 1241 : maybe_record_sincos (vec<gimple *> *stmts,
1224 : basic_block *top_bb, gimple *use_stmt)
1225 : {
1226 1241 : basic_block use_bb = gimple_bb (use_stmt);
1227 1241 : if (*top_bb
1228 1241 : && (*top_bb == use_bb
1229 66 : || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1230 151 : stmts->safe_push (use_stmt);
1231 1090 : else if (!*top_bb
1232 1090 : || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1233 : {
1234 1070 : stmts->safe_push (use_stmt);
1235 1070 : *top_bb = use_bb;
1236 : }
1237 : else
1238 : return false;
1239 :
1240 : return true;
1241 : }
1242 :
1243 : /* Look for sin, cos and cexpi calls with the same argument NAME and
1244 : create a single call to cexpi CSEing the result in this case.
1245 : We first walk over all immediate uses of the argument collecting
1246 : statements that we can CSE in a vector and in a second pass replace
1247 : the statement rhs with a REALPART or IMAGPART expression on the
1248 : result of the cexpi call we insert before the use statement that
1249 : dominates all other candidates. */
1250 :
static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type = NULL_TREE;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  /* First CSE equivalent conversions of NAME, possibly switching to
     the prevailing (earliest dominating) cast result.  */
  name = execute_cse_conv_1 (name, &cfg_changed);

  /* Collect sin/cos/cexpi calls of NAME that share a dominating block,
     tracking which flavours were seen.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	  continue;
	}

      auto stmt_cfn = gimple_call_combined_fn (use_stmt);
      tree t = mathfn_built_in_type (stmt_cfn);
      if (!t)
	{
	  /* It is possible to get IFN_{SIN,COS} calls, for which
	     mathfn_built_in_type will return NULL.  Those are normally only
	     present for vector operations.  We won't be able to CSE those
	     at the moment.  */
	  gcc_checking_assert (internal_fn_p (stmt_cfn));
	  return false;
	}

      if (!type)
	{
	  type = t;
	  t = TREE_TYPE (name);
	}
      /* This checks that NAME has the right type in the first round,
	 and, in subsequent rounds, that the built_in type is the same
	 type, or a compatible type.  */
      if (type != t && !types_compatible_p (type, t))
	return false;
    }
  /* Nothing to CSE unless at least two different flavours appear.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined by a normal statement in TOP_BB: insert the
	 cexpi call right after its definition.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

	/* Replace call with a copy.  */
	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

	gsi = gsi_for_stmt (use_stmt);
	gsi_replace (&gsi, stmt, true);
	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	  cfg_changed = true;
    }

  return cfg_changed;
}
1374 :
1375 : /* To evaluate powi(x,n), the floating point value x raised to the
1376 : constant integer exponent n, we use a hybrid algorithm that
1377 : combines the "window method" with look-up tables. For an
1378 : introduction to exponentiation algorithms and "addition chains",
1379 : see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1380 : "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1381 : 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1382 : Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1383 :
1384 : /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1385 : multiplications to inline before calling the system library's pow
1386 : function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1387 : so this default never requires calling pow, powf or powl. */
1388 :
1389 : #ifndef POWI_MAX_MULTS
1390 : #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1391 : #endif
1392 :
1393 : /* The size of the "optimal power tree" lookup table. All
1394 : exponents less than this value are simply looked up in the
1395 : powi_table below. This threshold is also used to size the
1396 : cache of pseudo registers that hold intermediate results. */
1397 : #define POWI_TABLE_SIZE 256
1398 :
1399 : /* The size, in bits of the window, used in the "window method"
1400 : exponentiation algorithm. This is equivalent to a radix of
1401 : (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1402 : #define POWI_WINDOW_SIZE 3
1403 :
1404 : /* The following table is an efficient representation of an
1405 : "optimal power tree". For each value, i, the corresponding
 1406             :    value, j, in the table states that an optimal evaluation
1407 : sequence for calculating pow(x,i) can be found by evaluating
1408 : pow(x,j)*pow(x,i-j). An optimal power tree for the first
1409 : 100 integers is given in Knuth's "Seminumerical algorithms". */
1410 :
/* powi_table[i] holds the split point j for exponent i, i.e. pow (x, i)
   is computed as pow (x, j) * pow (x, i - j); see the comment above.  */
static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
1446 :
1447 :
1448 : /* Return the number of multiplications required to calculate
1449 : powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1450 : subroutine of powi_cost. CACHE is an array indicating
1451 : which exponents have already been calculated. */
1452 :
1453 : static int
1454 1120 : powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1455 : {
1456 : /* If we've already calculated this exponent, then this evaluation
1457 : doesn't require any additional multiplications. */
1458 1861 : if (cache[n])
1459 1120 : return 0;
1460 :
1461 741 : cache[n] = true;
1462 741 : return powi_lookup_cost (n - powi_table[n], cache)
1463 741 : + powi_lookup_cost (powi_table[n], cache) + 1;
1464 : }
1465 :
1466 : /* Return the number of multiplications required to calculate
1467 : powi(x,n) for an arbitrary x, given the exponent N. This
1468 : function needs to be kept in sync with powi_as_mults below. */
1469 :
1470 : static int
1471 384 : powi_cost (HOST_WIDE_INT n)
1472 : {
1473 384 : bool cache[POWI_TABLE_SIZE];
1474 384 : unsigned HOST_WIDE_INT digit;
1475 384 : unsigned HOST_WIDE_INT val;
1476 384 : int result;
1477 :
1478 384 : if (n == 0)
1479 : return 0;
1480 :
1481 : /* Ignore the reciprocal when calculating the cost. */
1482 379 : val = absu_hwi (n);
1483 :
1484 : /* Initialize the exponent cache. */
1485 379 : memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1486 379 : cache[1] = true;
1487 :
1488 379 : result = 0;
1489 :
1490 379 : while (val >= POWI_TABLE_SIZE)
1491 : {
1492 0 : if (val & 1)
1493 : {
1494 0 : digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1495 0 : result += powi_lookup_cost (digit, cache)
1496 0 : + POWI_WINDOW_SIZE + 1;
1497 0 : val >>= POWI_WINDOW_SIZE;
1498 : }
1499 : else
1500 : {
1501 0 : val >>= 1;
1502 0 : result++;
1503 : }
1504 : }
1505 :
1506 379 : return result + powi_lookup_cost (val, cache);
1507 : }
1508 :
1509 : /* Recursive subroutine of powi_as_mults. This function takes the
1510 : array, CACHE, of already calculated exponents and an exponent N and
1511 : returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1512 :
1513 : static tree
1514 6113 : powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1515 : unsigned HOST_WIDE_INT n, tree *cache)
1516 : {
1517 6113 : tree op0, op1, ssa_target;
1518 6113 : unsigned HOST_WIDE_INT digit;
1519 6113 : gassign *mult_stmt;
1520 :
1521 6113 : if (n < POWI_TABLE_SIZE && cache[n])
1522 : return cache[n];
1523 :
1524 2166 : ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1525 :
1526 2166 : if (n < POWI_TABLE_SIZE)
1527 : {
1528 2163 : cache[n] = ssa_target;
1529 2163 : op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1530 2163 : op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1531 : }
1532 3 : else if (n & 1)
1533 : {
1534 1 : digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1535 1 : op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1536 1 : op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1537 : }
1538 : else
1539 : {
1540 2 : op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1541 2 : op1 = op0;
1542 : }
1543 :
1544 2166 : mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1545 2166 : gimple_set_location (mult_stmt, loc);
1546 2166 : gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1547 :
1548 2166 : return ssa_target;
1549 : }
1550 :
1551 : /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1552 : This function needs to be kept in sync with powi_cost above. */
1553 :
1554 : tree
1555 1783 : powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1556 : tree arg0, HOST_WIDE_INT n)
1557 : {
1558 1783 : tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1559 1783 : gassign *div_stmt;
1560 1783 : tree target;
1561 :
1562 1783 : if (n == 0)
1563 0 : return build_one_cst (type);
1564 :
1565 1783 : memset (cache, 0, sizeof (cache));
1566 1783 : cache[1] = arg0;
1567 :
1568 1783 : result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1569 1783 : if (n >= 0)
1570 : return result;
1571 :
1572 : /* If the original exponent was negative, reciprocate the result. */
1573 8 : target = make_temp_ssa_name (type, NULL, "powmult");
1574 8 : div_stmt = gimple_build_assign (target, RDIV_EXPR,
1575 : build_real (type, dconst1), result);
1576 8 : gimple_set_location (div_stmt, loc);
1577 8 : gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1578 :
1579 8 : return target;
1580 : }
1581 :
1582 : /* ARG0 and N are the two arguments to a powi builtin in GSI with
1583 : location info LOC. If the arguments are appropriate, create an
1584 : equivalent sequence of statements prior to GSI using an optimal
 1585             :    number of multiplications, and return an expression holding the
1586 : result. */
1587 :
1588 : static tree
1589 630 : gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1590 : tree arg0, HOST_WIDE_INT n)
1591 : {
1592 630 : if ((n >= -1 && n <= 2)
1593 630 : || (optimize_function_for_speed_p (cfun)
1594 351 : && powi_cost (n) <= POWI_MAX_MULTS))
1595 622 : return powi_as_mults (gsi, loc, arg0, n);
1596 :
1597 : return NULL_TREE;
1598 : }
1599 :
1600 : /* Build a gimple call statement that calls FN with argument ARG.
1601 : Set the lhs of the call statement to a fresh SSA name. Insert the
1602 : statement prior to GSI's current position, and return the fresh
1603 : SSA name. */
1604 :
1605 : static tree
1606 44 : build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1607 : tree fn, tree arg)
1608 : {
1609 44 : gcall *call_stmt;
1610 44 : tree ssa_target;
1611 :
1612 44 : call_stmt = gimple_build_call (fn, 1, arg);
1613 44 : ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1614 44 : gimple_set_lhs (call_stmt, ssa_target);
1615 44 : gimple_set_location (call_stmt, loc);
1616 44 : gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1617 :
1618 44 : return ssa_target;
1619 : }
1620 :
1621 : /* Build a gimple binary operation with the given CODE and arguments
1622 : ARG0, ARG1, assigning the result to a new SSA name for variable
1623 : TARGET. Insert the statement prior to GSI's current position, and
1624 : return the fresh SSA name.*/
1625 :
1626 : static tree
1627 905 : build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1628 : const char *name, enum tree_code code,
1629 : tree arg0, tree arg1)
1630 : {
1631 905 : tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1632 905 : gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1633 905 : gimple_set_location (stmt, loc);
1634 905 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1635 905 : return result;
1636 : }
1637 :
1638 : /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1639 : prior to GSI's current position, and return the fresh SSA name. */
1640 :
static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  /* Delegate to gimple_convert, which emits any needed conversion
     statement before GSI (GSI_SAME_STMT leaves GSI in place).  */
  return gimple_convert (gsi, true, GSI_SAME_STMT, loc, type, val);
}
1647 :
/* Parameters of a synthesis of pow (x, c) as a product of sqrt chains,
   filled in by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* factors[i] is true iff the term 0.5**(i+1) participates in the
     sum representing the exponent.  */
  bool *factors;
  /* One past the index of the last set factor, i.e. the length of the
     deepest sqrt chain required.  */
  unsigned int deepest;
  /* Number of multiplications needed to combine the sqrt chains.  */
  unsigned int num_mults;
};
1654 :
1655 : /* Return true iff the real value C can be represented as a
1656 : sum of powers of 0.5 up to N. That is:
1657 : C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1658 : Record in INFO the various parameters of the synthesis algorithm such
1659 : as the factors a[i], the maximum 0.5 power and the number of
1660 : multiplications that will be required. */
1661 :
bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  /* Greedily subtract descending powers of 0.5 (0.5, 0.25, ...) from
     the remainder, recording which terms were taken.  */
  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
         of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  /* This power fits: keep it and charge one multiplication for
	     combining it with the rest of the chain.  */
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      /* Move on to the next smaller power of 0.5.  */
      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  /* Ran out of terms without reaching zero exactly.  */
  return false;
}
1702 :
1703 : /* Return the tree corresponding to FN being applied
1704 : to ARG N times at GSI and LOC.
1705 : Look up previous results from CACHE if need be.
1706 : cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1707 :
1708 : static tree
1709 63 : get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1710 : tree fn, location_t loc, tree *cache)
1711 : {
1712 63 : tree res = cache[n];
1713 63 : if (!res)
1714 : {
1715 40 : tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1716 40 : res = build_and_insert_call (gsi, loc, fn, prev);
1717 40 : cache[n] = res;
1718 : }
1719 :
1720 63 : return res;
1721 : }
1722 :
1723 : /* Print to STREAM the repeated application of function FNAME to ARG
1724 : N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1725 : "foo (foo (x))". */
1726 :
static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  /* Emit N opening applications, the innermost argument, then the
     matching closing parentheses: e.g. "foo (foo (x))" for N == 2.  */
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, "%s (", fname);
  fprintf (stream, "%s", arg);
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, ")");
}
1740 :
1741 : /* Print to STREAM the fractional sequence of sqrt chains
1742 : applied to ARG, described by INFO. Used for the dump file. */
1743 :
1744 : static void
1745 7 : dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1746 : struct pow_synth_sqrt_info *info)
1747 : {
1748 29 : for (unsigned int i = 0; i < info->deepest; i++)
1749 : {
1750 22 : bool is_set = info->factors[i];
1751 22 : if (is_set)
1752 : {
1753 10 : print_nested_fn (stream, "sqrt", arg, i + 1);
1754 10 : if (i != info->deepest - 1)
1755 3 : fprintf (stream, " * ");
1756 : }
1757 : }
1758 7 : }
1759 :
1760 : /* Print to STREAM a representation of raising ARG to an integer
1761 : power N. Used for the dump file. */
1762 :
1763 : static void
1764 7 : dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1765 : {
1766 7 : if (n > 1)
1767 3 : fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1768 4 : else if (n == 1)
1769 3 : fprintf (stream, "%s", arg);
1770 7 : }
1771 :
1772 : /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1773 : square roots. Place at GSI and LOC. Limit the maximum depth
1774 : of the sqrt chains to MAX_DEPTH. Return the tree holding the
1775 : result of the expanded sequence or NULL_TREE if the expansion failed.
1776 :
1777 : This routine assumes that ARG1 is a real number with a fractional part
1778 : (the integer exponent case will have been handled earlier in
1779 : gimple_expand_builtin_pow).
1780 :
1781 : For ARG1 > 0.0:
1782 : * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1783 : FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1784 : FRAC_PART == ARG1 - WHOLE_PART:
1785 : Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1786 : POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1787 : if it can be expressed as such, that is if FRAC_PART satisfies:
1788 : FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1789 : where integer a[i] is either 0 or 1.
1790 :
1791 : Example:
1792 : POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1793 : --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1794 :
1795 : For ARG1 < 0.0 there are two approaches:
1796 : * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1797 : is calculated as above.
1798 :
1799 : Example:
1800 : POW (x, -5.625) == 1.0 / POW (x, 5.625)
1801 : --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1802 :
1803 : * (B) : WHOLE_PART := - ceil (abs (ARG1))
1804 : FRAC_PART := ARG1 - WHOLE_PART
1805 : and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1806 : Example:
1807 : POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1808 : --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1809 :
1810 : For ARG1 < 0.0 we choose between (A) and (B) depending on
1811 : how many multiplications we'd have to do.
1812 : So, for the example in (B): POW (x, -5.875), if we were to
1813 : follow algorithm (A) we would produce:
1814 : 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1815 : which contains more multiplications than approach (B).
1816 :
1817 : Hopefully, this approach will eliminate potentially expensive POW library
1818 : calls when unsafe floating point math is enabled and allow the compiler to
1819 : further optimise the multiplies, square roots and divides produced by this
1820 : function. */
1821 :
static tree
expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
{
  tree type = TREE_TYPE (arg0);
  machine_mode mode = TYPE_MODE (type);
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  /* When ARG1 is negative, we default to expanding as
     1.0 / POW (ARG0, -ARG1) (approach (A) above); cleared below if the
     ceiling-based decomposition (B) is cheaper.  */
  bool one_over = true;

  /* Without a sqrt builtin for this type there is nothing to synthesize.  */
  if (!sqrtfn)
    return NULL_TREE;

  /* Only constant exponents can be decomposed at compile time.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);

  gcc_assert (max_depth > 0);
  /* CACHE[i] will memoize the i-th element of the sqrt chain
     (sqrt (sqrt (... (x)))) so each depth is computed at most once.  */
  tree *cache = XALLOCAVEC (tree, max_depth + 1);

  struct pow_synth_sqrt_info synth_info;
  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
  synth_info.deepest = 0;
  synth_info.num_mults = 0;

  /* Work on the absolute value of the exponent; NEG_EXP records the sign
     so we can reciprocate or divide at the end.  */
  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);

  /* The whole and fractional parts of exp.  */
  REAL_VALUE_TYPE whole_part;
  REAL_VALUE_TYPE frac_part;

  real_floor (&whole_part, mode, &exp);
  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);


  /* Alternative decomposition for negative exponents: round the magnitude
     up instead of down (approach (B) in the function comment).  */
  REAL_VALUE_TYPE ceil_whole = dconst0;
  REAL_VALUE_TYPE ceil_fract = dconst0;

  if (neg_exp)
    {
      real_ceil (&ceil_whole, mode, &exp);
      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
    }

  /* FRAC_PART must be a sum of powers of 0.5 no deeper than MAX_DEPTH,
     otherwise a sqrt chain cannot represent it exactly.  */
  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
    return NULL_TREE;

  /* Check whether it's more profitable to not use 1.0 / ...  */
  if (neg_exp)
    {
      struct pow_synth_sqrt_info alt_synth_info;
      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
      alt_synth_info.deepest = 0;
      alt_synth_info.num_mults = 0;

      /* Prefer decomposition (B) only when it is representable too, is no
	 deeper, and needs strictly fewer multiplications than (A).  */
      if (representable_as_half_series_p (ceil_fract, max_depth,
					  &alt_synth_info)
	  && alt_synth_info.deepest <= synth_info.deepest
	  && alt_synth_info.num_mults < synth_info.num_mults)
	{
	  whole_part = ceil_whole;
	  frac_part = ceil_fract;
	  synth_info.deepest = alt_synth_info.deepest;
	  synth_info.num_mults = alt_synth_info.num_mults;
	  memcpy (synth_info.factors, alt_synth_info.factors,
		  (max_depth + 1) * sizeof (bool));
	  one_over = false;
	}
    }

  /* The integer part of the exponent, handled by powi expansion.  */
  HOST_WIDE_INT n = real_to_integer (&whole_part);
  REAL_VALUE_TYPE cint;
  real_from_integer (&cint, VOIDmode, n, SIGNED);

  /* Bail out if WHOLE_PART does not fit a HOST_WIDE_INT exactly.  */
  if (!real_identical (&whole_part, &cint))
    return NULL_TREE;

  /* Total multiplication count (powi part + sqrt-chain part) must stay
     within the profitability budget.  */
  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
    return NULL_TREE;

  memset (cache, 0, (max_depth + 1) * sizeof (tree));

  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;

  /* Calculate the integer part of the exponent.  */
  if (n > 1)
    {
      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
      if (!integer_res)
	return NULL_TREE;
    }

  /* Dump a symbolic picture of the synthesized expression.  */
  if (dump_file)
    {
      char string[64];

      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);

      if (neg_exp)
	{
	  if (one_over)
	    {
	      fprintf (dump_file, "1.0 / (");
	      dump_integer_part (dump_file, "x", n);
	      if (n > 0)
		fprintf (dump_file, " * ");
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, ")");
	    }
	  else
	    {
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, " / (");
	      dump_integer_part (dump_file, "x", n);
	      fprintf (dump_file, ")");
	    }
	}
      else
	{
	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	  if (n > 0)
	    fprintf (dump_file, " * ");
	  dump_integer_part (dump_file, "x", n);
	}

      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
    }


  tree fract_res = NULL_TREE;
  cache[0] = arg0;

  /* Calculate the fractional part of the exponent.  Each set factor at
     depth i contributes one sqrt chain of length i + 1; chains share
     prefixes through CACHE.  */
  for (unsigned i = 0; i < synth_info.deepest; i++)
    {
      if (synth_info.factors[i])
	{
	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);

	  if (!fract_res)
	    fract_res = sqrt_chain;

	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
	}
    }

  tree res = NULL_TREE;

  /* Combine the integer and fractional pieces according to the sign of
     the exponent and the chosen decomposition.  */
  if (neg_exp)
    {
      if (one_over)
	{
	  /* Approach (A): 1.0 / (x**n * sqrt-chain).  */
	  if (n > 0)
	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					  fract_res, integer_res);
	  else
	    res = fract_res;

	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
					build_real (type, dconst1), res);
	}
      else
	{
	  /* Approach (B): sqrt-chain / x**n.  */
	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					fract_res, integer_res);
	}
    }
  else
    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				  fract_res, integer_res);
  return res;
}
1998 :
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */
2003 :
static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  /* Build the constant 0.25 by halving 1.0 twice (exponent - 2).  */
  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* Don't perform the operation if flag_signaling_nans is on
     and the operand is a signaling NaN.  */
  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
      && ((TREE_CODE (arg0) == REAL_CST
	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
    return NULL_TREE;

  /* pow may set errno; none of the expansions below preserve that.  */
  if (flag_errno_math)
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  /* Exponents in [-1, 2] are always cheap; larger ones only with
     unsafe math, when optimizing for speed and within the powi budget.  */
  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && speed_p
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && real_equal (&c, &dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_equal (&c, &dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && speed_p
      && hw_sqrt_exists
      && real_equal (&c, &dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }


  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
  if (flag_unsafe_math_optimizations
      && sqrtfn
      && hw_sqrt_exists
      && (speed_p || real_equal (&c, &dconst1_4))
      && !HONOR_SIGNED_ZEROS (mode))
    {
      /* At -Os only depth 2 is allowed, which still covers 0.25.  */
      unsigned int max_depth = speed_p
				? param_max_pow_sqrt_depth
				: 2;

      tree expand_with_sqrts
	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);

      if (expand_with_sqrts)
	return expand_with_sqrts;
    }

  /* C2_IS_INT records whether 2c is an integer, i.e. whether c is a
     multiple of 0.5; used below to reject exponents already covered by
     the sqrt-based expansions.  */
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  /* C2 = round(3c)/3, compared against c to check that 3c really is the
     integer n after rounding to MODE.  */
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
2191 :
2192 : /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2193 : on the SSA_NAME argument of each of them. */
2194 :
2195 : namespace {
2196 :
/* Pass descriptor for the sincos CSE pass; see class pass_cse_sincos.  */
const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINCOS, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
2209 :
/* Pass that CSEs calls to sin, cos and cexpi on the same argument into
   a single cexpi/sincos computation.  */
class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  /* Run at any optimization level above -O0.  */
  bool gate (function *) final override
  {
    return optimize;
  }

  unsigned int execute (function *) final override;

}; // class pass_cse_sincos
2226 :
/* Main entry point of the sincos pass: scan every statement of FUN for
   calls to sin, cos or cexpi with an SSA_NAME argument and hand the
   argument to execute_cse_sincos_1.  Returns TODO_cleanup_cfg when any
   transformation changed the CFG.  */
unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  /* Dominance info is needed by the dominator-based placement done in
     the per-argument worker.  */
  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only calls whose result is used are worth combining.  */
	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg;
	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_COS:
		CASE_CFN_SIN:
		CASE_CFN_CEXPI:
		  arg = gimple_call_arg (stmt, 0);
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex,
						  TREE_TYPE (arg))
		      && !targetm.libc_has_function (function_sincos,
						     TREE_TYPE (arg)))
		    break;

		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;
		default:
		  break;
		}
	    }
	}
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);
  statistics_counter_event (fun, "conv statements removed",
			    sincos_stats.conv_removed);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
2278 :
2279 : } // anon namespace
2280 :
/* Factory function for the sincos pass, called by the pass manager.  */
gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}
2286 :
2287 : /* Expand powi(x,n) into an optimal number of multiplies, when n is a
2288 : constant. */
2289 : namespace {
2290 :
/* Pass descriptor for the pow/powi expansion pass; see
   class pass_expand_pow.  */
const pass_data pass_data_expand_pow =
{
  GIMPLE_PASS, /* type */
  "pow", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_POW, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
2303 :
/* Pass that expands pow and powi calls with constant exponents into
   cheaper sequences of multiplies, sqrt and cbrt calls.  */
class pass_expand_pow : public gimple_opt_pass
{
public:
  pass_expand_pow (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_pow, ctxt)
  {}

  /* opt_pass methods: */
  /* Run at any optimization level above -O0.  */
  bool gate (function *) final override
  {
    return optimize;
  }

  unsigned int execute (function *) final override;

}; // class pass_expand_pow
2320 :
/* Main entry point of the pow expansion pass: walk all statements of FUN
   and replace pow/powi calls with constant exponents by equivalent
   cheaper sequences.  Returns TODO_cleanup_cfg when EH edge purging
   changed the CFG.  */
unsigned int
pass_expand_pow::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  /* Dominance info is used by the insertion helpers called below.  */
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_POW:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      /* Replace the call with an assignment of the
			 expanded expression to the original lhs, and
			 drop the call's virtual definition.  */
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_POWI:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  /* powi(-1, n) is 1 for even n and -1 for odd n; select
		     with a COND_EXPR on the low bit of the exponent.  */
		  if (real_minus_onep (arg0))
		    {
		      tree t0, t1, cond, one, minus_one;
		      /* Deliberately shadows the outer STMT inside this
			 scope only.  */
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
						  arg1, build_int_cst (t1, 1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign (result, COND_EXPR, cond,
						  minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      /* Expand only exponents that fit a host integer.  */
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
2431 :
2432 : } // anon namespace
2433 :
/* Factory function for the pow expansion pass, called by the pass
   manager.  */
gimple_opt_pass *
make_pass_expand_pow (gcc::context *ctxt)
{
  return new pass_expand_pow (ctxt);
}
2439 :
2440 : /* Return true if stmt is a type conversion operation that can be stripped
2441 : when used in a widening multiply operation. */
2442 : static bool
2443 467560 : widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2444 : {
2445 467560 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2446 :
2447 467560 : if (TREE_CODE (result_type) == INTEGER_TYPE)
2448 : {
2449 467560 : tree op_type;
2450 467560 : tree inner_op_type;
2451 :
2452 467560 : if (!CONVERT_EXPR_CODE_P (rhs_code))
2453 : return false;
2454 :
2455 185498 : op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2456 :
2457 : /* If the type of OP has the same precision as the result, then
2458 : we can strip this conversion. The multiply operation will be
2459 : selected to create the correct extension as a by-product. */
2460 185498 : if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2461 : return true;
2462 :
2463 : /* We can also strip a conversion if it preserves the signed-ness of
2464 : the operation and doesn't narrow the range. */
2465 1163 : inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2466 :
2467 : /* If the inner-most type is unsigned, then we can strip any
2468 : intermediate widening operation. If it's signed, then the
2469 : intermediate widening operation must also be signed. */
2470 1163 : if ((TYPE_UNSIGNED (inner_op_type)
2471 1162 : || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2472 2325 : && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2473 : return true;
2474 :
2475 1162 : return false;
2476 : }
2477 :
2478 0 : return rhs_code == FIXED_CONVERT_EXPR;
2479 : }
2480 :
2481 : /* Return true if RHS is a suitable operand for a widening multiplication,
2482 : assuming a target type of TYPE.
2483 : There are two cases:
2484 :
2485 : - RHS makes some value at least twice as wide. Store that value
2486 : in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2487 :
   - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
     and store NULL in *TYPE_OUT.  */
2490 :
static bool
is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
			tree *new_rhs_out)
{
  gimple *stmt;
  tree type1, rhs1;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      /* Use tree_non_zero_bits to see if this operand is zero_extended
	 for unsigned widening multiplications or non-negative for
	 signed widening multiplications.  */
      if (TREE_CODE (type) == INTEGER_TYPE
	  && (TYPE_PRECISION (type) & 1) == 0
	  && int_mode_for_size (TYPE_PRECISION (type) / 2, 1).exists ())
	{
	  unsigned int prec = TYPE_PRECISION (type);
	  unsigned int hprec = prec / 2;
	  wide_int bits = wide_int::from (tree_nonzero_bits (rhs), prec,
					  TYPE_SIGN (TREE_TYPE (rhs)));
	  /* Unsigned case: all bits of the upper half must be known
	     zero for RHS to fit in the half-precision type.  */
	  if (TYPE_UNSIGNED (type)
	      && wi::bit_and (bits, wi::mask (hprec, true, prec)) == 0)
	    {
	      *type_out = build_nonstandard_integer_type (hprec, true);
	      /* X & MODE_MASK can be simplified to (T)X.  */
	      stmt = SSA_NAME_DEF_STMT (rhs);
	      if (is_gimple_assign (stmt)
		  && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
		  && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
		  && wide_int::from (wi::to_wide (gimple_assign_rhs2 (stmt)),
				     prec, TYPE_SIGN (TREE_TYPE (rhs)))
		     == wi::mask (hprec, false, prec))
		*new_rhs_out = gimple_assign_rhs1 (stmt);
	      else
		*new_rhs_out = rhs;
	      return true;
	    }
	  /* Signed case: additionally the sign bit of the lower half
	     must be known zero (value is non-negative and fits).  */
	  else if (!TYPE_UNSIGNED (type)
		   && wi::bit_and (bits, wi::mask (hprec - 1, true, prec)) == 0)
	    {
	      *type_out = build_nonstandard_integer_type (hprec, false);
	      *new_rhs_out = rhs;
	      return true;
	    }
	}

      /* Otherwise look through a strippable widening conversion feeding
	 RHS and test the unconverted operand instead.  */
      stmt = SSA_NAME_DEF_STMT (rhs);
      if (is_gimple_assign (stmt))
	{

	  if (widening_mult_conversion_strippable_p (type, stmt))
	    {
	      rhs1 = gimple_assign_rhs1 (stmt);

	      /* A converted constant is reported with a NULL type; the
		 caller resolves it against the other operand.  */
	      if (TREE_CODE (rhs1) == INTEGER_CST)
		{
		  *new_rhs_out = rhs1;
		  *type_out = NULL;
		  return true;
		}
	      /* Non-constant: fall through with the stripped operand.  */
	    }
	  else
	    rhs1 = rhs;
	}
      else
	rhs1 = rhs;

      type1 = TREE_TYPE (rhs1);

      /* The (possibly stripped) operand must be of the same kind as
	 TYPE and at most half its precision.  */
      if (TREE_CODE (type1) != TREE_CODE (type)
	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
	return false;

      *new_rhs_out = rhs1;
      *type_out = type1;
      return true;
    }

  /* A bare integer constant always qualifies; type resolved later.  */
  if (TREE_CODE (rhs) == INTEGER_CST)
    {
      *new_rhs_out = rhs;
      *type_out = NULL;
      return true;
    }

  return false;
}
2578 :
2579 : /* Return true if STMT performs a widening multiplication, assuming the
2580 : output type is TYPE. If so, store the unwidened types of the operands
2581 : in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2582 : *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2583 : and *TYPE2_OUT would give the operands of the multiplication. */
2584 :
static bool
is_widening_mult_p (gimple *stmt,
		    tree *type1_out, tree *rhs1_out,
		    tree *type2_out, tree *rhs2_out)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));

  /* Only integer (non-trapping) and fixed-point results qualify.  */
  if (TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_OVERFLOW_TRAPS (type))
	return false;
    }
  else if (TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  /* Each multiplication operand must itself be widenable.  */
  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
			       rhs1_out))
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
			       rhs2_out))
    return false;

  /* A NULL type means the operand is an integer constant; give it the
     type of the other operand, provided the constant fits.  */
  if (*type1_out == NULL)
    {
      /* Both operands being constants is not a widening multiply.  */
      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
	return false;
      *type1_out = *type2_out;
    }

  if (*type2_out == NULL)
    {
      if (!int_fits_type_p (*rhs2_out, *type1_out))
	return false;
      *type2_out = *type1_out;
    }

  /* Ensure that the larger of the two operands comes first.  */
  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
    {
      std::swap (*type1_out, *type2_out);
      std::swap (*rhs1_out, *rhs2_out);
    }

  return true;
}
2631 :
2632 : /* Check to see if the CALL statement is an invocation of copysign
2633 : with 1. being the first argument. */
2634 : static bool
2635 163050 : is_copysign_call_with_1 (gimple *call)
2636 : {
2637 168153 : gcall *c = dyn_cast <gcall *> (call);
2638 5156 : if (! c)
2639 : return false;
2640 :
2641 5156 : enum combined_fn code = gimple_call_combined_fn (c);
2642 :
2643 5156 : if (code == CFN_LAST)
2644 : return false;
2645 :
2646 4232 : if (builtin_fn_p (code))
2647 : {
2648 1200 : switch (as_builtin_fn (code))
2649 : {
2650 30 : CASE_FLT_FN (BUILT_IN_COPYSIGN):
2651 30 : CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2652 30 : return real_onep (gimple_call_arg (c, 0));
2653 : default:
2654 : return false;
2655 : }
2656 : }
2657 :
2658 3032 : if (internal_fn_p (code))
2659 : {
2660 3032 : switch (as_internal_fn (code))
2661 : {
2662 23 : case IFN_COPYSIGN:
2663 23 : return real_onep (gimple_call_arg (c, 0));
2664 : default:
2665 : return false;
2666 : }
2667 : }
2668 :
2669 : return false;
2670 : }
2671 :
2672 : /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2673 : This only happens when the xorsign optab is defined, if the
2674 : pattern is not a xorsign pattern or if expansion fails FALSE is
2675 : returned, otherwise TRUE is returned. */
static bool
convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree treeop0, treeop1, lhs, type;
  location_t loc = gimple_location (stmt);
  lhs = gimple_assign_lhs (stmt);
  treeop0 = gimple_assign_rhs1 (stmt);
  treeop1 = gimple_assign_rhs2 (stmt);
  type = TREE_TYPE (lhs);
  machine_mode mode = TYPE_MODE (type);

  /* xorsign flips only the sign bit, which is not valid when signaling
     NaNs must be honored.  */
  if (HONOR_SNANS (type))
    return false;

  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
    {
      /* Find which operand is the single-use copysign (1, y) call;
	 try rhs1 first, then rhs2.  */
      gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
      if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
	{
	  call0 = SSA_NAME_DEF_STMT (treeop1);
	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
	    return false;

	  /* The copysign was rhs2; make TREEOP1 name the other (value)
	     operand so the code below is uniform.  */
	  treeop1 = treeop0;
	}
      /* The target must implement xorsign for this mode.  */
      if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
	return false;

      /* The sign source is the second argument of copysign (1, y).  */
      gcall *c = as_a<gcall*> (call0);
      treeop0 = gimple_call_arg (c, 1);

      gcall *call_stmt
	= gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
      gimple_set_lhs (call_stmt, lhs);
      gimple_set_location (call_stmt, loc);
      gsi_replace (gsi, call_stmt, true);
      return true;
    }

  return false;
}
2718 : /* Process a single gimple statement STMT, which has a MULT_EXPR as
2719 : its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2720 : value is true iff we converted the statement. */
2721 :
2722 : static bool
2723 721293 : convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2724 : {
2725 721293 : tree lhs, rhs1, rhs2, type, type1, type2;
2726 721293 : enum insn_code handler;
2727 721293 : scalar_int_mode to_mode, from_mode, actual_mode;
2728 721293 : optab op;
2729 721293 : int actual_precision;
2730 721293 : location_t loc = gimple_location (stmt);
2731 721293 : bool from_unsigned1, from_unsigned2;
2732 :
2733 721293 : lhs = gimple_assign_lhs (stmt);
2734 721293 : type = TREE_TYPE (lhs);
2735 721293 : if (TREE_CODE (type) != INTEGER_TYPE)
2736 : return false;
2737 :
2738 585122 : if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2739 : return false;
2740 :
  /* If any one of rhs1 and rhs2 is subject to abnormal coalescing,
     avoid the transform.  */
2743 150981 : if ((TREE_CODE (rhs1) == SSA_NAME
2744 150981 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2745 301961 : || (TREE_CODE (rhs2) == SSA_NAME
2746 24180 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2747 : return false;
2748 :
2749 150980 : to_mode = SCALAR_INT_TYPE_MODE (type);
2750 150980 : from_mode = SCALAR_INT_TYPE_MODE (type1);
2751 150980 : if (to_mode == from_mode)
2752 : return false;
2753 :
2754 150976 : from_unsigned1 = TYPE_UNSIGNED (type1);
2755 150976 : from_unsigned2 = TYPE_UNSIGNED (type2);
2756 :
2757 150976 : if (from_unsigned1 && from_unsigned2)
2758 : op = umul_widen_optab;
2759 55615 : else if (!from_unsigned1 && !from_unsigned2)
2760 : op = smul_widen_optab;
2761 : else
2762 1917 : op = usmul_widen_optab;
2763 :
2764 150976 : handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2765 : &actual_mode);
2766 :
2767 150976 : if (handler == CODE_FOR_nothing)
2768 : {
2769 140828 : if (op != smul_widen_optab)
2770 : {
2771 : /* We can use a signed multiply with unsigned types as long as
2772 : there is a wider mode to use, or it is the smaller of the two
2773 : types that is unsigned. Note that type1 >= type2, always. */
2774 88637 : if ((TYPE_UNSIGNED (type1)
2775 86933 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2776 88637 : || (TYPE_UNSIGNED (type2)
2777 1704 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2778 : {
2779 88637 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2780 177274 : || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2781 88637 : return false;
2782 : }
2783 :
2784 0 : op = smul_widen_optab;
2785 0 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2786 : from_mode,
2787 : &actual_mode);
2788 :
2789 0 : if (handler == CODE_FOR_nothing)
2790 : return false;
2791 :
2792 : from_unsigned1 = from_unsigned2 = false;
2793 : }
2794 : else
2795 : {
2796 : /* Expand can synthesize smul_widen_optab if the target
2797 : supports umul_widen_optab. */
2798 52191 : op = umul_widen_optab;
2799 52191 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2800 : from_mode,
2801 : &actual_mode);
2802 52191 : if (handler == CODE_FOR_nothing)
2803 : return false;
2804 : }
2805 : }
2806 :
2807 : /* Ensure that the inputs to the handler are in the correct precison
2808 : for the opcode. This will be the full mode size. */
2809 10148 : actual_precision = GET_MODE_PRECISION (actual_mode);
2810 10148 : if (2 * actual_precision > TYPE_PRECISION (type))
2811 : return false;
2812 10148 : if (actual_precision != TYPE_PRECISION (type1)
2813 10148 : || from_unsigned1 != TYPE_UNSIGNED (type1))
2814 : {
2815 8 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2816 : {
2817 0 : if (TREE_CODE (rhs1) == INTEGER_CST)
2818 0 : rhs1 = fold_convert (type1, rhs1);
2819 : else
2820 0 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2821 : }
2822 8 : type1 = build_nonstandard_integer_type (actual_precision,
2823 : from_unsigned1);
2824 : }
2825 10148 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2826 : {
2827 9411 : if (TREE_CODE (rhs1) == INTEGER_CST)
2828 0 : rhs1 = fold_convert (type1, rhs1);
2829 : else
2830 9411 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2831 : }
2832 10148 : if (actual_precision != TYPE_PRECISION (type2)
2833 10148 : || from_unsigned2 != TYPE_UNSIGNED (type2))
2834 : {
2835 8 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2836 : {
2837 8 : if (TREE_CODE (rhs2) == INTEGER_CST)
2838 8 : rhs2 = fold_convert (type2, rhs2);
2839 : else
2840 0 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2841 : }
2842 8 : type2 = build_nonstandard_integer_type (actual_precision,
2843 : from_unsigned2);
2844 : }
2845 10148 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2846 : {
2847 9602 : if (TREE_CODE (rhs2) == INTEGER_CST)
2848 2097 : rhs2 = fold_convert (type2, rhs2);
2849 : else
2850 7505 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2851 : }
2852 :
2853 10148 : gimple_assign_set_rhs1 (stmt, rhs1);
2854 10148 : gimple_assign_set_rhs2 (stmt, rhs2);
2855 10148 : gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2856 10148 : update_stmt (stmt);
2857 10148 : widen_mul_stats.widen_mults_inserted++;
2858 10148 : return true;
2859 : }
2860 :
2861 : /* Process a single gimple statement STMT, which is found at the
2862 : iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2863 : rhs (given by CODE), and try to convert it into a
2864 : WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2865 : is true iff we converted the statement. */
2866 :
2867 : static bool
2868 2578042 : convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2869 : enum tree_code code)
2870 : {
2871 2578042 : gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2872 2578042 : gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2873 2578042 : tree type, type1, type2, optype;
2874 2578042 : tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2875 2578042 : enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2876 2578042 : optab this_optab;
2877 2578042 : enum tree_code wmult_code;
2878 2578042 : enum insn_code handler;
2879 2578042 : scalar_mode to_mode, from_mode, actual_mode;
2880 2578042 : location_t loc = gimple_location (stmt);
2881 2578042 : int actual_precision;
2882 2578042 : bool from_unsigned1, from_unsigned2;
2883 :
2884 2578042 : lhs = gimple_assign_lhs (stmt);
2885 2578042 : type = TREE_TYPE (lhs);
2886 2578042 : if ((TREE_CODE (type) != INTEGER_TYPE
2887 397377 : && TREE_CODE (type) != FIXED_POINT_TYPE)
2888 2578042 : || !type_has_mode_precision_p (type))
2889 398721 : return false;
2890 :
2891 2179321 : if (code == MINUS_EXPR)
2892 : wmult_code = WIDEN_MULT_MINUS_EXPR;
2893 : else
2894 1941888 : wmult_code = WIDEN_MULT_PLUS_EXPR;
2895 :
2896 2179321 : rhs1 = gimple_assign_rhs1 (stmt);
2897 2179321 : rhs2 = gimple_assign_rhs2 (stmt);
2898 :
2899 2179321 : if (TREE_CODE (rhs1) == SSA_NAME)
2900 : {
2901 2145393 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2902 2145393 : if (is_gimple_assign (rhs1_stmt))
2903 1253984 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2904 : }
2905 :
2906 2179321 : if (TREE_CODE (rhs2) == SSA_NAME)
2907 : {
2908 787704 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2909 787704 : if (is_gimple_assign (rhs2_stmt))
2910 605867 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2911 : }
2912 :
2913 : /* Allow for one conversion statement between the multiply
2914 : and addition/subtraction statement. If there are more than
2915 : one conversions then we assume they would invalidate this
2916 : transformation. If that's not the case then they should have
2917 : been folded before now. */
2918 2179321 : if (CONVERT_EXPR_CODE_P (rhs1_code))
2919 : {
2920 421594 : conv1_stmt = rhs1_stmt;
2921 421594 : rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2922 421594 : if (TREE_CODE (rhs1) == SSA_NAME)
2923 : {
2924 356142 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2925 356142 : if (is_gimple_assign (rhs1_stmt))
2926 205225 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2927 : }
2928 : else
2929 : return false;
2930 : }
2931 2113869 : if (CONVERT_EXPR_CODE_P (rhs2_code))
2932 : {
2933 196213 : conv2_stmt = rhs2_stmt;
2934 196213 : rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2935 196213 : if (TREE_CODE (rhs2) == SSA_NAME)
2936 : {
2937 184951 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2938 184951 : if (is_gimple_assign (rhs2_stmt))
2939 115026 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2940 : }
2941 : else
2942 : return false;
2943 : }
2944 :
2945 : /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2946 : is_widening_mult_p, but we still need the rhs returns.
2947 :
2948 : It might also appear that it would be sufficient to use the existing
2949 : operands of the widening multiply, but that would limit the choice of
2950 : multiply-and-accumulate instructions.
2951 :
2952 : If the widened-multiplication result has more than one use, it is
2953 : probably wiser not to do the conversion. Also restrict this operation
2954 : to single basic block to avoid moving the multiply to a different block
2955 : with a higher execution frequency. */
2956 2102607 : if (code == PLUS_EXPR
2957 1869912 : && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2958 : {
2959 139160 : if (!has_single_use (rhs1)
2960 77099 : || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2961 206895 : || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2962 : &type2, &mult_rhs2))
2963 117808 : return false;
2964 : add_rhs = rhs2;
2965 : conv_stmt = conv1_stmt;
2966 : }
2967 1963447 : else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2968 : {
2969 124489 : if (!has_single_use (rhs2)
2970 75866 : || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2971 193037 : || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2972 : &type2, &mult_rhs2))
2973 110132 : return false;
2974 : add_rhs = rhs1;
2975 : conv_stmt = conv2_stmt;
2976 : }
2977 : else
2978 : return false;
2979 :
2980 35709 : to_mode = SCALAR_TYPE_MODE (type);
2981 35709 : from_mode = SCALAR_TYPE_MODE (type1);
2982 35709 : if (to_mode == from_mode)
2983 : return false;
2984 :
2985 : /* For fixed point types, the mode classes could be different
2986 : so reject that case. */
2987 35706 : if (GET_MODE_CLASS (from_mode) != GET_MODE_CLASS (to_mode))
2988 : return false;
2989 :
2990 35706 : from_unsigned1 = TYPE_UNSIGNED (type1);
2991 35706 : from_unsigned2 = TYPE_UNSIGNED (type2);
2992 35706 : optype = type1;
2993 :
2994 : /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2995 35706 : if (from_unsigned1 != from_unsigned2)
2996 : {
2997 913 : if (!INTEGRAL_TYPE_P (type))
2998 : return false;
2999 : /* We can use a signed multiply with unsigned types as long as
3000 : there is a wider mode to use, or it is the smaller of the two
3001 : types that is unsigned. Note that type1 >= type2, always. */
3002 913 : if ((from_unsigned1
3003 56 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3004 913 : || (from_unsigned2
3005 857 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3006 : {
3007 1790 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3008 1826 : || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3009 877 : return false;
3010 : }
3011 :
3012 36 : from_unsigned1 = from_unsigned2 = false;
3013 36 : optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3014 : false);
3015 : }
3016 :
3017 : /* If there was a conversion between the multiply and addition
3018 : then we need to make sure it fits a multiply-and-accumulate.
3019 : There should be a single mode change which does not change the
3020 : value. */
3021 34829 : if (conv_stmt)
3022 : {
3023 : /* We use the original, unmodified data types for this. */
3024 737 : tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3025 737 : tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3026 737 : int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3027 737 : bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3028 :
3029 737 : if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3030 : {
3031 : /* Conversion is a truncate. */
3032 0 : if (TYPE_PRECISION (to_type) < data_size)
3033 : return false;
3034 : }
3035 737 : else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3036 : {
3037 : /* Conversion is an extend. Check it's the right sort. */
3038 382 : if (TYPE_UNSIGNED (from_type) != is_unsigned
3039 382 : && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3040 : return false;
3041 : }
3042 : /* else convert is a no-op for our purposes. */
3043 : }
3044 :
3045 : /* Verify that the machine can perform a widening multiply
3046 : accumulate in this mode/signedness combination, otherwise
3047 : this transformation is likely to pessimize code. */
3048 34510 : this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3049 34510 : handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3050 : from_mode, &actual_mode);
3051 :
3052 34510 : if (handler == CODE_FOR_nothing)
3053 : return false;
3054 :
3055 : /* Ensure that the inputs to the handler are in the correct precision
3056 : for the opcode. This will be the full mode size. */
3057 0 : actual_precision = GET_MODE_PRECISION (actual_mode);
3058 0 : if (actual_precision != TYPE_PRECISION (type1)
3059 0 : || from_unsigned1 != TYPE_UNSIGNED (type1))
3060 : {
3061 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3062 : {
3063 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3064 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3065 : else
3066 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3067 : }
3068 0 : type1 = build_nonstandard_integer_type (actual_precision,
3069 : from_unsigned1);
3070 : }
3071 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3072 : {
3073 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3074 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3075 : else
3076 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3077 : }
3078 0 : if (actual_precision != TYPE_PRECISION (type2)
3079 0 : || from_unsigned2 != TYPE_UNSIGNED (type2))
3080 : {
3081 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3082 : {
3083 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3084 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3085 : else
3086 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3087 : }
3088 0 : type2 = build_nonstandard_integer_type (actual_precision,
3089 : from_unsigned2);
3090 : }
3091 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3092 : {
3093 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3094 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3095 : else
3096 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3097 : }
3098 :
3099 0 : if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3100 0 : add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3101 :
3102 0 : gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3103 : add_rhs);
3104 0 : update_stmt (gsi_stmt (*gsi));
3105 0 : widen_mul_stats.maccs_inserted++;
3106 0 : return true;
3107 : }
3108 :
3109 : /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3110 : OP2 and which we know is used in statements that can be, together with the
3111 : multiplication, converted to FMAs, perform the transformation. */
3112 :
3113 : static void
3114 17496 : convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3115 : {
3116 17496 : gimple *use_stmt;
3117 17496 : imm_use_iterator imm_iter;
3118 17496 : gcall *fma_stmt;
3119 :
3120 52546 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3121 : {
3122 17554 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3123 17554 : tree addop, mulop1 = op1, result = mul_result;
3124 17554 : bool negate_p = false;
3125 17554 : gimple_seq seq = NULL;
3126 :
3127 17554 : if (is_gimple_debug (use_stmt))
3128 0 : continue;
3129 :
3130 : /* If the use is a value-preserving type conversion, look through it
3131 : to the real use, since the operations are the same under two's
3132 : complement. */
3133 17554 : tree lhs_type;
3134 17554 : if (gimple_assign_cast_p (use_stmt)
3135 0 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3136 17554 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3137 : {
3138 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3139 0 : gimple *tmp_use;
3140 0 : use_operand_p tmp_use_p;
3141 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3142 : {
3143 0 : release_defs (use_stmt);
3144 0 : use_stmt = tmp_use;
3145 0 : result = cast_lhs;
3146 0 : gsi_remove (&gsi, true);
3147 0 : gsi = gsi_for_stmt (use_stmt);
3148 : }
3149 : }
3150 :
3151 17554 : if (is_gimple_assign (use_stmt)
3152 17554 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3153 : {
3154 700 : result = gimple_assign_lhs (use_stmt);
3155 700 : use_operand_p use_p;
3156 700 : gimple *neguse_stmt;
3157 700 : single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3158 700 : gsi_remove (&gsi, true);
3159 700 : release_defs (use_stmt);
3160 :
3161 700 : use_stmt = neguse_stmt;
3162 700 : gsi = gsi_for_stmt (use_stmt);
3163 700 : negate_p = true;
3164 : }
3165 :
3166 17554 : tree cond, else_value, ops[3], len, bias;
3167 17554 : tree_code code;
3168 17554 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3169 : ops, &else_value,
3170 : &len, &bias))
3171 0 : gcc_unreachable ();
3172 17554 : addop = ops[0] == result ? ops[1] : ops[0];
3173 :
3174 17554 : if (code == MINUS_EXPR)
3175 : {
3176 5769 : if (ops[0] == result)
3177 : /* a * b - c -> a * b + (-c) */
3178 2910 : addop = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (addop), addop);
3179 : else
3180 : /* a - b * c -> (-b) * c + a */
3181 2859 : negate_p = !negate_p;
3182 : }
3183 :
3184 17554 : if (negate_p)
3185 3559 : mulop1 = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (mulop1), mulop1);
3186 :
3187 17554 : if (seq)
3188 5764 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3189 :
3190 : /* Ensure all the operands are of the same type. Use the type of the
3191 : addend as that's the statement being replaced. */
3192 17554 : op2 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3193 17554 : UNKNOWN_LOCATION, TREE_TYPE (addop), op2);
3194 17554 : mulop1 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3195 17554 : UNKNOWN_LOCATION, TREE_TYPE (addop), mulop1);
3196 :
3197 17554 : if (len)
3198 0 : fma_stmt
3199 0 : = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3200 : addop, else_value, len, bias)
3201 17554 : else if (cond)
3202 94 : fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3203 : op2, addop, else_value);
3204 : else
3205 17460 : fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3206 17554 : gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3207 17554 : gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3208 : use_stmt));
3209 17554 : gsi_replace (&gsi, fma_stmt, true);
3210 : /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3211 : regardless of where the negation occurs. */
3212 17554 : gimple *orig_stmt = gsi_stmt (gsi);
3213 17554 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3214 : {
3215 5813 : if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3216 0 : gcc_unreachable ();
3217 5813 : update_stmt (gsi_stmt (gsi));
3218 : }
3219 :
3220 17554 : if (dump_file && (dump_flags & TDF_DETAILS))
3221 : {
3222 3 : fprintf (dump_file, "Generated FMA ");
3223 3 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3224 3 : fprintf (dump_file, "\n");
3225 : }
3226 :
3227 : /* If the FMA result is negated in a single use, fold the negation
3228 : too. */
3229 17554 : orig_stmt = gsi_stmt (gsi);
3230 17554 : use_operand_p use_p;
3231 17554 : gimple *neg_stmt;
3232 17554 : if (is_gimple_call (orig_stmt)
3233 17554 : && gimple_call_internal_p (orig_stmt)
3234 17554 : && gimple_call_lhs (orig_stmt)
3235 17554 : && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3236 17554 : && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3237 12536 : && is_gimple_assign (neg_stmt)
3238 9970 : && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3239 18907 : && !stmt_could_throw_p (cfun, neg_stmt))
3240 : {
3241 1353 : gsi = gsi_for_stmt (neg_stmt);
3242 1353 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3243 : {
3244 1353 : if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3245 0 : gcc_unreachable ();
3246 1353 : update_stmt (gsi_stmt (gsi));
3247 1353 : if (dump_file && (dump_flags & TDF_DETAILS))
3248 : {
3249 0 : fprintf (dump_file, "Folded FMA negation ");
3250 0 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3251 0 : fprintf (dump_file, "\n");
3252 : }
3253 : }
3254 : }
3255 :
3256 17554 : widen_mul_stats.fmas_inserted++;
3257 17496 : }
3258 17496 : }
3258 :
3259 : /* Data necessary to perform the actual transformation from a multiplication
3260 : and an addition to an FMA after decision is taken it should be done and to
3261 : then delete the multiplication statement from the function IL. */
3262 :
3263 : struct fma_transformation_info
3264 : {
3265 : gimple *mul_stmt; /* The multiplication statement to be deleted. */
3266 : tree mul_result; /* Result of the multiplication. */
3267 : tree op1; /* First multiplication operand. */
3268 : tree op2; /* Second multiplication operand. */
3269 : };
3270 :
3271 : /* Structure containing the current state of FMA deferring, i.e. whether we are
3272 : deferring, whether to continue deferring, and all data necessary to come
3273 : back and perform all deferred transformations. */
3274 :
3275 10188207 : class fma_deferring_state
3276 : {
3277 : public:
3278 : /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3279 : do any deferring. */
3280 :
3281 10188207 : fma_deferring_state (bool perform_deferring)
3282 10188207 : : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3283 10188207 : m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3284 :
3285 : /* List of FMA candidates for which the transformation has been determined
3286 : to be possible but which, at this point in the BB analysis, we do not
3287 : yet consider beneficial. */
3288 : auto_vec<fma_transformation_info, 8> m_candidates;
3289 :
3290 : /* Set of results of multiplication that are part of an already deferred FMA
3291 : candidates. */
3292 : hash_set<tree> m_mul_result_set;
3293 :
3294 : /* The PHI that supposedly feeds back result of a FMA to another over loop
3295 : boundary. */
3296 : gphi *m_initial_phi;
3297 :
3298 : /* Result of the last produced FMA candidate or NULL if there has not been
3299 : one. */
3300 : tree m_last_result;
3301 :
3302 : /* If true, deferring might still be profitable. If false, transform all
3303 : candidates and no longer defer. */
3304 : bool m_deferring_p;
3305 : };
3307 : /* Transform all deferred FMA candidates and mark STATE as no longer
3308 : deferring. */
3309 :
3310 : static void
3311 3609117 : cancel_fma_deferring (fma_deferring_state *state)
3312 : {
3313 3609117 : if (!state->m_deferring_p)
3314 : return;
3315 :
3316 2604969 : for (unsigned i = 0; i < state->m_candidates.length (); i++)
3317 : {
3318 918 : if (dump_file && (dump_flags & TDF_DETAILS))
3319 0 : fprintf (dump_file, "Generating deferred FMA\n");
3320 :
3321 918 : const fma_transformation_info &fti = state->m_candidates[i];
3322 918 : convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3323 :
3324 918 : gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3325 918 : gsi_remove (&gsi, true); /* The multiply is now dead; delete it. */
3326 918 : release_defs (fti.mul_stmt);
3327 : }
3328 2604051 : state->m_deferring_p = false;
3329 : }
3330 :
3331 : /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3332 : Otherwise return NULL. */
3333 :
3334 : static gphi *
3335 5250 : result_of_phi (tree op)
3336 : {
3337 0 : if (TREE_CODE (op) != SSA_NAME)
3338 : return NULL;
3339 :
3340 : /* dyn_cast yields NULL when the defining statement is not a PHI. */
3341 5125 : return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3342 : }
3342 :
3343 : /* After processing statements of a BB and recording STATE, return true if the
3344 : initial phi is fed by the last FMA candidate result ore one such result from
3345 : previously processed BBs marked in LAST_RESULT_SET. */
3346 :
3347 : static bool
3348 362 : last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3349 : hash_set<tree> *last_result_set)
3350 : {
3351 362 : ssa_op_iter iter;
3352 362 : use_operand_p use;
3353 : /* Walk each incoming argument of the initial PHI. */
3354 898 : FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3355 : {
3356 630 : tree t = USE_FROM_PTR (use);
3357 630 : if (t == state->m_last_result
3358 630 : || last_result_set->contains (t))
3359 94 : return true;
3360 : }
3361 :
3362 : return false;
3363 : }
3363 :
3364 : /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3365 : with uses in additions and subtractions to form fused multiply-add
3366 : operations. Returns true if successful and MUL_STMT should be removed.
3367 : If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3368 : on MUL_COND, otherwise it is unconditional.
3369 :
3370 : If STATE indicates that we are deferring FMA transformation, that means
3371 : that we do not produce FMAs for basic blocks which look like:
3372 :
3373 : <bb 6>
3374 : # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3375 : _65 = _14 * _16;
3376 : accumulator_66 = _65 + accumulator_111;
3377 :
3378 : or its unrolled version, i.e. with several FMA candidates that feed result
3379 : of one into the addend of another. Instead, we add them to a list in STATE
3380 : and if we later discover an FMA candidate that is not part of such a chain,
3381 : we go back and perform all deferred past candidates. */
3382 :
3383 : static bool
3384 711231 : convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3385 : fma_deferring_state *state, tree mul_cond = NULL_TREE,
3386 : tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3387 : {
3388 711231 : tree mul_result = gimple_get_lhs (mul_stmt);
3389 : /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3390 : if the statement was left just for the side-effects. */
3391 711231 : if (!mul_result)
3392 : return false;
3393 711231 : tree type = TREE_TYPE (mul_result);
3394 711231 : gimple *use_stmt, *neguse_stmt;
3395 711231 : use_operand_p use_p;
3396 711231 : imm_use_iterator imm_iter;
3397 :
3398 613801 : if (FLOAT_TYPE_P (type)
3399 735573 : && flag_fp_contract_mode != FP_CONTRACT_FAST)
3400 : return false;
3401 :
3402 : /* We don't want to do bitfield reduction ops. */
3403 706091 : if (INTEGRAL_TYPE_P (type)
3404 706091 : && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3405 : return false;
3406 :
3407 : /* If the target doesn't support it, don't generate it. We assume that
3408 : if fma isn't available then fms, fnma or fnms are not either. */
3409 705917 : optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3410 705917 : if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3411 : return false;
3412 :
3413 : /* If the multiplication has zero uses, it is kept around probably because
3414 : of -fnon-call-exceptions. Don't optimize it away in that case,
3415 : it is DCE job. */
3416 23246 : if (has_zero_uses (mul_result))
3417 : return false;
3418 :
3419 23246 : bool check_defer
3420 23246 : = (state->m_deferring_p
3421 23246 : && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3422 23246 : param_avoid_fma_max_bits));
3423 23246 : bool defer = check_defer;
3424 23246 : bool seen_negate_p = false;
3425 :
3426 : /* There is no numerical difference between fused and unfused integer FMAs,
3427 : and the assumption below that FMA is as cheap as addition is unlikely
3428 : to be true, especially if the multiplication occurs multiple times on
3429 : the same chain. E.g., for something like:
3430 :
3431 : (((a * b) + c) >> 1) + (a * b)
3432 :
3433 : we do not want to duplicate the a * b into two additions, not least
3434 : because the result is not a natural FMA chain. */
3435 23246 : if (ANY_INTEGRAL_TYPE_P (type)
3436 23246 : && !has_single_use (mul_result))
3437 : return false;
3438 :
3439 23246 : if (!dbg_cnt (form_fma))
3440 : return false;
3441 :
3442 : /* Make sure that the multiplication statement becomes dead after
3443 : the transformation, thus that all uses are transformed to FMAs.
3444 : This means we assume that an FMA operation has the same cost
3445 : as an addition. */
3446 41646 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3447 : {
3448 24056 : tree result = mul_result;
3449 24056 : bool negate_p = false;
3450 :
3451 24056 : use_stmt = USE_STMT (use_p);
3452 :
3453 24056 : if (is_gimple_debug (use_stmt))
3454 278 : continue;
3455 :
3456 : /* If the use is a type convert, look further into it if the operations
3457 : are the same under two's complement. */
3458 23778 : tree lhs_type;
3459 23778 : if (gimple_assign_cast_p (use_stmt)
3460 295 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3461 24073 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3462 : {
3463 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3464 0 : gimple *tmp_use;
3465 0 : use_operand_p tmp_use_p;
3466 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3467 0 : use_stmt = tmp_use;
3468 0 : result = cast_lhs;
3469 : }
3470 :
3471 : /* For now restrict this operations to single basic blocks. In theory
3472 : we would want to support sinking the multiplication in
3473 : m = a*b;
3474 : if ()
3475 : ma = m + c;
3476 : else
3477 : d = m;
3478 : to form a fma in the then block and sink the multiplication to the
3479 : else block. */
3480 23778 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3481 5656 : return false;
3482 :
3483 : /* A negate on the multiplication leads to FNMA. */
3484 22927 : if (is_gimple_assign (use_stmt)
3485 22927 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3486 : {
3487 706 : ssa_op_iter iter;
3488 706 : use_operand_p usep;
3489 :
3490 : /* If (due to earlier missed optimizations) we have two
3491 : negates of the same value, treat them as equivalent
3492 : to a single negate with multiple uses. */
3493 706 : if (seen_negate_p)
3494 0 : return false;
3495 :
3496 706 : result = gimple_assign_lhs (use_stmt);
3497 :
3498 : /* Make sure the negate statement becomes dead with this
3499 : single transformation. */
3500 706 : if (!single_imm_use (gimple_assign_lhs (use_stmt),
3501 : &use_p, &neguse_stmt))
3502 : return false;
3503 :
3504 : /* Make sure the multiplication isn't also used on that stmt. */
3505 2836 : FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3506 1424 : if (USE_FROM_PTR (usep) == mul_result)
3507 : return false;
3508 :
3509 : /* Re-validate. */
3510 706 : use_stmt = neguse_stmt;
3511 706 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3512 : return false;
3513 :
3514 706 : negate_p = seen_negate_p = true;
3515 : }
3516 :
3517 22927 : tree cond, else_value, ops[3], len, bias;
3518 22927 : tree_code code;
3519 22927 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3520 : &else_value, &len, &bias))
3521 : return false;
3522 :
3523 : /* The multiplication result must be one of the addition operands. */
3524 20363 : if (ops[0] != result && ops[1] != result)
3525 : return false;
3526 :
3527 19786 : switch (code)
3528 : {
3529 5775 : case MINUS_EXPR:
3530 5775 : if (ops[1] == result)
3531 2859 : negate_p = !negate_p;
3532 : break;
3533 : case PLUS_EXPR:
3534 : break;
3535 : default:
3536 : /* FMA can only be formed from PLUS and MINUS. */
3537 : return false;
3538 : }
3539 :
3540 18144 : if (len)
3541 : {
3542 : /* For COND_LEN_* operations, we may have dummpy mask which is
3543 : the all true mask. Such TREE type may be mul_cond != cond
3544 : but we still consider they are equal. */
3545 0 : if (mul_cond && cond != mul_cond
3546 0 : && !(integer_truep (mul_cond) && integer_truep (cond)))
3547 0 : return false;
3548 :
3549 0 : if (else_value == result)
3550 : return false;
3551 :
3552 0 : if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3553 : opt_type))
3554 : return false;
3555 :
3556 0 : if (mul_len)
3557 : {
3558 0 : poly_int64 mul_value, value;
3559 0 : if (poly_int_tree_p (mul_len, &mul_value)
3560 0 : && poly_int_tree_p (len, &value)
3561 0 : && maybe_ne (mul_value, value))
3562 0 : return false;
3563 0 : else if (mul_len != len)
3564 : return false;
3565 :
3566 0 : if (wi::to_widest (mul_bias) != wi::to_widest (bias))
3567 : return false;
3568 : }
3569 : }
3570 : else
3571 : {
3572 18144 : if (mul_cond && cond != mul_cond)
3573 : return false;
3574 :
3575 18132 : if (cond)
3576 : {
3577 104 : if (cond == result || else_value == result)
3578 : return false;
3579 94 : if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3580 : opt_type))
3581 : return false;
3582 : }
3583 : }
3584 :
3585 : /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3586 : we'll visit later, we might be able to get a more profitable
3587 : match with fnma.
3588 : OTOH, if we don't, a negate / fma pair has likely lower latency
3589 : that a mult / subtract pair. */
3590 18122 : if (code == MINUS_EXPR
3591 5769 : && !negate_p
3592 2210 : && ops[0] == result
3593 2210 : && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3594 0 : && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3595 0 : && TREE_CODE (ops[1]) == SSA_NAME
3596 18122 : && has_single_use (ops[1]))
3597 : {
3598 0 : gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3599 0 : if (is_gimple_assign (stmt2)
3600 0 : && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3601 : return false;
3602 : }
3603 :
3604 : /* We can't handle a * b + a * b. */
3605 18122 : if (ops[0] == ops[1])
3606 : return false;
3607 : /* If deferring, make sure we are not looking at an instruction that
3608 : wouldn't have existed if we were not. */
3609 18122 : if (state->m_deferring_p
3610 18122 : && (state->m_mul_result_set.contains (ops[0])
3611 6424 : || state->m_mul_result_set.contains (ops[1])))
3612 0 : return false;
3613 :
3614 18122 : if (check_defer)
3615 : {
3616 6276 : tree use_lhs = gimple_get_lhs (use_stmt);
3617 6276 : if (state->m_last_result)
3618 : {
3619 1026 : if (ops[1] == state->m_last_result
3620 1026 : || ops[0] == state->m_last_result)
3621 : defer = true;
3622 : else
3623 6276 : defer = false;
3624 : }
3625 : else
3626 : {
3627 5250 : gcc_checking_assert (!state->m_initial_phi);
3628 5250 : gphi *phi;
3629 5250 : if (ops[0] == result)
3630 3292 : phi = result_of_phi (ops[1]);
3631 : else
3632 : {
3633 1958 : gcc_assert (ops[1] == result);
3634 1958 : phi = result_of_phi (ops[0]);
3635 : }
3636 :
3637 : if (phi)
3638 : {
3639 940 : state->m_initial_phi = phi;
3640 940 : defer = true;
3641 : }
3642 : else
3643 : defer = false;
3644 : }
3645 :
3646 6276 : state->m_last_result = use_lhs;
3647 6276 : check_defer = false;
3648 : }
3649 : else
3650 : defer = false;
3651 :
3652 : /* While it is possible to validate whether or not the exact form that
3653 : we've recognized is available in the backend, the assumption is that
3654 : if the deferring logic above did not trigger, the transformation is
3655 : never a loss. For instance, suppose the target only has the plain FMA
3656 : pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3657 : MUL+SUB for FMA+NEG, which is still two operations. Consider
3658 : -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3659 : form the two NEGs are independent and could be run in parallel. */
3660 5656 : }
3661 :
3662 17590 : if (defer)
3663 : {
3664 1012 : fma_transformation_info fti;
3665 1012 : fti.mul_stmt = mul_stmt;
3666 1012 : fti.mul_result = mul_result;
3667 1012 : fti.op1 = op1;
3668 1012 : fti.op2 = op2;
3669 1012 : state->m_candidates.safe_push (fti);
3670 1012 : state->m_mul_result_set.add (mul_result);
3671 :
3672 1012 : if (dump_file && (dump_flags & TDF_DETAILS))
3673 : {
3674 0 : fprintf (dump_file, "Deferred generating FMA for multiplication ");
3675 0 : print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3676 0 : fprintf (dump_file, "\n");
3677 : }
3678 :
3679 1012 : return false;
3680 : }
3681 : else
3682 : {
3683 16578 : if (state->m_deferring_p)
3684 4914 : cancel_fma_deferring (state);
3685 16578 : convert_mult_to_fma_1 (mul_result, op1, op2);
3686 16578 : return true;
3687 : }
3688 : }
3689 :
3690 :
/* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
   a check for non-zero like:
   _1 = x_4(D) * y_5(D);
   *res_7(D) = _1;
   if (x_4(D) != 0)
     goto <bb 3>; [50.00%]
   else
     goto <bb 4>; [50.00%]

   <bb 3> [local count: 536870913]:
   _2 = _1 / x_4(D);
   _9 = _2 != y_5(D);
   _10 = (int) _9;

   <bb 4> [local count: 1073741824]:
   # iftmp.0_3 = PHI <_10(3), 0(2)>
   then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
   optimize the x_4(D) != 0 condition to 1.

   MUL_STMTS holds the statements allowed to appear in the guarded block
   before COND_STMT (DIV_STMT is appended to it below); COND_STMT is the
   guarded comparison use, DIV_STMT the _1 / x_4 division.  *CFG_CHANGED
   is set to true when the guarding condition has been folded.  */

static void
maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
			       gimple *div_stmt, bool *cfg_changed)
{
  /* The division must be in the same block as the guarded use, and
     that block must have a single predecessor (the guard).  */
  basic_block bb = gimple_bb (cond_stmt);
  if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
    return;
  edge pred_edge = single_pred_edge (bb);
  basic_block pred_bb = pred_edge->src;
  /* The predecessor must be a conditional block with two successors.  */
  if (EDGE_COUNT (pred_bb->succs) != 2)
    return;
  /* OTHER_EDGE is the edge out of PRED_BB that bypasses BB.  */
  edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
  edge other_succ_edge = NULL;
  if (gimple_code (cond_stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (bb->succs) != 2)
	return;
      /* Select the successor edge of BB that should join the bypass
	 path, then require that both edges meet in the same block.  */
      other_succ_edge = EDGE_SUCC (bb, 0);
      if (gimple_cond_code (cond_stmt) == NE_EXPR)
	{
	  if (other_succ_edge->flags & EDGE_TRUE_VALUE)
	    other_succ_edge = EDGE_SUCC (bb, 1);
	}
      else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
	other_succ_edge = EDGE_SUCC (bb, 0);
      if (other_edge->dest != other_succ_edge->dest)
	return;
    }
  /* For an assignment use, BB must simply fall through to the block
     the bypass edge targets.  */
  else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
    return;
  /* PRED_BB must end in a comparison against zero, with the polarity
     matching which edge enters BB.  */
  gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
  if (zero_cond == NULL
      || (gimple_cond_code (zero_cond)
	  != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
      || !integer_zerop (gimple_cond_rhs (zero_cond)))
    return;
  tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
  if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
    return;
  /* The SSA name tested against zero must be the divisor of DIV_STMT.  */
  if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
    {
      /* Allow the divisor to be result of a same precision cast
	 from zero_cond_lhs.  */
      tree rhs2 = gimple_assign_rhs2 (div_stmt);
      if (TREE_CODE (rhs2) != SSA_NAME)
	return;
      gimple *g = SSA_NAME_DEF_STMT (rhs2);
      if (!gimple_assign_cast_p (g)
	  || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
	  || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
	      != TYPE_PRECISION (TREE_TYPE (rhs2))))
	return;
    }
  /* Verify that everything in BB up to COND_STMT is either one of the
     statements in MUL_STMTS (which now includes DIV_STMT) or one of at
     most three casts.  */
  gimple_stmt_iterator gsi = gsi_after_labels (bb);
  mul_stmts.quick_push (div_stmt);
  if (is_gimple_debug (gsi_stmt (gsi)))
    gsi_next_nondebug (&gsi);
  unsigned cast_count = 0;
  while (gsi_stmt (gsi) != cond_stmt)
    {
      /* If original mul_stmt has a single use, allow it in the same bb,
	 we are looking then just at __builtin_mul_overflow_p.
	 Though, in that case the original mul_stmt will be replaced
	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
      gimple *mul_stmt;
      unsigned int i;
      bool ok = false;
      FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
	{
	  if (gsi_stmt (gsi) == mul_stmt)
	    {
	      ok = true;
	      break;
	    }
	}
      /* Tolerate up to three extra cast statements.  */
      if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
	ok = true;
      if (!ok)
	return;
      gsi_next_nondebug (&gsi);
    }
  if (gimple_code (cond_stmt) == GIMPLE_COND)
    {
      /* Every PHI in the join block must merge equal values on the
	 bypass edge and on the edge leaving BB, i.e. skipping BB must
	 not change any merged value.  */
      basic_block succ_bb = other_edge->dest;
      for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
	   gsi_next (&gpi))
	{
	  gphi *phi = gpi.phi ();
	  tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
	  tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
	  if (!operand_equal_p (v1, v2, 0))
	    return;
	}
    }
  else
    {
      tree lhs = gimple_assign_lhs (cond_stmt);
      if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
	return;
      /* COND_STMT may be followed by at most a single cast of its
	 result; anything else after it in BB disqualifies the
	 transformation.  */
      gsi_next_nondebug (&gsi);
      if (!gsi_end_p (gsi))
	{
	  if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
	    return;
	  gimple *cast_stmt = gsi_stmt (gsi);
	  if (!gimple_assign_cast_p (cast_stmt))
	    return;
	  tree new_lhs = gimple_assign_lhs (cast_stmt);
	  gsi_next_nondebug (&gsi);
	  if (!gsi_end_p (gsi)
	      || !new_lhs
	      || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
	      || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
	    return;
	  lhs = new_lhs;
	}
      /* The join block must contain exactly one PHI, merging LHS from
	 BB with some value OTHER_VAL from the bypass edge.  */
      edge succ_edge = single_succ_edge (bb);
      basic_block succ_bb = succ_edge->dest;
      gsi = gsi_start_phis (succ_bb);
      if (gsi_end_p (gsi))
	return;
      gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
      gsi_next (&gsi);
      if (!gsi_end_p (gsi))
	return;
      if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
	return;
      tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
      /* OTHER_VAL must match what the comparison would have produced
	 on the skipped path: the matching COND_EXPR arm, 0 for
	 NE_EXPR, 1 otherwise.  */
      if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
	{
	  tree cond = gimple_assign_rhs1 (cond_stmt);
	  if (TREE_CODE (cond) == NE_EXPR)
	    {
	      if (!operand_equal_p (other_val,
				    gimple_assign_rhs3 (cond_stmt), 0))
		return;
	    }
	  else if (!operand_equal_p (other_val,
				     gimple_assign_rhs2 (cond_stmt), 0))
	    return;
	}
      else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
	{
	  if (!integer_zerop (other_val))
	    return;
	}
      else if (!integer_onep (other_val))
	return;
    }
  /* All checks passed: fold the guarding zero check so BB is always
     entered.  */
  if (pred_edge->flags & EDGE_TRUE_VALUE)
    gimple_cond_make_true (zero_cond);
  else
    gimple_cond_make_false (zero_cond);
  update_stmt (zero_cond);
  reset_flow_sensitive_info_in_bb (bb);
  *cfg_changed = true;
}
3868 :
3869 : /* Helper function for arith_overflow_check_p. Return true
3870 : if VAL1 is equal to VAL2 cast to corresponding integral type
3871 : with other signedness or vice versa. */
3872 :
3873 : static bool
3874 382 : arith_cast_equal_p (tree val1, tree val2)
3875 : {
3876 382 : if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3877 65 : return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3878 317 : else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3879 : return false;
3880 280 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3881 280 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3882 : return true;
3883 168 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3884 168 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3885 120 : return true;
3886 : return false;
3887 : }
3888 :
/* Helper function of match_arith_overflow.  Return 1
   if USE_STMT is unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
   and 0 otherwise.

   STMT is the arithmetic statement (PLUS_EXPR, MINUS_EXPR, MULT_EXPR
   or BIT_NOT_EXPR per the patterns below); CAST_STMT, if non-NULL, is
   a cast of its result whose lhs is checked instead.  On a successful
   MULT_EXPR match, USE_STMT is updated to the comparison that
   actually consumes the division result.  MAXVAL, if non-NULL, is a
   constant matched either via r > MAXVAL / r <= MAXVAL comparisons or
   via a right shift of r by the precision of MAXVAL's type.  For the
   BIT_NOT_EXPR form, *OTHER (when OTHER is non-NULL) is set to the
   operand compared against ~a.  */

static int
arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
			tree maxval, tree *other)
{
  enum tree_code ccode = ERROR_MARK;
  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  /* When a cast of the result is being checked, the checked value is
     the cast's lhs.  */
  tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree multop = NULL_TREE, divlhs = NULL_TREE;
  gimple *cur_use_stmt = use_stmt;

  if (code == MULT_EXPR)
    {
      /* For MULT_EXPR the check has the form r = a * b; d = r / a;
	 d !=/== b.  USE_STMT must be that TRUNC_DIV_EXPR; remember the
	 other multiplication operand in MULTOP and the division result
	 in DIVLHS.  */
      if (!is_gimple_assign (use_stmt))
	return 0;
      if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
	return 0;
      if (gimple_assign_rhs1 (use_stmt) != lhs)
	return 0;
      if (cast_stmt)
	{
	  /* With a cast in between, the divisor may differ from the
	     multiplication operand by a signedness cast.  */
	  if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
	    multop = rhs2;
	  else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
	    multop = rhs1;
	  else
	    return 0;
	}
      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
	multop = rhs2;
      else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
	multop = rhs1;
      else
	return 0;
      if (stmt_ends_bb_p (use_stmt))
	return 0;
      divlhs = gimple_assign_lhs (use_stmt);
      if (!divlhs)
	return 0;
      /* The division result must feed exactly one statement, the
	 comparison checked below.  */
      use_operand_p use;
      if (!single_imm_use (divlhs, &use, &cur_use_stmt))
	return 0;
      if (cast_stmt && gimple_assign_cast_p (cur_use_stmt))
	{
	  /* Allow one same-precision cast to unsigned of the division
	     result before the comparison.  */
	  tree cast_lhs = gimple_assign_lhs (cur_use_stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
	      && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
	      && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
		  == TYPE_PRECISION (TREE_TYPE (divlhs)))
	      && single_imm_use (cast_lhs, &use, &cur_use_stmt))
	    {
	      cast_stmt = NULL;
	      divlhs = cast_lhs;
	    }
	  else
	    return 0;
	}
    }
  /* Extract the comparison code and operands from CUR_USE_STMT,
     whether it is a GIMPLE_COND, a binary assignment or the condition
     of a COND_EXPR.  */
  if (gimple_code (cur_use_stmt) == GIMPLE_COND)
    {
      ccode = gimple_cond_code (cur_use_stmt);
      crhs1 = gimple_cond_lhs (cur_use_stmt);
      crhs2 = gimple_cond_rhs (cur_use_stmt);
    }
  else if (is_gimple_assign (cur_use_stmt))
    {
      if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
	{
	  ccode = gimple_assign_rhs_code (cur_use_stmt);
	  crhs1 = gimple_assign_rhs1 (cur_use_stmt);
	  crhs2 = gimple_assign_rhs2 (cur_use_stmt);
	}
      else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
	{
	  tree cond = gimple_assign_rhs1 (cur_use_stmt);
	  if (COMPARISON_CLASS_P (cond))
	    {
	      ccode = TREE_CODE (cond);
	      crhs1 = TREE_OPERAND (cond, 0);
	      crhs2 = TREE_OPERAND (cond, 1);
	    }
	  else
	    return 0;
	}
      else
	return 0;
    }
  else
    return 0;

  /* Recognize the overflow check written as r >> precision, whose
     result is then compared against zero (or converted to a narrower
     integral type).  */
  if (maxval
      && ccode == RSHIFT_EXPR
      && crhs1 == lhs
      && TREE_CODE (crhs2) == INTEGER_CST
      && wi::to_widest (crhs2) == TYPE_PRECISION (TREE_TYPE (maxval)))
    {
      tree shiftlhs = gimple_assign_lhs (use_stmt);
      if (!shiftlhs)
	return 0;
      use_operand_p use;
      if (!single_imm_use (shiftlhs, &use, &cur_use_stmt))
	return 0;
      /* The shift result's single use supplies the actual comparison;
	 extract it the same way as above.  */
      if (gimple_code (cur_use_stmt) == GIMPLE_COND)
	{
	  ccode = gimple_cond_code (cur_use_stmt);
	  crhs1 = gimple_cond_lhs (cur_use_stmt);
	  crhs2 = gimple_cond_rhs (cur_use_stmt);
	}
      else if (is_gimple_assign (cur_use_stmt))
	{
	  if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
	    {
	      ccode = gimple_assign_rhs_code (cur_use_stmt);
	      crhs1 = gimple_assign_rhs1 (cur_use_stmt);
	      crhs2 = gimple_assign_rhs2 (cur_use_stmt);
	    }
	  else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
	    {
	      tree cond = gimple_assign_rhs1 (cur_use_stmt);
	      if (COMPARISON_CLASS_P (cond))
		{
		  ccode = TREE_CODE (cond);
		  crhs1 = TREE_OPERAND (cond, 0);
		  crhs2 = TREE_OPERAND (cond, 1);
		}
	      else
		return 0;
	    }
	  else
	    {
	      /* Alternatively accept a conversion of the shift result
		 to an integral type no wider than MAXVAL's.  */
	      enum tree_code sc = gimple_assign_rhs_code (cur_use_stmt);
	      tree castlhs = gimple_assign_lhs (cur_use_stmt);
	      if (!CONVERT_EXPR_CODE_P (sc)
		  || !castlhs
		  || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs))
		  || (TYPE_PRECISION (TREE_TYPE (castlhs))
		      > TYPE_PRECISION (TREE_TYPE (maxval))))
		return 0;
	      return 1;
	    }
	}
      else
	return 0;
      /* Only (r >> prec) == 0 or (r >> prec) != 0 qualify.  */
      if ((ccode != EQ_EXPR && ccode != NE_EXPR)
	  || crhs1 != shiftlhs
	  || !integer_zerop (crhs2))
	return 0;
      return 1;
    }

  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
    return 0;

  switch (ccode)
    {
    case GT_EXPR:
    case LE_EXPR:
      if (maxval)
	{
	  /* r = a + b; r > maxval or r <= maxval  */
	  if (crhs1 == lhs
	      && TREE_CODE (crhs2) == INTEGER_CST
	      && tree_int_cst_equal (crhs2, maxval))
	    return ccode == GT_EXPR ? 1 : -1;
	  break;
	}
      /* r = a - b; r > a or r <= a
	 r = a + b; a > r or a <= r or b > r or b <= r.  */
      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
	      && crhs2 == lhs))
	return ccode == GT_EXPR ? 1 : -1;
      /* r = ~a; b > r or b <= r.  */
      if (code == BIT_NOT_EXPR && crhs2 == lhs)
	{
	  if (other)
	    *other = crhs1;
	  return ccode == GT_EXPR ? 1 : -1;
	}
      break;
    case LT_EXPR:
    case GE_EXPR:
      if (maxval)
	break;
      /* r = a - b; a < r or a >= r
	 r = a + b; r < a or r >= a or r < b or r >= b.  */
      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
	  || (code == PLUS_EXPR && crhs1 == lhs
	      && (crhs2 == rhs1 || crhs2 == rhs2)))
	return ccode == LT_EXPR ? 1 : -1;
      /* r = ~a; r < b or r >= b.  */
      if (code == BIT_NOT_EXPR && crhs1 == lhs)
	{
	  if (other)
	    *other = crhs2;
	  return ccode == LT_EXPR ? 1 : -1;
	}
      break;
    case EQ_EXPR:
    case NE_EXPR:
      /* r = a * b; _1 = r / a; _1 == b
	 r = a * b; _1 = r / b; _1 == a
	 r = a * b; _1 = r / a; _1 != b
	 r = a * b; _1 = r / b; _1 != a.  */
      if (code == MULT_EXPR)
	{
	  if (cast_stmt)
	    {
	      /* With a cast involved, match the remaining operand
		 modulo a signedness cast as well.  */
	      if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
		  || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
		{
		  use_stmt = cur_use_stmt;
		  return ccode == NE_EXPR ? 1 : -1;
		}
	    }
	  else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
		   || (crhs2 == divlhs && crhs1 == multop))
	    {
	      use_stmt = cur_use_stmt;
	      return ccode == NE_EXPR ? 1 : -1;
	    }
	}
      break;
    default:
      break;
    }
  return 0;
}
4124 :
4125 : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4126 : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4127 : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4128 : extern bool gimple_unsigned_integer_sat_mul (tree, tree*, tree (*)(tree));
4129 :
4130 : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4131 : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4132 : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
4133 :
4134 : static void
4135 159 : build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator *gsi,
4136 : internal_fn fn, tree lhs,
4137 : tree op_0, tree op_1)
4138 : {
4139 159 : if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4140 : {
4141 157 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4142 157 : gimple_call_set_lhs (call, lhs);
4143 157 : gsi_replace (gsi, call, /* update_eh_info */ true);
4144 : }
4145 159 : }
4146 :
4147 : static bool
4148 51 : build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator *gsi,
4149 : internal_fn fn, tree lhs,
4150 : tree op_0, tree op_1)
4151 : {
4152 51 : if (!direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4153 : return false;
4154 :
4155 43 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4156 43 : gimple_call_set_lhs (call, lhs);
4157 43 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4158 :
4159 43 : return true;
4160 : }
4161 :
4162 : /*
4163 : * Try to match saturation unsigned add with assign.
4164 : * _7 = _4 + _6;
4165 : * _8 = _4 > _7;
4166 : * _9 = (long unsigned int) _8;
4167 : * _10 = -_9;
4168 : * _12 = _7 | _10;
4169 : * =>
4170 : * _12 = .SAT_ADD (_4, _6);
4171 : *
4172 : * Try to match IMM=-1 saturation signed add with assign.
4173 : * <bb 2> [local count: 1073741824]:
4174 : * x.0_1 = (unsigned char) x_5(D);
4175 : * _3 = -x.0_1;
4176 : * _10 = (signed char) _3;
4177 : * _8 = x_5(D) & _10;
4178 : * if (_8 < 0)
4179 : * goto <bb 4>; [1.40%]
4180 : * else
4181 : * goto <bb 3>; [98.60%]
4182 : * <bb 3> [local count: 434070867]:
4183 : * _2 = x.0_1 + 255;
4184 : * <bb 4> [local count: 1073741824]:
4185 : * # _9 = PHI <_2(3), 128(2)>
4186 : * _4 = (int8_t) _9;
4187 : * =>
4188 : * _4 = .SAT_ADD (x_5, -1); */
4189 :
4190 : static void
4191 4858975 : match_saturation_add_with_assign (gimple_stmt_iterator *gsi, gassign *stmt)
4192 : {
4193 4858975 : tree ops[2];
4194 4858975 : tree lhs = gimple_assign_lhs (stmt);
4195 :
4196 4858975 : if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4197 4858975 : || gimple_signed_integer_sat_add (lhs, ops, NULL))
4198 34 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_ADD, lhs,
4199 : ops[0], ops[1]);
4200 4858975 : }
4201 :
4202 : /*
4203 : * Try to match saturation add with PHI.
4204 : * For unsigned integer:
4205 : * <bb 2> :
4206 : * _1 = x_3(D) + y_4(D);
4207 : * if (_1 >= x_3(D))
4208 : * goto <bb 3>; [INV]
4209 : * else
4210 : * goto <bb 4>; [INV]
4211 : *
4212 : * <bb 3> :
4213 : *
4214 : * <bb 4> :
4215 : * # _2 = PHI <255(2), _1(3)>
4216 : * =>
4217 : * <bb 4> [local count: 1073741824]:
4218 : * _2 = .SAT_ADD (x_4(D), y_5(D));
4219 : *
4220 : * For signed integer:
4221 : * x.0_1 = (long unsigned int) x_7(D);
4222 : * y.1_2 = (long unsigned int) y_8(D);
4223 : * _3 = x.0_1 + y.1_2;
4224 : * sum_9 = (int64_t) _3;
4225 : * _4 = x_7(D) ^ y_8(D);
4226 : * _5 = x_7(D) ^ sum_9;
4227 : * _15 = ~_4;
4228 : * _16 = _5 & _15;
4229 : * if (_16 < 0)
4230 : * goto <bb 3>; [41.00%]
4231 : * else
4232 : * goto <bb 4>; [59.00%]
4233 : * _11 = x_7(D) < 0;
4234 : * _12 = (long int) _11;
4235 : * _13 = -_12;
4236 : * _14 = _13 ^ 9223372036854775807;
4237 : * # _6 = PHI <_14(3), sum_9(2)>
4238 : * =>
4239 : * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
4240 :
4241 : static bool
4242 4205787 : match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
4243 : {
4244 4205787 : if (gimple_phi_num_args (phi) != 2)
4245 : return false;
4246 :
4247 3329358 : tree ops[2];
4248 3329358 : tree phi_result = gimple_phi_result (phi);
4249 :
4250 3329358 : if (!gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
4251 3329358 : && !gimple_signed_integer_sat_add (phi_result, ops, NULL))
4252 : return false;
4253 :
4254 21 : if (!TYPE_UNSIGNED (TREE_TYPE (ops[0])) && TREE_CODE (ops[1]) == INTEGER_CST)
4255 0 : ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4256 :
4257 21 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_ADD,
4258 : phi_result, ops[0],
4259 21 : ops[1]);
4260 : }
4261 :
4262 : /*
4263 : * Try to match saturation unsigned sub.
4264 : * _1 = _4 >= _5;
4265 : * _3 = _4 - _5;
4266 : * _6 = _1 ? _3 : 0;
4267 : * =>
4268 : * _6 = .SAT_SUB (_4, _5); */
4269 :
4270 : static void
4271 3332693 : match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
4272 : {
4273 3332693 : tree ops[2];
4274 3332693 : tree lhs = gimple_assign_lhs (stmt);
4275 :
4276 3332693 : if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
4277 125 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_SUB, lhs,
4278 : ops[0], ops[1]);
4279 3332693 : }
4280 :
4281 : /*
4282 : * Try to match saturation unsigned mul.
4283 : * _1 = (unsigned int) a_6(D);
4284 : * _2 = (unsigned int) b_7(D);
4285 : * x_8 = _1 * _2;
4286 : * overflow_9 = x_8 > 255;
4287 : * _3 = (unsigned char) overflow_9;
4288 : * _4 = -_3;
4289 : * _5 = (unsigned char) x_8;
4290 : * _10 = _4 | _5;
4291 : * =>
 * _10 = .SAT_MUL (a_6, b_7); */
4293 :
4294 : static void
4295 2575614 : match_unsigned_saturation_mul (gimple_stmt_iterator *gsi, gassign *stmt)
4296 : {
4297 2575614 : tree ops[2];
4298 2575614 : tree lhs = gimple_assign_lhs (stmt);
4299 :
4300 2575614 : if (gimple_unsigned_integer_sat_mul (lhs, ops, NULL))
4301 0 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_MUL, lhs,
4302 : ops[0], ops[1]);
4303 2575614 : }
4304 :
4305 : /* Try to match saturation unsigned mul, aka:
4306 : _6 = .MUL_OVERFLOW (a_4(D), b_5(D));
4307 : _2 = IMAGPART_EXPR <_6>;
4308 : if (_2 != 0)
4309 : goto <bb 4>; [35.00%]
4310 : else
4311 : goto <bb 3>; [65.00%]
4312 :
4313 : <bb 3> [local count: 697932184]:
4314 : _1 = REALPART_EXPR <_6>;
4315 :
4316 : <bb 4> [local count: 1073741824]:
4317 : # _3 = PHI <18446744073709551615(2), _1(3)>
4318 : =>
4319 : _3 = .SAT_MUL (a_4(D), b_5(D)); */
4320 :
4321 : static bool
4322 4205744 : match_saturation_mul (gimple_stmt_iterator *gsi, gphi *phi)
4323 : {
4324 4205744 : if (gimple_phi_num_args (phi) != 2)
4325 : return false;
4326 :
4327 3329315 : tree ops[2];
4328 3329315 : tree phi_result = gimple_phi_result (phi);
4329 :
4330 3329315 : if (!gimple_unsigned_integer_sat_mul (phi_result, ops, NULL))
4331 : return false;
4332 :
4333 0 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_MUL,
4334 : phi_result, ops[0],
4335 0 : ops[1]);
4336 : }
4337 :
4338 : /*
 * Try to match saturation sub (unsigned or signed) with PHI.
4340 : * <bb 2> [local count: 1073741824]:
4341 : * if (x_2(D) > y_3(D))
4342 : * goto <bb 3>; [50.00%]
4343 : * else
4344 : * goto <bb 4>; [50.00%]
4345 : *
4346 : * <bb 3> [local count: 536870912]:
4347 : * _4 = x_2(D) - y_3(D);
4348 : *
4349 : * <bb 4> [local count: 1073741824]:
4350 : * # _1 = PHI <0(2), _4(3)>
4351 : * =>
4352 : * <bb 4> [local count: 1073741824]:
4353 : * _1 = .SAT_SUB (x_2(D), y_3(D)); */
4354 : static bool
4355 4205770 : match_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
4356 : {
4357 4205770 : if (gimple_phi_num_args (phi) != 2)
4358 : return false;
4359 :
4360 3329341 : tree ops[2];
4361 3329341 : tree phi_result = gimple_phi_result (phi);
4362 :
4363 3329341 : if (!gimple_unsigned_integer_sat_sub (phi_result, ops, NULL)
4364 3329341 : && !gimple_signed_integer_sat_sub (phi_result, ops, NULL))
4365 : return false;
4366 :
4367 30 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_SUB,
4368 : phi_result, ops[0],
4369 30 : ops[1]);
4370 : }
4371 :
4372 : /*
 * Try to match saturation unsigned truncate.
4374 : * uint16_t x_4(D);
4375 : * uint8_t _6;
4376 : * overflow_5 = x_4(D) > 255;
4377 : * _1 = (unsigned char) x_4(D);
4378 : * _2 = (unsigned char) overflow_5;
4379 : * _3 = -_2;
4380 : * _6 = _1 | _3;
4381 : * =>
4382 : * _6 = .SAT_TRUNC (x_4(D));
4383 : * */
4384 : static void
4385 2575614 : match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
4386 : {
4387 2575614 : tree ops[1];
4388 2575614 : tree lhs = gimple_assign_lhs (stmt);
4389 2575614 : tree type = TREE_TYPE (lhs);
4390 :
4391 2575614 : if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4392 2575714 : && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4393 100 : tree_pair (type, TREE_TYPE (ops[0])),
4394 : OPTIMIZE_FOR_BOTH))
4395 : {
4396 73 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4397 73 : gimple_call_set_lhs (call, lhs);
4398 73 : gsi_replace (gsi, call, /* update_eh_info */ true);
4399 : }
4400 2575614 : }
4401 :
4402 : /*
4403 : * Try to match saturation truncate.
4404 : * Aka:
4405 : * x.0_1 = (unsigned long) x_4(D);
4406 : * _2 = x.0_1 + 2147483648;
4407 : * if (_2 > 4294967295)
4408 : * goto <bb 4>; [50.00%]
4409 : * else
4410 : * goto <bb 3>; [50.00%]
4411 : * ;; succ: 4
4412 : * ;; 3
4413 : *
4414 : * ;; basic block 3, loop depth 0
4415 : * ;; pred: 2
4416 : * trunc_5 = (int32_t) x_4(D);
4417 : * goto <bb 5>; [100.00%]
4418 : * ;; succ: 5
4419 : *
4420 : * ;; basic block 4, loop depth 0
4421 : * ;; pred: 2
4422 : * _7 = x_4(D) < 0;
4423 : * _8 = (int) _7;
4424 : * _9 = -_8;
4425 : * _10 = _9 ^ 2147483647;
4426 : * ;; succ: 5
4427 : *
4428 : * ;; basic block 5, loop depth 0
4429 : * ;; pred: 3
4430 : * ;; 4
4431 : * # _3 = PHI <trunc_5(3), _10(4)>
4432 : * =>
4433 : * _6 = .SAT_TRUNC (x_4(D));
4434 : */
4435 :
4436 : static bool
4437 4205744 : match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi)
4438 : {
4439 4205744 : if (gimple_phi_num_args (phi) != 2)
4440 : return false;
4441 :
4442 3329315 : tree ops[1];
4443 3329315 : tree phi_result = gimple_phi_result (phi);
4444 3329315 : tree type = TREE_TYPE (phi_result);
4445 :
4446 3329315 : if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL)
4447 3329315 : && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL))
4448 : return false;
4449 :
4450 0 : if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4451 0 : tree_pair (type, TREE_TYPE (ops[0])),
4452 : OPTIMIZE_FOR_BOTH))
4453 : return false;
4454 :
4455 0 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4456 0 : gimple_call_set_lhs (call, phi_result);
4457 0 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4458 :
4459 0 : return true;
4460 : }
4461 :
4462 : /* Recognize for unsigned x
4463 : x = y - z;
4464 : if (x > y)
4465 : where there are other uses of x and replace it with
4466 : _7 = .SUB_OVERFLOW (y, z);
4467 : x = REALPART_EXPR <_7>;
4468 : _8 = IMAGPART_EXPR <_7>;
4469 : if (_8)
4470 : and similarly for addition.
4471 :
4472 : Also recognize:
4473 : yc = (type) y;
4474 : zc = (type) z;
4475 : x = yc + zc;
4476 : if (x > max)
4477 : where y and z have unsigned types with maximum max
4478 : and there are other uses of x and all of those cast x
4479 : back to that unsigned type and again replace it with
4480 : _7 = .ADD_OVERFLOW (y, z);
4481 : _9 = REALPART_EXPR <_7>;
4482 : _8 = IMAGPART_EXPR <_7>;
4483 : if (_8)
4484 : and replace (utype) x with _9.
4485 : Or with x >> popcount (max) instead of x > max.
4486 :
4487 : Also recognize:
4488 : x = ~z;
4489 : if (y > x)
4490 : and replace it with
4491 : _7 = .ADD_OVERFLOW (y, z);
4492 : _8 = IMAGPART_EXPR <_7>;
4493 : if (_8)
4494 :
4495 : And also recognize:
4496 : z = x * y;
4497 : if (x != 0)
4498 : goto <bb 3>; [50.00%]
4499 : else
4500 : goto <bb 4>; [50.00%]
4501 :
4502 : <bb 3> [local count: 536870913]:
4503 : _2 = z / x;
4504 : _9 = _2 != y;
4505 : _10 = (int) _9;
4506 :
4507 : <bb 4> [local count: 1073741824]:
4508 : # iftmp.0_3 = PHI <_10(3), 0(2)>
4509 : and replace it with
4510 : _7 = .MUL_OVERFLOW (x, y);
4511 : z = IMAGPART_EXPR <_7>;
4512 : _8 = IMAGPART_EXPR <_7>;
4513 : _9 = _8 != 0;
4514 : iftmp.0_3 = (int) _9; */
4515 :
4516 : static bool
4517 3318811 : match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4518 : enum tree_code code, bool *cfg_changed)
4519 : {
4520 3318811 : tree lhs = gimple_assign_lhs (stmt);
4521 3318811 : tree type = TREE_TYPE (lhs);
4522 3318811 : use_operand_p use_p;
4523 3318811 : imm_use_iterator iter;
4524 3318811 : bool use_seen = false;
4525 3318811 : bool ovf_use_seen = false;
4526 3318811 : gimple *use_stmt;
4527 3318811 : gimple *add_stmt = NULL;
4528 3318811 : bool add_first = false;
4529 3318811 : gimple *cond_stmt = NULL;
4530 3318811 : gimple *cast_stmt = NULL;
4531 3318811 : tree cast_lhs = NULL_TREE;
4532 :
4533 3318811 : gcc_checking_assert (code == PLUS_EXPR
4534 : || code == MINUS_EXPR
4535 : || code == MULT_EXPR
4536 : || code == BIT_NOT_EXPR);
4537 3318811 : if (!INTEGRAL_TYPE_P (type)
4538 2799244 : || !TYPE_UNSIGNED (type)
4539 1958178 : || has_zero_uses (lhs)
4540 3318811 : || (code != PLUS_EXPR
4541 1957841 : && code != MULT_EXPR
4542 171915 : && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4543 148022 : TYPE_MODE (type)) == CODE_FOR_nothing))
4544 1362913 : return false;
4545 :
4546 1955898 : tree rhs1 = gimple_assign_rhs1 (stmt);
4547 1955898 : tree rhs2 = gimple_assign_rhs2 (stmt);
4548 7360645 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4549 : {
4550 3455190 : use_stmt = USE_STMT (use_p);
4551 3455190 : if (is_gimple_debug (use_stmt))
4552 561854 : continue;
4553 :
4554 2893336 : tree other = NULL_TREE;
4555 2893336 : if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4556 : {
4557 6833 : if (code == BIT_NOT_EXPR)
4558 : {
4559 187 : gcc_assert (other);
4560 187 : if (TREE_CODE (other) != SSA_NAME)
4561 0 : return false;
4562 187 : if (rhs2 == NULL)
4563 187 : rhs2 = other;
4564 : else
4565 : return false;
4566 187 : cond_stmt = use_stmt;
4567 : }
4568 : ovf_use_seen = true;
4569 : }
4570 : else
4571 : {
4572 2886503 : use_seen = true;
4573 2886503 : if (code == MULT_EXPR
4574 2886503 : && cast_stmt == NULL
4575 2886503 : && gimple_assign_cast_p (use_stmt))
4576 : {
4577 32192 : cast_lhs = gimple_assign_lhs (use_stmt);
4578 64384 : if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4579 31651 : && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4580 61289 : && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4581 29097 : == TYPE_PRECISION (TREE_TYPE (lhs))))
4582 : cast_stmt = use_stmt;
4583 : else
4584 : cast_lhs = NULL_TREE;
4585 : }
4586 : }
4587 2893336 : if (ovf_use_seen && use_seen)
4588 : break;
4589 0 : }
4590 :
4591 1955898 : if (!ovf_use_seen
4592 1955898 : && code == MULT_EXPR
4593 449828 : && cast_stmt)
4594 : {
4595 28713 : if (TREE_CODE (rhs1) != SSA_NAME
4596 28713 : || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4597 : return false;
4598 93742 : FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4599 : {
4600 36316 : use_stmt = USE_STMT (use_p);
4601 36316 : if (is_gimple_debug (use_stmt))
4602 1094 : continue;
4603 :
4604 35222 : if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4605 : NULL_TREE, NULL))
4606 36316 : ovf_use_seen = true;
4607 28713 : }
4608 28713 : }
4609 : else
4610 : {
4611 : cast_stmt = NULL;
4612 : cast_lhs = NULL_TREE;
4613 : }
4614 :
4615 1955898 : tree maxval = NULL_TREE;
4616 1955898 : if (!ovf_use_seen
4617 13668 : || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4618 6459 : || (code == PLUS_EXPR
4619 6189 : && optab_handler (uaddv4_optab,
4620 6189 : TYPE_MODE (type)) == CODE_FOR_nothing)
4621 1969265 : || (code == MULT_EXPR
4622 221 : && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4623 148 : TYPE_MODE (type)) == CODE_FOR_nothing
4624 3 : && (use_seen
4625 3 : || cast_stmt
4626 0 : || !can_mult_highpart_p (TYPE_MODE (type), true))))
4627 : {
4628 1949294 : if (code != PLUS_EXPR)
4629 : return false;
4630 1353729 : if (TREE_CODE (rhs1) != SSA_NAME
4631 1353729 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4632 : return false;
4633 326256 : rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4634 326256 : tree type1 = TREE_TYPE (rhs1);
4635 326256 : if (!INTEGRAL_TYPE_P (type1)
4636 177044 : || !TYPE_UNSIGNED (type1)
4637 37779 : || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4638 342455 : || (TYPE_PRECISION (type1)
4639 32398 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4640 314962 : return false;
4641 11294 : if (TREE_CODE (rhs2) == INTEGER_CST)
4642 : {
4643 4150 : if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4644 4150 : TYPE_PRECISION (type1),
4645 8300 : UNSIGNED), 0))
4646 : return false;
4647 1627 : rhs2 = fold_convert (type1, rhs2);
4648 : }
4649 : else
4650 : {
4651 7144 : if (TREE_CODE (rhs2) != SSA_NAME
4652 7144 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4653 : return false;
4654 2920 : rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4655 2920 : tree type2 = TREE_TYPE (rhs2);
4656 2920 : if (!INTEGRAL_TYPE_P (type2)
4657 1193 : || !TYPE_UNSIGNED (type2)
4658 416 : || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4659 3292 : || (TYPE_PRECISION (type2)
4660 744 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4661 2561 : return false;
4662 : }
4663 1986 : if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4664 : type = type1;
4665 : else
4666 5 : type = TREE_TYPE (rhs2);
4667 :
4668 1986 : if (TREE_CODE (type) != INTEGER_TYPE
4669 3972 : || optab_handler (uaddv4_optab,
4670 1986 : TYPE_MODE (type)) == CODE_FOR_nothing)
4671 0 : return false;
4672 :
4673 1986 : maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4674 : UNSIGNED));
4675 1986 : ovf_use_seen = false;
4676 1986 : use_seen = false;
4677 1986 : basic_block use_bb = NULL;
4678 4044 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4679 : {
4680 1998 : use_stmt = USE_STMT (use_p);
4681 1998 : if (is_gimple_debug (use_stmt))
4682 8 : continue;
4683 :
4684 1990 : if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4685 : {
4686 12 : ovf_use_seen = true;
4687 12 : use_bb = gimple_bb (use_stmt);
4688 : }
4689 : else
4690 : {
4691 1978 : if (!gimple_assign_cast_p (use_stmt)
4692 1978 : || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4693 : return false;
4694 113 : tree use_lhs = gimple_assign_lhs (use_stmt);
4695 226 : if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4696 226 : || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4697 113 : > TYPE_PRECISION (type)))
4698 : return false;
4699 : use_seen = true;
4700 : }
4701 1926 : }
4702 60 : if (!ovf_use_seen)
4703 : return false;
4704 12 : if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4705 : {
4706 2 : if (!use_seen)
4707 : return false;
4708 2 : tree new_rhs1 = make_ssa_name (type);
4709 2 : gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4710 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4711 2 : rhs1 = new_rhs1;
4712 : }
4713 10 : else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4714 : {
4715 2 : if (!use_seen)
4716 : return false;
4717 2 : tree new_rhs2 = make_ssa_name (type);
4718 2 : gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4719 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4720 2 : rhs2 = new_rhs2;
4721 : }
4722 8 : else if (!use_seen)
4723 : {
4724 : /* If there are no uses of the wider addition, check if
4725 : forwprop has not created a narrower addition.
4726 : Require it to be in the same bb as the overflow check. */
4727 18 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4728 : {
4729 10 : use_stmt = USE_STMT (use_p);
4730 10 : if (is_gimple_debug (use_stmt))
4731 0 : continue;
4732 :
4733 10 : if (use_stmt == stmt)
4734 0 : continue;
4735 :
4736 10 : if (!is_gimple_assign (use_stmt)
4737 10 : || gimple_bb (use_stmt) != use_bb
4738 20 : || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4739 2 : continue;
4740 :
4741 8 : if (gimple_assign_rhs1 (use_stmt) == rhs1)
4742 : {
4743 8 : if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4744 : rhs2, 0))
4745 0 : continue;
4746 : }
4747 0 : else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4748 : {
4749 0 : if (gimple_assign_rhs1 (use_stmt) != rhs2)
4750 0 : continue;
4751 : }
4752 : else
4753 0 : continue;
4754 :
4755 8 : add_stmt = use_stmt;
4756 8 : break;
4757 8 : }
4758 8 : if (add_stmt == NULL)
4759 : return false;
4760 :
4761 : /* If stmt and add_stmt are in the same bb, we need to find out
4762 : which one is earlier. If they are in different bbs, we've
4763 : checked add_stmt is in the same bb as one of the uses of the
4764 : stmt lhs, so stmt needs to dominate add_stmt too. */
4765 8 : if (gimple_bb (stmt) == gimple_bb (add_stmt))
4766 : {
4767 8 : gimple_stmt_iterator gsif = *gsi;
4768 8 : gimple_stmt_iterator gsib = *gsi;
4769 8 : int i;
4770 : /* Search both forward and backward from stmt and have a small
4771 : upper bound. */
4772 20 : for (i = 0; i < 128; i++)
4773 : {
4774 20 : if (!gsi_end_p (gsib))
4775 : {
4776 18 : gsi_prev_nondebug (&gsib);
4777 18 : if (gsi_stmt (gsib) == add_stmt)
4778 : {
4779 : add_first = true;
4780 : break;
4781 : }
4782 : }
4783 2 : else if (gsi_end_p (gsif))
4784 : break;
4785 18 : if (!gsi_end_p (gsif))
4786 : {
4787 18 : gsi_next_nondebug (&gsif);
4788 18 : if (gsi_stmt (gsif) == add_stmt)
4789 : break;
4790 : }
4791 : }
4792 8 : if (i == 128)
4793 0 : return false;
4794 8 : if (add_first)
4795 2 : *gsi = gsi_for_stmt (add_stmt);
4796 : }
4797 : }
4798 : }
4799 :
4800 6616 : if (code == BIT_NOT_EXPR)
4801 170 : *gsi = gsi_for_stmt (cond_stmt);
4802 :
4803 6616 : auto_vec<gimple *, 8> mul_stmts;
4804 6616 : if (code == MULT_EXPR && cast_stmt)
4805 : {
4806 75 : type = TREE_TYPE (cast_lhs);
4807 75 : gimple *g = SSA_NAME_DEF_STMT (rhs1);
4808 75 : if (gimple_assign_cast_p (g)
4809 38 : && useless_type_conversion_p (type,
4810 38 : TREE_TYPE (gimple_assign_rhs1 (g)))
4811 113 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4812 : rhs1 = gimple_assign_rhs1 (g);
4813 : else
4814 : {
4815 37 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4816 37 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4817 37 : rhs1 = gimple_assign_lhs (g);
4818 37 : mul_stmts.quick_push (g);
4819 : }
4820 75 : if (TREE_CODE (rhs2) == INTEGER_CST)
4821 32 : rhs2 = fold_convert (type, rhs2);
4822 : else
4823 : {
4824 43 : g = SSA_NAME_DEF_STMT (rhs2);
4825 43 : if (gimple_assign_cast_p (g)
4826 22 : && useless_type_conversion_p (type,
4827 22 : TREE_TYPE (gimple_assign_rhs1 (g)))
4828 65 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4829 : rhs2 = gimple_assign_rhs1 (g);
4830 : else
4831 : {
4832 21 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4833 21 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4834 21 : rhs2 = gimple_assign_lhs (g);
4835 21 : mul_stmts.quick_push (g);
4836 : }
4837 : }
4838 : }
4839 6616 : tree ctype = build_complex_type (type);
4840 13087 : gcall *g = gimple_build_call_internal (code == MULT_EXPR
4841 : ? IFN_MUL_OVERFLOW
4842 : : code != MINUS_EXPR
4843 6471 : ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4844 : 2, rhs1, rhs2);
4845 6616 : tree ctmp = make_ssa_name (ctype);
4846 6616 : gimple_call_set_lhs (g, ctmp);
4847 6616 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4848 6616 : tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4849 6616 : gassign *g2;
4850 6616 : if (code != BIT_NOT_EXPR)
4851 : {
4852 6446 : g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4853 : build1 (REALPART_EXPR, type, ctmp));
4854 6446 : if (maxval || cast_stmt)
4855 : {
4856 87 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4857 87 : if (add_first)
4858 2 : *gsi = gsi_for_stmt (stmt);
4859 : }
4860 : else
4861 6359 : gsi_replace (gsi, g2, true);
4862 6446 : if (code == MULT_EXPR)
4863 : {
4864 145 : mul_stmts.quick_push (g);
4865 145 : mul_stmts.quick_push (g2);
4866 145 : if (cast_stmt)
4867 : {
4868 75 : g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4869 75 : gsi_replace (gsi, g2, true);
4870 75 : mul_stmts.quick_push (g2);
4871 : }
4872 : }
4873 : }
4874 6616 : tree ovf = make_ssa_name (type);
4875 6616 : g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4876 : build1 (IMAGPART_EXPR, type, ctmp));
4877 6616 : if (code != BIT_NOT_EXPR)
4878 6446 : gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4879 : else
4880 170 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4881 6616 : if (code == MULT_EXPR)
4882 145 : mul_stmts.quick_push (g2);
4883 :
4884 35337 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4885 : {
4886 22180 : if (is_gimple_debug (use_stmt))
4887 5376 : continue;
4888 :
4889 16804 : gimple *orig_use_stmt = use_stmt;
4890 16804 : int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4891 : maxval, NULL);
4892 16804 : if (ovf_use == 0)
4893 : {
4894 10154 : gcc_assert (code != BIT_NOT_EXPR);
4895 10154 : if (maxval)
4896 : {
4897 4 : tree use_lhs = gimple_assign_lhs (use_stmt);
4898 4 : gimple_assign_set_rhs1 (use_stmt, new_lhs);
4899 4 : if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4900 4 : TREE_TYPE (new_lhs)))
4901 4 : gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4902 4 : update_stmt (use_stmt);
4903 : }
4904 10154 : continue;
4905 10154 : }
4906 6650 : if (gimple_code (use_stmt) == GIMPLE_COND)
4907 : {
4908 4425 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
4909 4425 : gimple_cond_set_lhs (cond_stmt, ovf);
4910 4425 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4911 4576 : gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4912 : }
4913 : else
4914 : {
4915 2225 : gcc_checking_assert (is_gimple_assign (use_stmt));
4916 2225 : if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4917 : {
4918 2225 : if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
4919 : {
4920 6 : g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
4921 : ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4922 : ovf, build_int_cst (type, 0));
4923 6 : gimple_stmt_iterator gsiu = gsi_for_stmt (use_stmt);
4924 6 : gsi_insert_before (&gsiu, g2, GSI_SAME_STMT);
4925 6 : gimple_assign_set_rhs_with_ops (&gsiu, NOP_EXPR,
4926 : gimple_assign_lhs (g2));
4927 6 : update_stmt (use_stmt);
4928 6 : use_operand_p use;
4929 6 : single_imm_use (gimple_assign_lhs (use_stmt), &use,
4930 : &use_stmt);
4931 6 : if (gimple_code (use_stmt) == GIMPLE_COND)
4932 : {
4933 0 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
4934 0 : gimple_cond_set_lhs (cond_stmt, ovf);
4935 0 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4936 : }
4937 : else
4938 : {
4939 6 : gcc_checking_assert (is_gimple_assign (use_stmt));
4940 6 : if (gimple_assign_rhs_class (use_stmt)
4941 : == GIMPLE_BINARY_RHS)
4942 : {
4943 0 : gimple_assign_set_rhs1 (use_stmt, ovf);
4944 0 : gimple_assign_set_rhs2 (use_stmt,
4945 : build_int_cst (type, 0));
4946 : }
4947 6 : else if (gimple_assign_cast_p (use_stmt))
4948 6 : gimple_assign_set_rhs1 (use_stmt, ovf);
4949 : else
4950 : {
4951 0 : tree_code sc = gimple_assign_rhs_code (use_stmt);
4952 0 : gcc_checking_assert (sc == COND_EXPR);
4953 0 : tree cond = gimple_assign_rhs1 (use_stmt);
4954 0 : cond = build2 (TREE_CODE (cond),
4955 : boolean_type_node, ovf,
4956 : build_int_cst (type, 0));
4957 0 : gimple_assign_set_rhs1 (use_stmt, cond);
4958 : }
4959 : }
4960 6 : update_stmt (use_stmt);
4961 6 : gsi_remove (&gsiu, true);
4962 6 : gsiu = gsi_for_stmt (g2);
4963 6 : gsi_remove (&gsiu, true);
4964 6 : continue;
4965 6 : }
4966 : else
4967 : {
4968 2219 : gimple_assign_set_rhs1 (use_stmt, ovf);
4969 2219 : gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4970 2368 : gimple_assign_set_rhs_code (use_stmt,
4971 : ovf_use == 1
4972 : ? NE_EXPR : EQ_EXPR);
4973 : }
4974 : }
4975 : else
4976 : {
4977 0 : gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4978 : == COND_EXPR);
4979 0 : tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4980 : boolean_type_node, ovf,
4981 : build_int_cst (type, 0));
4982 0 : gimple_assign_set_rhs1 (use_stmt, cond);
4983 : }
4984 : }
4985 6644 : update_stmt (use_stmt);
4986 6644 : if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4987 : {
4988 145 : gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4989 145 : maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4990 : cfg_changed);
4991 145 : use_operand_p use;
4992 145 : gimple *cast_stmt;
4993 145 : if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
4994 : &cast_stmt)
4995 145 : && gimple_assign_cast_p (cast_stmt))
4996 : {
4997 2 : gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4998 2 : gsi_remove (&gsi3, true);
4999 2 : release_ssa_name (gimple_assign_lhs (cast_stmt));
5000 : }
5001 145 : gsi_remove (&gsi2, true);
5002 145 : release_ssa_name (gimple_assign_lhs (orig_use_stmt));
5003 : }
5004 6616 : }
5005 6616 : if (maxval)
5006 : {
5007 12 : gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
5008 12 : gsi_remove (&gsi2, true);
5009 12 : if (add_stmt)
5010 : {
5011 8 : gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
5012 : new_lhs);
5013 8 : gsi2 = gsi_for_stmt (add_stmt);
5014 8 : gsi_replace (&gsi2, g, true);
5015 : }
5016 : }
5017 6604 : else if (code == BIT_NOT_EXPR)
5018 : {
5019 170 : *gsi = gsi_for_stmt (stmt);
5020 170 : gsi_remove (gsi, true);
5021 170 : release_ssa_name (lhs);
5022 170 : return true;
5023 : }
5024 : return false;
5025 6616 : }
5026 :
5027 : /* Helper of match_uaddc_usubc. Look through an integral cast
5028 : which should preserve [0, 1] range value (unless source has
5029 : 1-bit signed type) and the cast has single use. */
5030 :
5031 : static gimple *
5032 2063153 : uaddc_cast (gimple *g)
5033 : {
5034 2063153 : if (!gimple_assign_cast_p (g))
5035 : return g;
5036 494397 : tree op = gimple_assign_rhs1 (g);
5037 494397 : if (TREE_CODE (op) == SSA_NAME
5038 418238 : && INTEGRAL_TYPE_P (TREE_TYPE (op))
5039 292237 : && (TYPE_PRECISION (TREE_TYPE (op)) > 1
5040 5529 : || TYPE_UNSIGNED (TREE_TYPE (op)))
5041 786634 : && has_single_use (gimple_assign_lhs (g)))
5042 176582 : return SSA_NAME_DEF_STMT (op);
5043 : return g;
5044 : }
5045 :
5046 : /* Helper of match_uaddc_usubc. Look through a NE_EXPR
5047 : comparison with 0 which also preserves [0, 1] value range. */
5048 :
5049 : static gimple *
5050 2063312 : uaddc_ne0 (gimple *g)
5051 : {
5052 2063312 : if (is_gimple_assign (g)
5053 1261335 : && gimple_assign_rhs_code (g) == NE_EXPR
5054 53550 : && integer_zerop (gimple_assign_rhs2 (g))
5055 5285 : && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
5056 2068597 : && has_single_use (gimple_assign_lhs (g)))
5057 5029 : return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
5058 : return g;
5059 : }
5060 :
5061 : /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
5062 : operand. */
5063 :
5064 : static bool
5065 2064143 : uaddc_is_cplxpart (gimple *g, tree_code part)
5066 : {
5067 2064143 : return (is_gimple_assign (g)
5068 1260831 : && gimple_assign_rhs_code (g) == part
5069 2066482 : && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
5070 : }
5071 :
5072 : /* Try to match e.g.
5073 : _29 = .ADD_OVERFLOW (_3, _4);
5074 : _30 = REALPART_EXPR <_29>;
5075 : _31 = IMAGPART_EXPR <_29>;
5076 : _32 = .ADD_OVERFLOW (_30, _38);
5077 : _33 = REALPART_EXPR <_32>;
5078 : _34 = IMAGPART_EXPR <_32>;
5079 : _35 = _31 + _34;
5080 : as
5081 : _36 = .UADDC (_3, _4, _38);
5082 : _33 = REALPART_EXPR <_36>;
5083 : _35 = IMAGPART_EXPR <_36>;
5084 : or
5085 : _22 = .SUB_OVERFLOW (_6, _5);
5086 : _23 = REALPART_EXPR <_22>;
5087 : _24 = IMAGPART_EXPR <_22>;
5088 : _25 = .SUB_OVERFLOW (_23, _37);
5089 : _26 = REALPART_EXPR <_25>;
5090 : _27 = IMAGPART_EXPR <_25>;
5091 : _28 = _24 | _27;
5092 : as
5093 : _29 = .USUBC (_6, _5, _37);
5094 : _26 = REALPART_EXPR <_29>;
5095 : _288 = IMAGPART_EXPR <_29>;
5096 : provided _38 or _37 above have [0, 1] range
5097 : and _3, _4 and _30 or _6, _5 and _23 are unsigned
5098 : integral types with the same precision. Whether + or | or ^ is
5099 : used on the IMAGPART_EXPR results doesn't matter, with one of
5100 : added or subtracted operands in [0, 1] range at most one
5101 : .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
5102 :
5103 : static bool
5104 2783698 : match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
5105 : {
5106 2783698 : tree rhs[4];
5107 2783698 : rhs[0] = gimple_assign_rhs1 (stmt);
5108 2783698 : rhs[1] = gimple_assign_rhs2 (stmt);
5109 2783698 : rhs[2] = NULL_TREE;
5110 2783698 : rhs[3] = NULL_TREE;
5111 2783698 : tree type = TREE_TYPE (rhs[0]);
5112 2783698 : if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
5113 : return false;
5114 :
5115 1642501 : auto_vec<gimple *, 2> temp_stmts;
5116 1642501 : if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
5117 : {
5118 : /* If overflow flag is ignored on the MSB limb, we can end up with
5119 : the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
5120 : or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
5121 : thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
5122 : the limb below the MSB, but also create another .UADDC/.USUBC call
5123 : for the last limb.
5124 :
5125 : First look through assignments with the same rhs code as CODE,
5126 : with the exception that subtraction of a constant is canonicalized
5127 : into addition of its negation. rhs[0] will be minuend for
5128 : subtractions and one of addends for addition, all other assigned
5129 : rhs[i] operands will be subtrahends or other addends. */
5130 1523390 : while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
5131 : {
5132 1497369 : gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
5133 1497369 : if (has_single_use (rhs[0])
5134 498101 : && is_gimple_assign (g)
5135 1937507 : && (gimple_assign_rhs_code (g) == code
5136 407815 : || (code == MINUS_EXPR
5137 52270 : && gimple_assign_rhs_code (g) == PLUS_EXPR
5138 17205 : && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
5139 : {
5140 45528 : tree r2 = gimple_assign_rhs2 (g);
5141 45528 : if (gimple_assign_rhs_code (g) != code)
5142 : {
5143 13205 : r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
5144 13205 : if (!r2)
5145 : break;
5146 : }
5147 45528 : rhs[0] = gimple_assign_rhs1 (g);
5148 45528 : tree &r = rhs[2] ? rhs[3] : rhs[2];
5149 45528 : r = r2;
5150 45528 : temp_stmts.quick_push (g);
5151 : }
5152 : else
5153 : break;
5154 : }
5155 4433586 : for (int i = 1; i <= 2; ++i)
5156 2998037 : while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
5157 : {
5158 527589 : gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
5159 527589 : if (has_single_use (rhs[i])
5160 263625 : && is_gimple_assign (g)
5161 773345 : && gimple_assign_rhs_code (g) == PLUS_EXPR)
5162 : {
5163 42313 : rhs[i] = gimple_assign_rhs1 (g);
5164 42313 : if (rhs[2])
5165 8159 : rhs[3] = gimple_assign_rhs2 (g);
5166 : else
5167 34154 : rhs[2] = gimple_assign_rhs2 (g);
5168 42313 : temp_stmts.quick_push (g);
5169 : }
5170 : else
5171 : break;
5172 : }
5173 : /* If there are just 3 addends or one minuend and two subtrahends,
5174 : check for UADDC or USUBC being pattern recognized earlier.
5175 : Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
5176 : got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
5177 : etc. */
5178 1477862 : if (rhs[2] && !rhs[3])
5179 : {
5180 303344 : for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
5181 176177 : if (TREE_CODE (rhs[i]) == SSA_NAME)
5182 : {
5183 137571 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5184 137571 : im = uaddc_ne0 (im);
5185 137571 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5186 : {
5187 : /* We found one of the 3 addends or 2 subtrahends to be
5188 : __imag__ of something, verify it is .UADDC/.USUBC. */
5189 215 : tree rhs1 = gimple_assign_rhs1 (im);
5190 215 : gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
5191 215 : tree ovf_lhs = NULL_TREE;
5192 215 : tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
5193 235 : if (gimple_call_internal_p (ovf, code == PLUS_EXPR
5194 : ? IFN_ADD_OVERFLOW
5195 : : IFN_SUB_OVERFLOW))
5196 : {
5197 : /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
5198 : This is for the case of 2 chained .UADDC/.USUBC,
5199 : where the first one uses 0 carry-in and the second
5200 : one ignores the carry-out.
5201 : So, something like:
5202 : _16 = .ADD_OVERFLOW (_1, _2);
5203 : _17 = REALPART_EXPR <_16>;
5204 : _18 = IMAGPART_EXPR <_16>;
5205 : _15 = _3 + _4;
5206 : _12 = _15 + _18;
5207 : where the first 3 statements come from the lower
5208 : limb addition and the last 2 from the higher limb
5209 : which ignores carry-out. */
5210 197 : ovf_lhs = gimple_call_lhs (ovf);
5211 197 : tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
5212 197 : ovf_arg1 = gimple_call_arg (ovf, 0);
5213 197 : ovf_arg2 = gimple_call_arg (ovf, 1);
5214 : /* In that case we need to punt if the types don't
5215 : mismatch. */
5216 197 : if (!types_compatible_p (type, ovf_lhs_type)
5217 197 : || !types_compatible_p (type, TREE_TYPE (ovf_arg1))
5218 391 : || !types_compatible_p (type,
5219 194 : TREE_TYPE (ovf_arg2)))
5220 : ovf_lhs = NULL_TREE;
5221 : else
5222 : {
5223 479 : for (int i = (code == PLUS_EXPR ? 1 : 0);
5224 479 : i >= 0; --i)
5225 : {
5226 339 : tree r = gimple_call_arg (ovf, i);
5227 339 : if (TREE_CODE (r) != SSA_NAME)
5228 0 : continue;
5229 339 : if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
5230 : REALPART_EXPR))
5231 : {
5232 : /* Punt if one of the args which isn't
5233 : subtracted isn't __real__; that could
5234 : then prevent better match later.
5235 : Consider:
5236 : _3 = .ADD_OVERFLOW (_1, _2);
5237 : _4 = REALPART_EXPR <_3>;
5238 : _5 = IMAGPART_EXPR <_3>;
5239 : _7 = .ADD_OVERFLOW (_4, _6);
5240 : _8 = REALPART_EXPR <_7>;
5241 : _9 = IMAGPART_EXPR <_7>;
5242 : _12 = _10 + _11;
5243 : _13 = _12 + _9;
5244 : _14 = _13 + _5;
5245 : We want to match this when called on
5246 : the last stmt as a pair of .UADDC calls,
5247 : but without this check we could turn
5248 : that prematurely on _13 = _12 + _9;
5249 : stmt into .UADDC with 0 carry-in just
5250 : on the second .ADD_OVERFLOW call and
5251 : another replacing the _12 and _13
5252 : additions. */
5253 : ovf_lhs = NULL_TREE;
5254 : break;
5255 : }
5256 : }
5257 : }
5258 190 : if (ovf_lhs)
5259 : {
5260 140 : use_operand_p use_p;
5261 140 : imm_use_iterator iter;
5262 140 : tree re_lhs = NULL_TREE;
5263 560 : FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
5264 : {
5265 280 : gimple *use_stmt = USE_STMT (use_p);
5266 280 : if (is_gimple_debug (use_stmt))
5267 0 : continue;
5268 280 : if (use_stmt == im)
5269 140 : continue;
5270 140 : if (!uaddc_is_cplxpart (use_stmt,
5271 : REALPART_EXPR))
5272 : {
5273 : ovf_lhs = NULL_TREE;
5274 : break;
5275 : }
5276 140 : re_lhs = gimple_assign_lhs (use_stmt);
5277 140 : }
5278 140 : if (ovf_lhs && re_lhs)
5279 : {
5280 502 : FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
5281 : {
5282 281 : gimple *use_stmt = USE_STMT (use_p);
5283 281 : if (is_gimple_debug (use_stmt))
5284 102 : continue;
5285 179 : internal_fn ifn
5286 179 : = gimple_call_internal_fn (ovf);
5287 : /* Punt if the __real__ of lhs is used
5288 : in the same .*_OVERFLOW call.
5289 : Consider:
5290 : _3 = .ADD_OVERFLOW (_1, _2);
5291 : _4 = REALPART_EXPR <_3>;
5292 : _5 = IMAGPART_EXPR <_3>;
5293 : _7 = .ADD_OVERFLOW (_4, _6);
5294 : _8 = REALPART_EXPR <_7>;
5295 : _9 = IMAGPART_EXPR <_7>;
5296 : _12 = _10 + _11;
5297 : _13 = _12 + _5;
5298 : _14 = _13 + _9;
5299 : We want to match this when called on
5300 : the last stmt as a pair of .UADDC calls,
5301 : but without this check we could turn
5302 : that prematurely on _13 = _12 + _5;
5303 : stmt into .UADDC with 0 carry-in just
5304 : on the first .ADD_OVERFLOW call and
5305 : another replacing the _12 and _13
5306 : additions. */
5307 179 : if (gimple_call_internal_p (use_stmt, ifn))
5308 : {
5309 : ovf_lhs = NULL_TREE;
5310 : break;
5311 : }
5312 140 : }
5313 : }
5314 : }
5315 : }
5316 140 : if ((ovf_lhs
5317 143 : || gimple_call_internal_p (ovf,
5318 : code == PLUS_EXPR
5319 : ? IFN_UADDC : IFN_USUBC))
5320 241 : && (optab_handler (code == PLUS_EXPR
5321 : ? uaddc5_optab : usubc5_optab,
5322 87 : TYPE_MODE (type))
5323 : != CODE_FOR_nothing))
5324 : {
5325 : /* And in that case build another .UADDC/.USUBC
5326 : call for the most significand limb addition.
5327 : Overflow bit is ignored here. */
5328 63 : if (i != 2)
5329 63 : std::swap (rhs[i], rhs[2]);
5330 63 : gimple *g
5331 77 : = gimple_build_call_internal (code == PLUS_EXPR
5332 : ? IFN_UADDC
5333 : : IFN_USUBC,
5334 : 3, rhs[0], rhs[1],
5335 : rhs[2]);
5336 63 : tree nlhs = make_ssa_name (build_complex_type (type));
5337 63 : gimple_call_set_lhs (g, nlhs);
5338 63 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5339 63 : tree ilhs = gimple_assign_lhs (stmt);
5340 63 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5341 : build1 (REALPART_EXPR,
5342 63 : TREE_TYPE (ilhs),
5343 : nlhs));
5344 63 : gsi_replace (gsi, g, true);
5345 : /* And if it is initialized from result of __imag__
5346 : of .{ADD,SUB}_OVERFLOW call, replace that
5347 : call with .U{ADD,SUB}C call with the same arguments,
5348 : just 0 added as third argument. This isn't strictly
5349 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5350 : produce the same result, but may result in better
5351 : generated code on some targets where the backend can
5352 : better prepare in how the result will be used. */
5353 63 : if (ovf_lhs)
5354 : {
5355 57 : tree zero = build_zero_cst (type);
5356 57 : g = gimple_build_call_internal (code == PLUS_EXPR
5357 : ? IFN_UADDC
5358 : : IFN_USUBC,
5359 : 3, ovf_arg1,
5360 : ovf_arg2, zero);
5361 57 : gimple_call_set_lhs (g, ovf_lhs);
5362 57 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
5363 57 : gsi_replace (&gsi2, g, true);
5364 : }
5365 63 : return true;
5366 : }
5367 : }
5368 : }
5369 : return false;
5370 : }
5371 1414247 : if (code == MINUS_EXPR && !rhs[2])
5372 : return false;
5373 263 : if (code == MINUS_EXPR)
5374 : /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
5375 : So, for MINUS_EXPR swap the single added rhs operand (others are
5376 : subtracted) to rhs[3]. */
5377 263 : std::swap (rhs[0], rhs[3]);
5378 : }
5379 : /* Walk from both operands of STMT (for +/- even sometimes from
5380 : all the 4 addends or 3 subtrahends), see through casts and != 0
5381 : statements which would preserve [0, 1] range of values and
5382 : check which is initialized from __imag__. */
5383 7347119 : gimple *im1 = NULL, *im2 = NULL;
5384 14693188 : for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
5385 5877806 : if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
5386 : {
5387 1925490 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5388 1925490 : im = uaddc_ne0 (im);
5389 1925490 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5390 : {
5391 1604 : if (im1 == NULL)
5392 : {
5393 1211 : im1 = im;
5394 1211 : if (i != 0)
5395 331 : std::swap (rhs[0], rhs[i]);
5396 : }
5397 : else
5398 : {
5399 393 : im2 = im;
5400 393 : if (i != 1)
5401 23 : std::swap (rhs[1], rhs[i]);
5402 : break;
5403 : }
5404 : }
5405 : }
5406 : /* If we don't find at least two, punt. */
5407 1469706 : if (!im2)
5408 : return false;
5409 : /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
5410 : either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
5411 : uaddc5/usubc5 named pattern for the corresponding mode. */
5412 393 : gimple *ovf1
5413 393 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
5414 393 : gimple *ovf2
5415 393 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
5416 393 : internal_fn ifn;
5417 393 : if (!is_gimple_call (ovf1)
5418 393 : || !gimple_call_internal_p (ovf1)
5419 393 : || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
5420 60 : && ifn != IFN_SUB_OVERFLOW)
5421 370 : || !gimple_call_internal_p (ovf2, ifn)
5422 399 : || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
5423 366 : TYPE_MODE (type)) == CODE_FOR_nothing
5424 94 : || (rhs[2]
5425 17 : && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
5426 15 : TYPE_MODE (type)) == CODE_FOR_nothing)
5427 94 : || !types_compatible_p (type,
5428 94 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf1))))
5429 486 : || !types_compatible_p (type,
5430 93 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf2)))))
5431 300 : return false;
5432 93 : tree arg1, arg2, arg3 = NULL_TREE;
5433 93 : gimple *re1 = NULL, *re2 = NULL;
5434 : /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
5435 : should be initialized from __real__ of the other of the two calls.
5436 : Though, for .SUB_OVERFLOW, it has to be the first argument, not the
5437 : second one. */
5438 340 : for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
5439 349 : for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
5440 : {
5441 288 : tree arg = gimple_call_arg (ovf, i);
5442 288 : if (TREE_CODE (arg) != SSA_NAME)
5443 2 : continue;
5444 286 : re1 = SSA_NAME_DEF_STMT (arg);
5445 286 : if (uaddc_is_cplxpart (re1, REALPART_EXPR)
5446 379 : && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
5447 93 : == (ovf == ovf1 ? ovf2 : ovf1)))
5448 : {
5449 93 : if (ovf == ovf1)
5450 : {
5451 : /* Make sure ovf2 is the .*_OVERFLOW call with argument
5452 : initialized from __real__ of ovf1. */
5453 20 : std::swap (rhs[0], rhs[1]);
5454 20 : std::swap (im1, im2);
5455 20 : std::swap (ovf1, ovf2);
5456 : }
5457 93 : arg3 = gimple_call_arg (ovf, 1 - i);
5458 93 : i = -1;
5459 93 : break;
5460 : }
5461 : }
5462 93 : if (!arg3)
5463 : return false;
5464 93 : arg1 = gimple_call_arg (ovf1, 0);
5465 93 : arg2 = gimple_call_arg (ovf1, 1);
5466 93 : if (!types_compatible_p (type, TREE_TYPE (arg1)))
5467 : return false;
5468 93 : int kind[2] = { 0, 0 };
5469 93 : tree arg_im[2] = { NULL_TREE, NULL_TREE };
5470 : /* At least one of arg2 and arg3 should have type compatible
5471 : with arg1/rhs[0], and the other one should have value in [0, 1]
5472 : range. If both are in [0, 1] range and type compatible with
5473 : arg1/rhs[0], try harder to find after looking through casts,
5474 : != 0 comparisons which one is initialized to __imag__ of
5475 : .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
5476 279 : for (int i = 0; i < 2; ++i)
5477 : {
5478 186 : tree arg = i == 0 ? arg2 : arg3;
5479 186 : if (types_compatible_p (type, TREE_TYPE (arg)))
5480 161 : kind[i] = 1;
5481 372 : if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
5482 372 : || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
5483 25 : && !TYPE_UNSIGNED (TREE_TYPE (arg))))
5484 0 : continue;
5485 186 : if (tree_zero_one_valued_p (arg))
5486 51 : kind[i] |= 2;
5487 186 : if (TREE_CODE (arg) == SSA_NAME)
5488 : {
5489 184 : gimple *g = SSA_NAME_DEF_STMT (arg);
5490 184 : if (gimple_assign_cast_p (g))
5491 : {
5492 30 : tree op = gimple_assign_rhs1 (g);
5493 30 : if (TREE_CODE (op) == SSA_NAME
5494 30 : && INTEGRAL_TYPE_P (TREE_TYPE (op)))
5495 30 : g = SSA_NAME_DEF_STMT (op);
5496 : }
5497 184 : g = uaddc_ne0 (g);
5498 184 : if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
5499 124 : continue;
5500 60 : arg_im[i] = gimple_assign_lhs (g);
5501 60 : g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
5502 60 : if (!is_gimple_call (g) || !gimple_call_internal_p (g))
5503 0 : continue;
5504 60 : switch (gimple_call_internal_fn (g))
5505 : {
5506 60 : case IFN_ADD_OVERFLOW:
5507 60 : case IFN_SUB_OVERFLOW:
5508 60 : case IFN_UADDC:
5509 60 : case IFN_USUBC:
5510 60 : break;
5511 0 : default:
5512 0 : continue;
5513 : }
5514 60 : kind[i] |= 4;
5515 : }
5516 : }
5517 : /* Make arg2 the one with compatible type and arg3 the one
5518 : with [0, 1] range. If both is true for both operands,
5519 : prefer as arg3 result of __imag__ of some ifn. */
5520 93 : if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
5521 : {
5522 1 : std::swap (arg2, arg3);
5523 1 : std::swap (kind[0], kind[1]);
5524 1 : std::swap (arg_im[0], arg_im[1]);
5525 : }
5526 93 : if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
5527 : return false;
5528 69 : if (!has_single_use (gimple_assign_lhs (im1))
5529 67 : || !has_single_use (gimple_assign_lhs (im2))
5530 67 : || !has_single_use (gimple_assign_lhs (re1))
5531 136 : || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
5532 : return false;
5533 : /* Check that ovf2's result is used in __real__ and set re2
5534 : to that statement. */
5535 67 : use_operand_p use_p;
5536 67 : imm_use_iterator iter;
5537 67 : tree lhs = gimple_call_lhs (ovf2);
5538 267 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5539 : {
5540 133 : gimple *use_stmt = USE_STMT (use_p);
5541 133 : if (is_gimple_debug (use_stmt))
5542 0 : continue;
5543 133 : if (use_stmt == im2)
5544 67 : continue;
5545 66 : if (re2)
5546 : return false;
5547 66 : if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
5548 : return false;
5549 : re2 = use_stmt;
5550 0 : }
5551 : /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5552 67 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
5553 67 : gimple *g;
5554 67 : if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
5555 : arg3 = arg_im[1];
5556 67 : if ((kind[1] & 1) == 0)
5557 : {
5558 25 : if (TREE_CODE (arg3) == INTEGER_CST)
5559 0 : arg3 = fold_convert (type, arg3);
5560 : else
5561 : {
5562 25 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
5563 25 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5564 25 : arg3 = gimple_assign_lhs (g);
5565 : }
5566 : }
5567 89 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5568 : ? IFN_UADDC : IFN_USUBC,
5569 : 3, arg1, arg2, arg3);
5570 67 : tree nlhs = make_ssa_name (TREE_TYPE (lhs));
5571 67 : gimple_call_set_lhs (g, nlhs);
5572 67 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5573 : /* In the case where stmt is | or ^ of two overflow flags
5574 : or addition of those, replace stmt with __imag__ of the above
5575 : added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5576 : arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5577 67 : tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
5578 67 : g = gimple_build_assign (ilhs, IMAGPART_EXPR,
5579 67 : build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
5580 67 : if (rhs[2])
5581 : {
5582 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5583 : /* Remove some further statements which can't be kept in the IL because
5584 : they can use SSA_NAMEs whose setter is going to be removed too. */
5585 75 : for (gimple *g2 : temp_stmts)
5586 : {
5587 30 : gsi2 = gsi_for_stmt (g2);
5588 30 : gsi_remove (&gsi2, true);
5589 30 : release_defs (g2);
5590 : }
5591 : }
5592 : else
5593 52 : gsi_replace (gsi, g, true);
5594 : /* Remove some statements which can't be kept in the IL because they
5595 : use SSA_NAME whose setter is going to be removed too. */
5596 67 : tree rhs1 = rhs[1];
5597 103 : for (int i = 0; i < 2; i++)
5598 85 : if (rhs1 == gimple_assign_lhs (im2))
5599 : break;
5600 : else
5601 : {
5602 36 : g = SSA_NAME_DEF_STMT (rhs1);
5603 36 : rhs1 = gimple_assign_rhs1 (g);
5604 36 : gsi2 = gsi_for_stmt (g);
5605 36 : gsi_remove (&gsi2, true);
5606 36 : release_defs (g);
5607 : }
5608 67 : gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
5609 67 : gsi2 = gsi_for_stmt (im2);
5610 67 : gsi_remove (&gsi2, true);
5611 67 : release_defs (im2);
5612 : /* Replace the re2 statement with __real__ of the newly added
5613 : .UADDC/.USUBC call. */
5614 67 : if (re2)
5615 : {
5616 66 : gsi2 = gsi_for_stmt (re2);
5617 66 : tree rlhs = gimple_assign_lhs (re2);
5618 66 : g = gimple_build_assign (rlhs, REALPART_EXPR,
5619 66 : build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
5620 66 : gsi_replace (&gsi2, g, true);
5621 : }
5622 67 : if (rhs[2])
5623 : {
5624 : /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5625 : arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5626 : replace stmt with __real__ of another .UADDC/.USUBC call which
5627 : handles the most significant limb. Overflow flag from this is
5628 : ignored. */
5629 17 : g = gimple_build_call_internal (code == PLUS_EXPR
5630 : ? IFN_UADDC : IFN_USUBC,
5631 : 3, rhs[3], rhs[2], ilhs);
5632 15 : nlhs = make_ssa_name (TREE_TYPE (lhs));
5633 15 : gimple_call_set_lhs (g, nlhs);
5634 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5635 15 : ilhs = gimple_assign_lhs (stmt);
5636 15 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5637 15 : build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
5638 15 : gsi_replace (gsi, g, true);
5639 : }
5640 67 : if (TREE_CODE (arg3) == SSA_NAME)
5641 : {
5642 : /* When pattern recognizing the second least significant limb
5643 : above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5644 : check if the [0, 1] range argument (i.e. carry in) isn't the
5645 : result of another .{ADD,SUB}_OVERFLOW call (one handling the
5646 : least significant limb). Again look through casts and != 0. */
5647 67 : gimple *im3 = SSA_NAME_DEF_STMT (arg3);
5648 92 : for (int i = 0; i < 2; ++i)
5649 : {
5650 92 : gimple *im4 = uaddc_cast (im3);
5651 92 : if (im4 == im3)
5652 : break;
5653 : else
5654 25 : im3 = im4;
5655 : }
5656 67 : im3 = uaddc_ne0 (im3);
5657 67 : if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
5658 : {
5659 60 : gimple *ovf3
5660 60 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
5661 60 : if (gimple_call_internal_p (ovf3, ifn))
5662 : {
5663 25 : lhs = gimple_call_lhs (ovf3);
5664 25 : arg1 = gimple_call_arg (ovf3, 0);
5665 25 : arg2 = gimple_call_arg (ovf3, 1);
5666 25 : if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
5667 25 : && types_compatible_p (type, TREE_TYPE (arg1))
5668 50 : && types_compatible_p (type, TREE_TYPE (arg2)))
5669 : {
5670 : /* And if it is initialized from result of __imag__
5671 : of .{ADD,SUB}_OVERFLOW call, replace that
5672 : call with .U{ADD,SUB}C call with the same arguments,
5673 : just 0 added as third argument. This isn't strictly
5674 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5675 : produce the same result, but may result in better
5676 : generated code on some targets where the backend can
5677 : better prepare in how the result will be used. */
5678 25 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5679 : ? IFN_UADDC : IFN_USUBC,
5680 : 3, arg1, arg2,
5681 : build_zero_cst (type));
5682 25 : gimple_call_set_lhs (g, lhs);
5683 25 : gsi2 = gsi_for_stmt (ovf3);
5684 25 : gsi_replace (&gsi2, g, true);
5685 : }
5686 : }
5687 : }
5688 : }
5689 : return true;
5690 1642501 : }
5691 :
/* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
   (x & (x - 1)) > x - 1 or (x & (x - 1)) <= x - 1 if .POPCOUNT
   isn't a direct optab.  Also handle `<=`/`>` to be
   `x & (x - 1) ==/!= 0`.  */
5696 :
static void
match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
{
  tree clhs, crhs;
  enum tree_code code;
  bool was_le = false;
  /* STMT is either a GIMPLE_COND or a comparison assignment; fetch the
     comparison operands and code through the matching accessors.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      clhs = gimple_cond_lhs (stmt);
      crhs = gimple_cond_rhs (stmt);
      code = gimple_cond_code (stmt);
    }
  else
    {
      clhs = gimple_assign_rhs1 (stmt);
      crhs = gimple_assign_rhs2 (stmt);
      code = gimple_assign_rhs_code (stmt);
    }
  /* Only ==, !=, <= and > comparisons of .POPCOUNT against 1 are
     handled.  */
  if (code != LE_EXPR && code != GT_EXPR
      && code != EQ_EXPR && code != NE_EXPR)
    return;
  if (code == LE_EXPR || code == GT_EXPR)
    was_le = true;
  if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
    return;
  gimple *call = SSA_NAME_DEF_STMT (clhs);
  /* For a defining statement that isn't a (built-in or internal)
     popcount call this yields no CASE_CFN_POPCOUNT match and we bail
     out in the switch below.  */
  combined_fn cfn = gimple_call_combined_fn (call);
  switch (cfn)
    {
    CASE_CFN_POPCOUNT:
      break;
    default:
      return;
    }
  /* The popcount result must feed only this comparison, since the call
     is removed or replaced below.  */
  if (!has_single_use (clhs))
    return;
  tree arg = gimple_call_arg (call, 0);
  tree type = TREE_TYPE (arg);
  if (!INTEGRAL_TYPE_P (type))
    return;
  bool nonzero_arg = tree_expr_nonzero_p (arg);
  if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
    {
      /* Tell expand_POPCOUNT the popcount result is only used in equality
	 comparison with one, so that it can decide based on rtx costs.  */
      gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
					      was_le ? integer_minus_one_node
					      : (nonzero_arg ? integer_zero_node
						 : integer_one_node));
      gimple_call_set_lhs (g, gimple_call_lhs (call));
      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
      gsi_replace (&gsi2, g, true);
      return;
    }
  /* .POPCOUNT isn't supported directly; open-code the test.
     argm1 = arg - 1 (built as arg + -1).  */
  tree argm1 = make_ssa_name (type);
  gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
				   build_int_cst (type, -1));
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  /* arg & (arg - 1) when arg is known non-zero or for <=/> tests,
     otherwise arg ^ (arg - 1) so that the comparison against arg - 1
     below also distinguishes arg == 0.  */
  g = gimple_build_assign (make_ssa_name (type),
			   (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR,
			   arg, argm1);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  /* Pick the replacement comparison; ARGM1 is reused as its rhs and is
     replaced by zero where the test is against 0.  */
  tree_code cmpcode;
  if (was_le)
    {
      argm1 = build_zero_cst (type);
      /* popcount (x) <= 1 -> (x & (x - 1)) == 0, > 1 -> != 0.  */
      cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR;
    }
  else if (nonzero_arg)
    {
      /* With x known non-zero, popcount (x) == 1 -> (x & (x - 1)) == 0.  */
      argm1 = build_zero_cst (type);
      cmpcode = code;
    }
  else
    /* popcount (x) == 1 -> (x ^ (x - 1)) > x - 1, != 1 -> <=.  */
    cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR;
  if (gcond *cond = dyn_cast <gcond *> (stmt))
    {
      gimple_cond_set_lhs (cond, gimple_assign_lhs (g));
      gimple_cond_set_rhs (cond, argm1);
      gimple_cond_set_code (cond, cmpcode);
    }
  else
    {
      gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (g));
      gimple_assign_set_rhs2 (stmt, argm1);
      gimple_assign_set_rhs_code (stmt, cmpcode);
    }
  update_stmt (stmt);
  /* The popcount call is now dead; remove it.  */
  gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
  gsi_remove (&gsi2, true);
  release_defs (call);
}
5789 :
5790 : /* Return true if target has support for divmod. */
5791 :
5792 : static bool
5793 28840 : target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
5794 : {
5795 : /* If target supports hardware divmod insn, use it for divmod. */
5796 28840 : if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
5797 : return true;
5798 :
5799 : /* Check if libfunc for divmod is available. */
5800 2606 : rtx libfunc = optab_libfunc (divmod_optab, mode);
5801 2606 : if (libfunc != NULL_RTX)
5802 : {
5803 : /* If optab_handler exists for div_optab, perhaps in a wider mode,
5804 : we don't want to use the libfunc even if it exists for given mode. */
5805 : machine_mode div_mode;
5806 10854 : FOR_EACH_MODE_FROM (div_mode, mode)
5807 8248 : if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5808 : return false;
5809 :
5810 2606 : return targetm.expand_divmod_libfunc != NULL;
5811 : }
5812 :
5813 : return false;
5814 : }
5815 :
5816 : /* Check if stmt is candidate for divmod transform. */
5817 :
5818 : static bool
5819 47838 : divmod_candidate_p (gassign *stmt)
5820 : {
5821 47838 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5822 47838 : machine_mode mode = TYPE_MODE (type);
5823 47838 : optab divmod_optab, div_optab;
5824 :
5825 47838 : if (TYPE_UNSIGNED (type))
5826 : {
5827 : divmod_optab = udivmod_optab;
5828 : div_optab = udiv_optab;
5829 : }
5830 : else
5831 : {
5832 20150 : divmod_optab = sdivmod_optab;
5833 20150 : div_optab = sdiv_optab;
5834 : }
5835 :
5836 47838 : tree op1 = gimple_assign_rhs1 (stmt);
5837 47838 : tree op2 = gimple_assign_rhs2 (stmt);
5838 :
5839 : /* Disable the transform if either is a constant, since division-by-constant
5840 : may have specialized expansion. */
5841 47838 : if (CONSTANT_CLASS_P (op1))
5842 : return false;
5843 :
5844 44067 : if (CONSTANT_CLASS_P (op2))
5845 : {
5846 17493 : if (integer_pow2p (op2))
5847 : return false;
5848 :
5849 15349 : if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5850 16431 : && element_precision (type) <= BITS_PER_WORD)
5851 : return false;
5852 :
5853 : /* If the divisor is not power of 2 and the precision wider than
5854 : HWI, expand_divmod punts on that, so in that case it is better
5855 : to use divmod optab or libfunc. Similarly if choose_multiplier
5856 : might need pre/post shifts of BITS_PER_WORD or more. */
5857 : }
5858 :
5859 : /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5860 : expand using the [su]divv optabs. */
5861 28840 : if (TYPE_OVERFLOW_TRAPS (type))
5862 : return false;
5863 :
5864 28840 : if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5865 : return false;
5866 :
5867 : return true;
5868 : }
5869 :
/* This function looks for:
   t1 = a TRUNC_DIV_EXPR b;
   t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
   complex_tmp = DIVMOD (a, b);
   t1 = REALPART_EXPR (complex_tmp);
   t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */
5887 :
static bool
convert_to_divmod (gassign *stmt)
{
  if (stmt_can_throw_internal (cfun, stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (stmt_can_throw_internal (cfun, use_stmt))
	    continue;

	  basic_block bb = gimple_bb (use_stmt);

	  /* Within the same block, statement UIDs give the order;
	     otherwise use the dominator tree.  */
	  if (bb == top_bb)
	    {
	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
		top_stmt = use_stmt;
	    }
	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	    {
	      top_bb = bb;
	      top_stmt = use_stmt;
	    }
	}
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (use_stmt == top_stmt
	      || stmt_can_throw_internal (cfun, use_stmt)
	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
	    continue;

	  stmts.safe_push (use_stmt);
	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
	    div_seen = true;
	}
    }

  /* Only worthwhile when both a division and a modulo by the same
     operands are present.  */
  if (!div_seen)
    return false;

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
	{
	case TRUNC_DIV_EXPR:
	  new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
	  break;

	case TRUNC_MOD_EXPR:
	  new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
	  break;

	default:
	  gcc_unreachable ();
	}

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}
6013 :
/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree stype = TREE_TYPE (lhs);
  tree sarg0 = gimple_assign_rhs1 (stmt);
  tree sarg1 = gimple_assign_rhs2 (stmt);

  /* The shifted value must be a single-use SSA_NAME of integer type
     and the shift count a constant fitting in an uhwi.  */
  if (TREE_CODE (stype) != INTEGER_TYPE
      || TREE_CODE (sarg1) != INTEGER_CST
      || TREE_CODE (sarg0) != SSA_NAME
      || !tree_fits_uhwi_p (sarg1)
      || !has_single_use (sarg0))
    return false;

  gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
  if (!def)
    return false;

  enum tree_code mcode = gimple_assign_rhs_code (def);
  /* Look through a single-use conversion between the shift and the
     multiplication.  */
  if (mcode == NOP_EXPR)
    {
      tree tmp = gimple_assign_rhs1 (def);
      if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
	return false;
      def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
      if (!def)
	return false;
      mcode = gimple_assign_rhs_code (def);
    }

  /* The shifted value must be a widening multiply in the same block.  */
  if (mcode != WIDEN_MULT_EXPR
      || gimple_bb (def) != gimple_bb (stmt))
    return false;
  tree mtype = TREE_TYPE (gimple_assign_lhs (def));
  if (TREE_CODE (mtype) != INTEGER_TYPE
      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
    return false;

  tree mop1 = gimple_assign_rhs1 (def);
  tree mop2 = gimple_assign_rhs2 (def);
  tree optype = TREE_TYPE (mop1);
  bool unsignedp = TYPE_UNSIGNED (optype);
  unsigned int prec = TYPE_PRECISION (optype);

  /* The multiply must widen to exactly twice the operand precision.  */
  if (unsignedp != TYPE_UNSIGNED (mtype)
      || TYPE_PRECISION (mtype) != 2 * prec)
    return false;

  /* The shift count must select (part of) the high half of the
     product.  */
  unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
  if (bits < prec || bits >= 2 * prec)
    return false;

  /* For the time being, require operands to have the same sign.  */
  if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
    return false;

  /* The target must provide a highpart multiply for this mode.  */
  machine_mode mode = TYPE_MODE (optype);
  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
  if (optab_handler (tab, mode) == CODE_FOR_nothing)
    return false;

  /* Emit highparttmp = mop1 h* mop2 before STMT.  */
  location_t loc = gimple_location (stmt);
  tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
					   MULT_HIGHPART_EXPR, mop1, mop2);
  tree highpart2 = highpart1;
  tree ntype = optype;

  /* Match the signedness of the original shift result, casting if
     needed.  */
  if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
    {
      ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
				    : signed_type_for (optype);
      highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
    }
  /* Shifts beyond the low half are applied to the highpart result.  */
  if (bits > prec)
    highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
					RSHIFT_EXPR, highpart2,
					build_int_cst (ntype, bits - prec));

  /* Replace STMT with a conversion of the highpart to the shift's
     result type.  */
  gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
  gsi_replace (gsi, new_stmt, true);

  widen_mul_stats.highpart_mults_inserted++;
  return true;
}
6103 :
6104 : /* If target has spaceship<MODE>3 expander, pattern recognize
6105 : <bb 2> [local count: 1073741824]:
6106 : if (a_2(D) == b_3(D))
6107 : goto <bb 6>; [34.00%]
6108 : else
6109 : goto <bb 3>; [66.00%]
6110 :
6111 : <bb 3> [local count: 708669601]:
6112 : if (a_2(D) < b_3(D))
6113 : goto <bb 6>; [1.04%]
6114 : else
6115 : goto <bb 4>; [98.96%]
6116 :
6117 : <bb 4> [local count: 701299439]:
6118 : if (a_2(D) > b_3(D))
6119 : goto <bb 5>; [48.89%]
6120 : else
6121 : goto <bb 6>; [51.11%]
6122 :
6123 : <bb 5> [local count: 342865295]:
6124 :
6125 : <bb 6> [local count: 1073741824]:
6126 : and turn it into:
6127 : <bb 2> [local count: 1073741824]:
6128 : _1 = .SPACESHIP (a_2(D), b_3(D), 0);
6129 : if (_1 == 0)
6130 : goto <bb 6>; [34.00%]
6131 : else
6132 : goto <bb 3>; [66.00%]
6133 :
6134 : <bb 3> [local count: 708669601]:
6135 : if (_1 == -1)
6136 : goto <bb 6>; [1.04%]
6137 : else
6138 : goto <bb 4>; [98.96%]
6139 :
6140 : <bb 4> [local count: 701299439]:
6141 : if (_1 == 1)
6142 : goto <bb 5>; [48.89%]
6143 : else
6144 : goto <bb 6>; [51.11%]
6145 :
6146 : <bb 5> [local count: 342865295]:
6147 :
6148 : <bb 6> [local count: 1073741824]:
6149 : so that the backend can emit optimal comparison and
6150 : conditional jump sequence. If the
6151 : <bb 6> [local count: 1073741824]:
6152 : above has a single PHI like:
6153 : # _27 = PHI<0(2), -1(3), -128(4), 1(5)>
6154 : then replace it with effectively
6155 : _1 = .SPACESHIP (a_2(D), b_3(D), -128);
6156 : _27 = _1; */
6157 :
6158 : static void
6159 4150947 : optimize_spaceship (gcond *stmt)
6160 : {
6161 4150947 : enum tree_code code = gimple_cond_code (stmt);
6162 4150947 : if (code != EQ_EXPR && code != NE_EXPR)
6163 4150755 : return;
6164 3356889 : tree arg1 = gimple_cond_lhs (stmt);
6165 3356889 : tree arg2 = gimple_cond_rhs (stmt);
6166 3356889 : if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
6167 3246691 : && !INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
6168 2606772 : || optab_handler (spaceship_optab,
6169 2606772 : TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
6170 5922443 : || operand_equal_p (arg1, arg2, 0))
6171 792654 : return;
6172 :
6173 2564235 : basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
6174 2564235 : edge em1 = NULL, e1 = NULL, e2 = NULL;
6175 2564235 : bb1 = EDGE_SUCC (bb0, 1)->dest;
6176 2564235 : if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
6177 1545485 : bb1 = EDGE_SUCC (bb0, 0)->dest;
6178 :
6179 7640724 : gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
6180 1122801 : if (g == NULL
6181 1122801 : || !single_pred_p (bb1)
6182 707318 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6183 589227 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6184 471136 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6185 1664 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6186 599158 : || !cond_only_block_p (bb1))
6187 2554994 : return;
6188 :
6189 9241 : enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6190 9241 : ? LT_EXPR : GT_EXPR);
6191 9241 : switch (gimple_cond_code (g))
6192 : {
6193 : case LT_EXPR:
6194 : case LE_EXPR:
6195 : break;
6196 7755 : case GT_EXPR:
6197 7755 : case GE_EXPR:
6198 7755 : ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
6199 : break;
6200 : default:
6201 : return;
6202 : }
6203 :
6204 27633 : for (int i = 0; i < 2; ++i)
6205 : {
6206 : /* With NaNs, </<=/>/>= are false, so we need to look for the
6207 : third comparison on the false edge from whatever non-equality
6208 : comparison the second comparison is. */
6209 18476 : if (HONOR_NANS (TREE_TYPE (arg1))
6210 18476 : && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
6211 199 : continue;
6212 :
6213 18277 : bb2 = EDGE_SUCC (bb1, i)->dest;
6214 54506 : g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
6215 12479 : if (g == NULL
6216 12479 : || !single_pred_p (bb2)
6217 17407 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6218 9822 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6219 2237 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6220 11 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6221 85 : || !cond_only_block_p (bb2)
6222 9907 : || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
6223 18192 : continue;
6224 :
6225 85 : enum tree_code ccode2
6226 85 : = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
6227 85 : switch (gimple_cond_code (g))
6228 : {
6229 : case LT_EXPR:
6230 : case LE_EXPR:
6231 : break;
6232 55 : case GT_EXPR:
6233 55 : case GE_EXPR:
6234 55 : ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
6235 : break;
6236 1 : default:
6237 1 : continue;
6238 : }
6239 84 : if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
6240 0 : continue;
6241 :
6242 168 : if ((ccode == LT_EXPR)
6243 84 : ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
6244 : {
6245 55 : em1 = EDGE_SUCC (bb1, 1 - i);
6246 55 : e1 = EDGE_SUCC (bb2, 0);
6247 55 : e2 = EDGE_SUCC (bb2, 1);
6248 55 : if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
6249 0 : std::swap (e1, e2);
6250 : }
6251 : else
6252 : {
6253 29 : e1 = EDGE_SUCC (bb1, 1 - i);
6254 29 : em1 = EDGE_SUCC (bb2, 0);
6255 29 : e2 = EDGE_SUCC (bb2, 1);
6256 29 : if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
6257 : std::swap (em1, e2);
6258 : }
6259 : break;
6260 : }
6261 :
6262 9212 : if (em1 == NULL)
6263 : {
6264 18314 : if ((ccode == LT_EXPR)
6265 9157 : ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
6266 : {
6267 3124 : em1 = EDGE_SUCC (bb1, 1);
6268 3124 : e1 = EDGE_SUCC (bb1, 0);
6269 3124 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6270 : }
6271 : else
6272 : {
6273 6033 : em1 = EDGE_SUCC (bb1, 0);
6274 6033 : e1 = EDGE_SUCC (bb1, 1);
6275 6033 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6276 : }
6277 : }
6278 :
6279 : /* Check if there is a single bb into which all failed conditions
6280 : jump to (perhaps through an empty block) and if it results in
6281 : a single integral PHI which just sets it to -1, 0, 1, X
6282 : (or -1, 0, 1 when NaNs can't happen). In that case use 1 rather
6283 : than 0 as last .SPACESHIP argument to tell backends it might
6284 : consider different code generation and just cast the result
6285 : of .SPACESHIP to the PHI result. X above is some value
6286 : other than -1, 0, 1, for libstdc++ -128, for libc++ -127. */
6287 9241 : tree arg3 = integer_zero_node;
6288 9241 : edge e = EDGE_SUCC (bb0, 0);
6289 9241 : if (e->dest == bb1)
6290 6573 : e = EDGE_SUCC (bb0, 1);
6291 9241 : basic_block bbp = e->dest;
6292 9241 : gphi *phi = NULL;
6293 9241 : for (gphi_iterator psi = gsi_start_phis (bbp);
6294 11220 : !gsi_end_p (psi); gsi_next (&psi))
6295 : {
6296 3602 : gphi *gp = psi.phi ();
6297 3602 : tree res = gimple_phi_result (gp);
6298 :
6299 3602 : if (phi != NULL
6300 3245 : || virtual_operand_p (res)
6301 2257 : || !INTEGRAL_TYPE_P (TREE_TYPE (res))
6302 5728 : || TYPE_PRECISION (TREE_TYPE (res)) < 2)
6303 : {
6304 : phi = NULL;
6305 : break;
6306 : }
6307 1979 : phi = gp;
6308 : }
6309 9241 : if (phi
6310 1622 : && integer_zerop (gimple_phi_arg_def_from_edge (phi, e))
6311 9769 : && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3))
6312 : {
6313 107 : HOST_WIDE_INT argval
6314 107 : = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? -128 : -1;
6315 630 : for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i)
6316 : {
6317 228 : edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2;
6318 228 : if (e3->dest != bbp)
6319 : {
6320 106 : if (!empty_block_p (e3->dest)
6321 96 : || !single_succ_p (e3->dest)
6322 202 : || single_succ (e3->dest) != bbp)
6323 : {
6324 : phi = NULL;
6325 : break;
6326 : }
6327 : e3 = single_succ_edge (e3->dest);
6328 : }
6329 218 : tree a = gimple_phi_arg_def_from_edge (phi, e3);
6330 218 : if (TREE_CODE (a) != INTEGER_CST
6331 218 : || (i == 0 && !integer_onep (a))
6332 430 : || (i == 1 && !integer_all_onesp (a)))
6333 : {
6334 : phi = NULL;
6335 : break;
6336 : }
6337 212 : if (i == 2)
6338 : {
6339 30 : tree minv = TYPE_MIN_VALUE (signed_char_type_node);
6340 30 : tree maxv = TYPE_MAX_VALUE (signed_char_type_node);
6341 30 : widest_int w = widest_int::from (wi::to_wide (a), SIGNED);
6342 41 : if ((w >= -1 && w <= 1)
6343 26 : || w < wi::to_widest (minv)
6344 56 : || w > wi::to_widest (maxv))
6345 : {
6346 4 : phi = NULL;
6347 4 : break;
6348 : }
6349 26 : argval = w.to_shwi ();
6350 26 : }
6351 : }
6352 107 : if (phi)
6353 87 : arg3 = build_int_cst (integer_type_node,
6354 103 : TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval);
6355 : }
6356 :
6357 : /* For integral <=> comparisons only use .SPACESHIP if it is turned
6358 : into an integer (-1, 0, 1). */
6359 9241 : if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) && arg3 == integer_zero_node)
6360 : return;
6361 :
6362 279 : gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 3, arg1, arg2, arg3);
6363 279 : tree lhs = make_ssa_name (integer_type_node);
6364 279 : gimple_call_set_lhs (gc, lhs);
6365 279 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6366 279 : gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
6367 :
6368 471 : wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node));
6369 471 : wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node));
6370 279 : if (HONOR_NANS (TREE_TYPE (arg1)))
6371 : {
6372 199 : if (arg3 == integer_zero_node)
6373 173 : wmin = wi::shwi (-128, TYPE_PRECISION (integer_type_node));
6374 26 : else if (tree_int_cst_sgn (arg3) < 0)
6375 19 : wmin = wi::to_wide (arg3);
6376 : else
6377 7 : wmax = wi::to_wide (arg3);
6378 : }
6379 471 : int_range<1> vr (TREE_TYPE (lhs), wmin, wmax);
6380 279 : set_range_info (lhs, vr);
6381 :
6382 279 : if (arg3 != integer_zero_node)
6383 : {
6384 87 : tree type = TREE_TYPE (gimple_phi_result (phi));
6385 87 : if (!useless_type_conversion_p (type, integer_type_node))
6386 : {
6387 63 : tree tem = make_ssa_name (type);
6388 63 : gimple *gcv = gimple_build_assign (tem, NOP_EXPR, lhs);
6389 63 : gsi_insert_before (&gsi, gcv, GSI_SAME_STMT);
6390 63 : lhs = tem;
6391 : }
6392 87 : SET_PHI_ARG_DEF_ON_EDGE (phi, e, lhs);
6393 87 : gimple_cond_set_lhs (stmt, boolean_false_node);
6394 87 : gimple_cond_set_rhs (stmt, boolean_false_node);
6395 161 : gimple_cond_set_code (stmt, (e->flags & EDGE_TRUE_VALUE)
6396 : ? EQ_EXPR : NE_EXPR);
6397 87 : update_stmt (stmt);
6398 87 : return;
6399 : }
6400 :
6401 192 : gimple_cond_set_lhs (stmt, lhs);
6402 192 : gimple_cond_set_rhs (stmt, integer_zero_node);
6403 192 : update_stmt (stmt);
6404 :
6405 384 : gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
6406 192 : gimple_cond_set_lhs (cond, lhs);
6407 192 : if (em1->src == bb1 && e2 != em1)
6408 : {
6409 113 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6410 119 : gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
6411 : ? EQ_EXPR : NE_EXPR);
6412 : }
6413 : else
6414 : {
6415 79 : gcc_assert (e1->src == bb1 && e2 != e1);
6416 79 : gimple_cond_set_rhs (cond, integer_one_node);
6417 79 : gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
6418 : ? EQ_EXPR : NE_EXPR);
6419 : }
6420 192 : update_stmt (cond);
6421 :
6422 192 : if (e2 != e1 && e2 != em1)
6423 : {
6424 116 : cond = as_a <gcond *> (*gsi_last_bb (bb2));
6425 58 : gimple_cond_set_lhs (cond, lhs);
6426 58 : if (em1->src == bb2)
6427 29 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6428 : else
6429 : {
6430 29 : gcc_assert (e1->src == bb2);
6431 29 : gimple_cond_set_rhs (cond, integer_one_node);
6432 : }
6433 58 : gimple_cond_set_code (cond,
6434 58 : (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
6435 58 : update_stmt (cond);
6436 : }
6437 : }
6438 :
6439 :
6440 : /* Find integer multiplications where the operands are extended from
6441 : smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
6442 : or MULT_HIGHPART_EXPR where appropriate. */
6443 :
6444 : namespace {
6445 :
/* Static metadata describing the widening-multiply pass; consumed by
   pass_optimize_widening_mul's gimple_opt_pass constructor below.
   Field order is fixed by struct pass_data.  */
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
6458 :
6459 : class pass_optimize_widening_mul : public gimple_opt_pass
6460 : {
6461 : public:
6462 287872 : pass_optimize_widening_mul (gcc::context *ctxt)
6463 575744 : : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
6464 : {}
6465 :
6466 : /* opt_pass methods: */
6467 1038027 : bool gate (function *) final override
6468 : {
6469 1038027 : return flag_expensive_optimizations && optimize;
6470 : }
6471 :
6472 : unsigned int execute (function *) final override;
6473 :
6474 : }; // class pass_optimize_widening_mul
6475 :
6476 : /* Walker class to perform the transformation in reverse dominance order. */
6477 :
6478 : class math_opts_dom_walker : public dom_walker
6479 : {
6480 : public:
6481 : /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
6482 : if walking modidifes the CFG. */
6483 :
6484 960579 : math_opts_dom_walker (bool *cfg_changed_p)
6485 2881737 : : dom_walker (CDI_DOMINATORS), m_last_result_set (),
6486 960579 : m_cfg_changed_p (cfg_changed_p) {}
6487 :
6488 : /* The actual actions performed in the walk. */
6489 :
6490 : void after_dom_children (basic_block) final override;
6491 :
6492 : /* Set of results of chains of multiply and add statement combinations that
6493 : were not transformed into FMAs because of active deferring. */
6494 : hash_set<tree> m_last_result_set;
6495 :
6496 : /* Pointer to a flag of the user that needs to be set if CFG has been
6497 : modified. */
6498 : bool *m_cfg_changed_p;
6499 : };
6500 :
/* Process basic block BB after all blocks dominated by it have been
   visited.  Match saturation-arithmetic PHIs, then scan BB's statements
   for multiplications to widen or fuse into FMAs, overflow-arithmetic
   patterns, divmod candidates, highpart multiplies, single-bit tests
   and spaceship comparisons.  */

void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* FMA deferring is only active when the target limits FMA formation
     via param_avoid_fma_max_bits.  */
  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);

  /* First try to recognize saturation patterns rooted at PHI nodes.
     PSI_NEXT is advanced up front because a successful match removes
     the current PHI, invalidating PSI.  */
  for (gphi_iterator psi_next, psi = gsi_start_phis (bb); !gsi_end_p (psi);
       psi = psi_next)
    {
      psi_next = psi;
      gsi_next (&psi_next);

      gimple_stmt_iterator gsi = gsi_after_labels (bb);
      gphi *phi = psi.phi ();

      if (match_saturation_add (&gsi, phi)
	  || match_saturation_sub (&gsi, phi)
	  || match_saturation_trunc (&gsi, phi)
	  || match_saturation_mul (&gsi, phi))
	remove_phi_node (&psi, /* release_lhs_p */ false);
    }

  /* Now scan the non-PHI statements.  GSI is only advanced at the bottom
     of the loop; paths that remove STMT use `continue' so the iterator
     (already moved by gsi_remove) is not advanced twice.  */
  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
	{
	  code = gimple_assign_rhs_code (stmt);
	  switch (code)
	    {
	    case MULT_EXPR:
	      /* Try widening first, then copysign expansion; only if both
		 fail consider folding the multiply into an FMA, which on
		 success makes STMT dead.  */
	      if (!convert_mult_to_widen (stmt, &gsi)
		  && !convert_expand_mult_copysign (stmt, &gsi)
		  && convert_mult_to_fma (stmt,
					  gimple_assign_rhs1 (stmt),
					  gimple_assign_rhs2 (stmt),
					  &fma_state))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
	      break;

	    case PLUS_EXPR:
	      match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
	      /* fall-through */
	    case MINUS_EXPR:
	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
		{
		  match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
		  /* Only look for uaddc/usubc if match_arith_overflow
		     left STMT in place.  */
		  if (gsi_stmt (gsi) == stmt)
		    match_uaddc_usubc (&gsi, stmt, code);
		}
	      break;

	    case BIT_NOT_EXPR:
	      /* A true return means STMT was replaced; don't advance GSI
		 past the replacement.  */
	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
		continue;
	      break;

	    case TRUNC_MOD_EXPR:
	      convert_to_divmod (as_a<gassign *> (stmt));
	      break;

	    case RSHIFT_EXPR:
	      convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
	      break;

	    case BIT_IOR_EXPR:
	      match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
	      match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
	      match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
	      /* fall-through */
	    case BIT_XOR_EXPR:
	      match_uaddc_usubc (&gsi, stmt, code);
	      break;

	    case EQ_EXPR:
	    case NE_EXPR:
	    case LE_EXPR:
	    case GT_EXPR:
	      match_single_bit_test (&gsi, stmt);
	      break;

	    case COND_EXPR:
	    case BIT_AND_EXPR:
	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
	      break;

	    case NOP_EXPR:
	      match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
	      match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
	      match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
	      break;

	    default:;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  switch (gimple_call_combined_fn (stmt))
	    {
	    case CFN_COND_MUL:
	      /* Conditional multiply: operands are call args 1 and 2,
		 the condition is arg 0.  */
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0)))

		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_LEN_MUL:
	      /* Length-masked variant: args 4 and 5 carry the extra
		 len/bias operands.  */
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0),
				       gimple_call_arg (stmt, 4),
				       gimple_call_arg (stmt, 5)))

		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_LAST:
	      /* An ordinary (non-internal-fn) call; stop deferring FMAs
		 across it.  */
	      cancel_fma_deferring (&fma_state);
	      break;

	    default:
	      break;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_COND)
	{
	  match_single_bit_test (&gsi, stmt);
	  optimize_spaceship (as_a <gcond *> (stmt));
	}
      gsi_next (&gsi);
    }

  /* If we ended the block still deferring an FMA that feeds a loop header
     PHI, decide now whether to keep deferring or give up.  */
  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
						 &m_last_result_set))
	cancel_fma_deferring (&fma_state);
      else
	m_last_result_set.add (fma_state.m_last_result);
    }
}
6666 :
6667 :
6668 : unsigned int
6669 960579 : pass_optimize_widening_mul::execute (function *fun)
6670 : {
6671 960579 : bool cfg_changed = false;
6672 :
6673 960579 : memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
6674 960579 : calculate_dominance_info (CDI_DOMINATORS);
6675 960579 : renumber_gimple_stmt_uids (cfun);
6676 :
6677 960579 : math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6678 :
6679 960579 : statistics_counter_event (fun, "widening multiplications inserted",
6680 : widen_mul_stats.widen_mults_inserted);
6681 960579 : statistics_counter_event (fun, "widening maccs inserted",
6682 : widen_mul_stats.maccs_inserted);
6683 960579 : statistics_counter_event (fun, "fused multiply-adds inserted",
6684 : widen_mul_stats.fmas_inserted);
6685 960579 : statistics_counter_event (fun, "divmod calls inserted",
6686 : widen_mul_stats.divmod_calls_inserted);
6687 960579 : statistics_counter_event (fun, "highpart multiplications inserted",
6688 : widen_mul_stats.highpart_mults_inserted);
6689 :
6690 960579 : return cfg_changed ? TODO_cleanup_cfg : 0;
6691 : }
6692 :
6693 : } // anon namespace
6694 :
/* Factory called by the pass manager to instantiate the widening-multiply
   pass for context CTXT.  */

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}
|