Line data Source code
1 : /* Global, SSA-based optimizations using mathematical identities.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 : operations. These are common in sequences such as this one:
22 :
23 : modulus = sqrt(x*x + y*y + z*z);
24 : x = x / modulus;
25 : y = y / modulus;
26 : z = z / modulus;
27 :
28 : that can be optimized to
29 :
30 : modulus = sqrt(x*x + y*y + z*z);
31 : rmodulus = 1.0 / modulus;
32 : x = x * rmodulus;
33 : y = y * rmodulus;
34 : z = z * rmodulus;
35 :
36 : We do this for loop invariant divisors, and with this pass whenever
37 : we notice that a division has the same divisor multiple times.
38 :
39 : Of course, like in PRE, we don't insert a division if a dominator
40 : already has one. However, this cannot be done as an extension of
41 : PRE for several reasons.
42 :
43 : First of all, with some experiments it was found out that the
44 : transformation is not always useful if there are only two divisions
45 : by the same divisor. This is probably because modern processors
46 : can pipeline the divisions; on older, in-order processors it should
47 : still be effective to optimize two divisions by the same number.
48 : We make this a param, and it shall be called N in the remainder of
49 : this comment.
50 :
51 : Second, if trapping math is active, we have less freedom on where
52 : to insert divisions: we can only do so in basic blocks that already
53 : contain one. (If divisions don't trap, instead, we can insert
54 : divisions elsewhere, which will be in blocks that are common dominators
55 : of those that have the division).
56 :
57 : We really don't want to compute the reciprocal unless a division will
58 : be found. To do this, we won't insert the division in a basic block
59 : that has less than N divisions *post-dominating* it.
60 :
61 : The algorithm constructs a subset of the dominator tree, holding the
62 : blocks containing the divisions and the common dominators to them,
63 : and walk it twice. The first walk is in post-order, and it annotates
64 : each block with the number of divisions that post-dominate it: this
65 : gives information on where divisions can be inserted profitably.
66 : The second walk is in pre-order, and it inserts divisions as explained
67 : above, and replaces divisions by multiplications.
68 :
69 : In the best case, the cost of the pass is O(n_statements). In the
70 : worst-case, the cost is due to creating the dominator tree subset,
71 : with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 : for n_statements / n_basic_blocks statements. So, the amortized cost
73 : of creating the dominator tree subset is O(n_basic_blocks) and the
74 : worst-case cost of the pass is O(n_statements * n_basic_blocks).
75 :
76 : More practically, the cost will be small because there are few
77 : divisions, and they tend to be in the same basic block, so insert_bb
78 : is called very few times.
79 :
80 : If we did this using domwalk.cc, an efficient implementation would have
81 : to work on all the variables in a single pass, because we could not
82 : work on just a subset of the dominator tree, as we do now, and the
83 : cost would also be something like O(n_statements * n_basic_blocks).
84 : The data structures would be more complex in order to work on all the
85 : variables in a single pass. */
86 :
87 : #include "config.h"
88 : #include "system.h"
89 : #include "coretypes.h"
90 : #include "backend.h"
91 : #include "target.h"
92 : #include "rtl.h"
93 : #include "tree.h"
94 : #include "gimple.h"
95 : #include "predict.h"
96 : #include "alloc-pool.h"
97 : #include "tree-pass.h"
98 : #include "ssa.h"
99 : #include "optabs-tree.h"
100 : #include "gimple-pretty-print.h"
101 : #include "alias.h"
102 : #include "fold-const.h"
103 : #include "gimple-iterator.h"
104 : #include "gimple-fold.h"
105 : #include "gimplify.h"
106 : #include "gimplify-me.h"
107 : #include "stor-layout.h"
108 : #include "tree-cfg.h"
109 : #include "tree-dfa.h"
110 : #include "tree-ssa.h"
111 : #include "builtins.h"
112 : #include "internal-fn.h"
113 : #include "case-cfn-macros.h"
114 : #include "optabs-libfuncs.h"
115 : #include "tree-eh.h"
116 : #include "targhooks.h"
117 : #include "domwalk.h"
118 : #include "tree-ssa-math-opts.h"
119 : #include "dbgcnt.h"
120 : #include "cfghooks.h"
121 :
122 : /* This structure represents one basic block that either computes a
123 : division, or is a common dominator for basic block that compute a
124 : division. */
125 : struct occurrence {
126 : /* The basic block represented by this structure. */
127 : basic_block bb = basic_block();
128 :
129 : /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
130 : inserted in BB. */
131 : tree recip_def = tree();
132 :
133 : /* If non-NULL, the SSA_NAME holding the definition for a squared
134 : reciprocal inserted in BB. */
135 : tree square_recip_def = tree();
136 :
137 : /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
138 : was inserted in BB. */
139 : gimple *recip_def_stmt = nullptr;
140 :
141 : /* Pointer to a list of "struct occurrence"s for blocks dominated
142 : by BB. */
143 : struct occurrence *children = nullptr;
144 :
145 : /* Pointer to the next "struct occurrence"s in the list of blocks
146 : sharing a common dominator. */
147 : struct occurrence *next = nullptr;
148 :
149 : /* The number of divisions that are in BB before compute_merit. The
150 : number of divisions that are in BB or post-dominate it after
151 : compute_merit. */
152 : int num_divisions = 0;
153 :
154 : /* True if the basic block has a division, false if it is a common
155 : dominator for basic blocks that do. If it is false and trapping
156 : math is active, BB is not a candidate for inserting a reciprocal. */
157 : bool bb_has_division = false;
158 :
159 : /* Construct a struct occurrence for basic block BB, and whose
160 : children list is headed by CHILDREN. BB->aux is pointed back at
160 : this node, so the node for a given block can be found in O(1)
160 : (see register_division_in). */
161 610 : occurrence (basic_block bb, struct occurrence *children)
162 610 : : bb (bb), children (children)
163 : {
164 610 : bb->aux = this;
165 : }
166 :
167 : /* Destroy a struct occurrence and clear the BB->aux back-link set up
167 : by the constructor. */
168 610 : ~occurrence ()
169 610 : {
170 610 : bb->aux = nullptr;
171 610 : }
172 :
173 : /* Allocate memory for a struct occurrence from OCC_POOL. */
174 : static void* operator new (size_t);
175 :
176 : /* Return memory for a struct occurrence to OCC_POOL. */
177 : static void operator delete (void*, size_t);
178 : };
179 :
/* Counters for the reciprocal CSE mini-pass; zeroed by memset in
   pass_cse_reciprocals::execute before each run.  */
180 : static struct
181 : {
182 : /* Number of 1.0/X ops inserted. */
183 : int rdivs_inserted;
184 :
185 : /* Number of 1.0/FUNC ops inserted. */
186 : int rfuncs_inserted;
187 : } reciprocal_stats;
188 :
/* Counters for the sincos/cexpi transformation (its pass body is later
   in this file). */
189 : static struct
190 : {
191 : /* Number of cexpi calls inserted. */
192 : int inserted;
193 :
194 : /* Number of conversions removed. */
195 : int conv_removed;
196 :
197 : } sincos_stats;
198 :
/* Counters for the widening-multiply transformations (pass body later
   in this file). */
199 : static struct
200 : {
201 : /* Number of widening multiplication ops inserted. */
202 : int widen_mults_inserted;
203 :
204 : /* Number of integer multiply-and-accumulate ops inserted. */
205 : int maccs_inserted;
206 :
207 : /* Number of fp fused multiply-add ops inserted. */
208 : int fmas_inserted;
209 :
210 : /* Number of divmod calls inserted. */
211 : int divmod_calls_inserted;
212 :
213 : /* Number of highpart multiplication ops inserted. */
214 : int highpart_mults_inserted;
215 : } widen_mul_stats;
216 :
217 : /* The instance of "struct occurrence" representing the highest
218 : interesting block in the dominator tree. Reset to NULL at the end of
218 : execute_cse_reciprocals_1. */
219 : static struct occurrence *occ_head;
220 :
221 : /* Allocation pool for getting instances of "struct occurrence". */
222 : static object_allocator<occurrence> *occ_pool;
223 :
224 610 : void* occurrence::operator new (size_t n)
225 : {
226 610 : gcc_assert (n == sizeof(occurrence));
227 610 : return occ_pool->allocate_raw ();
228 : }
229 :
230 610 : void occurrence::operator delete (void *occ, size_t n)
231 : {
232 610 : gcc_assert (n == sizeof(occurrence));
233 610 : occ_pool->remove_raw (occ);
234 610 : }
235 :
236 : /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
237 : list of "struct occurrence"s, one per basic block, having IDOM as
238 : their common dominator.
239 :
240 : We try to insert NEW_OCC as deep as possible in the tree, and we also
241 : insert any other block that is a common dominator for BB and one
242 : block already in the tree. */
243 :
244 : static void
245 598 : insert_bb (struct occurrence *new_occ, basic_block idom,
246 : struct occurrence **p_head)
247 : {
248 604 : struct occurrence *occ, **p_occ;
249 :
250 630 : for (p_occ = p_head; (occ = *p_occ) != NULL; )
251 : {
252 32 : basic_block bb = new_occ->bb, occ_bb = occ->bb;
253 32 : basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
254 32 : if (dom == bb)
255 : {
256 : /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
257 : from its list. */
258 7 : *p_occ = occ->next;
259 7 : occ->next = new_occ->children;
260 7 : new_occ->children = occ;
261 :
262 : /* Try the next block (it may as well be dominated by BB). */
263 : }
264 :
265 25 : else if (dom == occ_bb)
266 : {
267 : /* OCC_BB dominates BB. Tail recurse to look deeper. */
268 6 : insert_bb (new_occ, dom, &occ->children);
269 6 : return;
270 : }
271 :
272 19 : else if (dom != idom)
273 : {
274 12 : gcc_assert (!dom->aux);
275 :
276 : /* There is a dominator between IDOM and BB, add it and make
277 : two children out of NEW_OCC and OCC. First, remove OCC from
278 : its list. */
279 12 : *p_occ = occ->next;
280 12 : new_occ->next = occ;
281 12 : occ->next = NULL;
282 :
283 : /* None of the previous blocks has DOM as a dominator: if we tail
284 : recursed, we would reexamine them uselessly. Just switch BB with
285 : DOM, and go on looking for blocks dominated by DOM. */
286 12 : new_occ = new occurrence (dom, new_occ);
287 : }
288 :
289 : else
290 : {
291 : /* Nothing special, go on with the next element. */
292 7 : p_occ = &occ->next;
293 : }
294 : }
295 :
296 : /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
297 598 : new_occ->next = *p_head;
298 598 : *p_head = new_occ;
299 : }
300 :
301 : /* Register that we found a division in BB.
302 : IMPORTANCE is a measure of how much weighting to give
303 : that division. Use IMPORTANCE = 2 to register a single
304 : division. If the division is going to be found multiple
305 : times use 1 (as it is with squares). */
306 :
307 : static inline void
308 699 : register_division_in (basic_block bb, int importance)
309 : {
310 699 : struct occurrence *occ;
311 :
312 699 : occ = (struct occurrence *) bb->aux;
313 699 : if (!occ)
314 : {
315 598 : occ = new occurrence (bb, NULL);
316 598 : insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
317 : }
318 :
319 699 : occ->bb_has_division = true;
320 699 : occ->num_divisions += importance;
321 699 : }
322 :
323 :
324 : /* Compute the number of divisions that postdominate each block in OCC and
325 : its children. */
326 :
327 : static void
328 31 : compute_merit (struct occurrence *occ)
329 : {
330 31 : struct occurrence *occ_child;
331 31 : basic_block dom = occ->bb;
332 :
333 60 : for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
334 : {
335 29 : basic_block bb;
336 29 : if (occ_child->children)
337 5 : compute_merit (occ_child);
338 :
339 29 : if (flag_exceptions)
340 6 : bb = single_noncomplex_succ (dom);
341 : else
342 : bb = dom;
343 :
344 29 : if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
345 12 : occ->num_divisions += occ_child->num_divisions;
346 : }
347 31 : }
348 :
349 :
350 : /* Return whether USE_STMT is a floating-point division by DEF. */
351 : static inline bool
352 347498 : is_division_by (gimple *use_stmt, tree def)
353 : {
354 347498 : return is_gimple_assign (use_stmt)
355 237611 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
356 1271 : && gimple_assign_rhs2 (use_stmt) == def
357 : /* Do not recognize x / x as valid division, as we are getting
358 : confused later by replacing all immediate uses x in such
359 : a stmt. */
360 876 : && gimple_assign_rhs1 (use_stmt) != def
361 348374 : && !stmt_can_throw_internal (cfun, use_stmt);
362 : }
363 :
364 : /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
365 : static inline bool
366 343741 : is_mult_by (gimple *use_stmt, tree def, tree a)
367 : {
368 343741 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
369 343741 : && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
370 : {
371 78084 : tree op0 = gimple_assign_rhs1 (use_stmt);
372 78084 : tree op1 = gimple_assign_rhs2 (use_stmt);
373 :
374 78084 : return (op0 == def && op1 == a)
375 78084 : || (op0 == a && op1 == def);
376 : }
377 : return 0;
378 : }
379 :
380 : /* Return whether USE_STMT is DEF * DEF. */
381 : static inline bool
382 343696 : is_square_of (gimple *use_stmt, tree def)
383 : {
384 5 : return is_mult_by (use_stmt, def, def);
385 : }
386 :
387 : /* Return whether USE_STMT is a floating-point division by
388 : DEF * DEF. */
389 : static inline bool
390 181 : is_division_by_square (gimple *use_stmt, tree def)
391 : {
392 181 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
393 174 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
394 7 : && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
395 188 : && !stmt_can_throw_internal (cfun, use_stmt))
396 : {
397 7 : tree denominator = gimple_assign_rhs2 (use_stmt);
398 7 : if (TREE_CODE (denominator) == SSA_NAME)
399 7 : return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
400 : }
401 : return 0;
402 : }
403 :
404 : /* Walk the subset of the dominator tree rooted at OCC, setting the
405 : RECIP_DEF field to a definition of 1.0 / DEF that can be used in
406 : the given basic block. The field may be left NULL, of course,
407 : if it is not possible or profitable to do the optimization.
408 :
409 : DEF_BSI is an iterator pointing at the statement defining DEF.
410 : If RECIP_DEF is set, a dominator already has a computation that can
411 : be used.
412 :
413 : If should_insert_square_recip is set, then this also inserts
414 : the square of the reciprocal immediately after the definition
415 : of the reciprocal. */
416 :
417 : static void
418 55 : insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
419 : tree def, tree recip_def, tree square_recip_def,
420 : int should_insert_square_recip, int threshold)
421 : {
422 55 : tree type;
423 55 : gassign *new_stmt, *new_square_stmt;
424 55 : gimple_stmt_iterator gsi;
425 55 : struct occurrence *occ_child;
426 :
427 55 : if (!recip_def
428 39 : && (occ->bb_has_division || !flag_trapping_math)
429 : /* Divide by two as all divisions are counted twice in
430 : the costing loop. */
431 35 : && occ->num_divisions / 2 >= threshold)
432 : {
433 : /* Make a variable with the replacement and substitute it. */
434 24 : type = TREE_TYPE (def);
435 24 : recip_def = create_tmp_reg (type, "reciptmp");
436 24 : new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
437 : build_one_cst (type), def);
438 :
439 24 : if (should_insert_square_recip)
440 : {
441 4 : square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
442 4 : new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
443 : recip_def, recip_def);
444 : }
445 :
446 24 : if (occ->bb_has_division)
447 : {
448 : /* Case 1: insert before an existing division. */
449 21 : gsi = gsi_after_labels (occ->bb);
450 201 : while (!gsi_end_p (gsi)
451 201 : && (!is_division_by (gsi_stmt (gsi), def))
452 382 : && (!is_division_by_square (gsi_stmt (gsi), def)))
453 180 : gsi_next (&gsi);
454 :
455 21 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
456 21 : if (should_insert_square_recip)
457 3 : gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
458 : }
459 3 : else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
460 : {
461 : /* Case 2: insert right after the definition. Note that this will
462 : never happen if the definition statement can throw, because in
463 : that case the sole successor of the statement's basic block will
464 : dominate all the uses as well. */
465 2 : gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
466 2 : if (should_insert_square_recip)
467 1 : gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
468 : }
469 : else
470 : {
471 : /* Case 3: insert in a basic block not containing defs/uses. */
472 1 : gsi = gsi_after_labels (occ->bb);
473 1 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
474 1 : if (should_insert_square_recip)
475 0 : gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
476 : }
477 :
478 24 : reciprocal_stats.rdivs_inserted++;
479 :
480 24 : occ->recip_def_stmt = new_stmt;
481 : }
482 :
483 55 : occ->recip_def = recip_def;
484 55 : occ->square_recip_def = square_recip_def;
485 84 : for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
486 29 : insert_reciprocals (def_gsi, occ_child, def, recip_def,
487 : square_recip_def, should_insert_square_recip,
488 : threshold);
489 55 : }
490 :
491 : /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
492 : Take as argument the use for (x * x). */
493 : static inline void
494 4 : replace_reciprocal_squares (use_operand_p use_p)
495 : {
496 4 : gimple *use_stmt = USE_STMT (use_p);
497 4 : basic_block bb = gimple_bb (use_stmt);
498 4 : struct occurrence *occ = (struct occurrence *) bb->aux;
499 :
500 8 : if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
501 8 : && occ->recip_def)
502 : {
503 4 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
504 4 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
505 4 : gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
506 4 : SET_USE (use_p, occ->square_recip_def);
507 4 : fold_stmt_inplace (&gsi);
508 4 : update_stmt (use_stmt);
509 : }
510 4 : }
511 :
512 :
513 : /* Replace the division at USE_P with a multiplication by the reciprocal, if
514 : possible. */
515 :
516 : static inline void
517 115 : replace_reciprocal (use_operand_p use_p)
518 : {
519 115 : gimple *use_stmt = USE_STMT (use_p);
520 115 : basic_block bb = gimple_bb (use_stmt);
521 115 : struct occurrence *occ = (struct occurrence *) bb->aux;
522 :
523 115 : if (optimize_bb_for_speed_p (bb)
524 115 : && occ->recip_def && use_stmt != occ->recip_def_stmt)
525 : {
526 80 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
527 80 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
528 80 : SET_USE (use_p, occ->recip_def);
529 80 : fold_stmt_inplace (&gsi);
530 80 : update_stmt (use_stmt);
531 : }
532 115 : }
533 :
534 :
535 : /* Free OCC and return one more "struct occurrence" to be freed. */
536 :
537 : static struct occurrence *
538 610 : free_bb (struct occurrence *occ)
539 : {
540 610 : struct occurrence *child, *next;
541 :
542 : /* First get the two pointers hanging off OCC. */
543 610 : next = occ->next;
544 610 : child = occ->children;
545 610 : delete occ;
546 :
547 : /* Now ensure that we don't recurse unless it is necessary. */
548 610 : if (!child)
549 : return next;
550 : else
551 : {
552 24 : while (next)
553 3 : next = free_bb (next);
554 :
555 : return child;
556 : }
557 : }
558 :
559 : /* Transform sequences like
560 : t = sqrt (a)
561 : x = 1.0 / t;
562 : r1 = x * x;
563 : r2 = a * x;
564 : into:
565 : t = sqrt (a)
566 : r1 = 1.0 / a;
567 : r2 = t;
568 : x = r1 * r2;
569 : depending on the uses of x, r1, r2. This removes one multiplication and
570 : allows the sqrt and division operations to execute in parallel.
571 : DEF_GSI is the gsi of the initial division by sqrt that defines
572 : DEF (x in the example above). */
573 :
574 : static void
575 561 : optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
576 : {
577 561 : gimple *use_stmt;
578 561 : imm_use_iterator use_iter;
579 561 : gimple *stmt = gsi_stmt (*def_gsi);
580 561 : tree x = def;
581 561 : tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
582 561 : tree div_rhs1 = gimple_assign_rhs1 (stmt);
583 :
584 561 : if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
585 556 : || TREE_CODE (div_rhs1) != REAL_CST
586 731 : || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
587 467 : return;
588 :
589 94 : gcall *sqrt_stmt
590 591 : = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
591 :
592 42 : if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
593 : return;
594 :
595 42 : switch (gimple_call_combined_fn (sqrt_stmt))
596 : {
597 31 : CASE_CFN_SQRT:
598 31 : CASE_CFN_SQRT_FN:
599 31 : break;
600 :
601 : default:
602 : return;
603 : }
604 31 : tree a = gimple_call_arg (sqrt_stmt, 0);
605 :
606 : /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
607 :
608 : /* Statements that use x in x * x. */
609 43 : auto_vec<gimple *> sqr_stmts;
610 : /* Statements that use x in a * x. */
611 12 : auto_vec<gimple *> mult_stmts;
612 31 : bool has_other_use = false;
613 31 : bool mult_on_main_path = false;
614 :
615 89 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
616 : {
617 58 : if (is_gimple_debug (use_stmt))
618 1 : continue;
619 57 : if (is_square_of (use_stmt, x))
620 : {
621 12 : sqr_stmts.safe_push (use_stmt);
622 12 : if (gimple_bb (use_stmt) == gimple_bb (stmt))
623 17 : mult_on_main_path = true;
624 : }
625 45 : else if (is_mult_by (use_stmt, x, a))
626 : {
627 14 : mult_stmts.safe_push (use_stmt);
628 14 : if (gimple_bb (use_stmt) == gimple_bb (stmt))
629 17 : mult_on_main_path = true;
630 : }
631 : else
632 : has_other_use = true;
633 31 : }
634 :
635 : /* In the x * x and a * x cases we just rewire stmt operands or
636 : remove multiplications. In the has_other_use case we introduce
637 : a multiplication so make sure we don't introduce a multiplication
638 : on a path where there was none. */
639 31 : if (has_other_use && !mult_on_main_path)
640 19 : return;
641 :
642 12 : if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
643 : return;
644 :
645 : /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
646 : to be able to compose it from the sqr and mult cases. */
647 41 : if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
648 : return;
649 :
650 12 : if (dump_file)
651 : {
652 10 : fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
653 10 : print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
654 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
655 10 : fprintf (dump_file, "\n");
656 : }
657 :
658 12 : bool delete_div = !has_other_use;
659 12 : tree sqr_ssa_name = NULL_TREE;
660 12 : if (!sqr_stmts.is_empty ())
661 : {
662 : /* r1 = x * x. Transform the original
663 : x = 1.0 / t
664 : into
665 : tmp1 = 1.0 / a
666 : r1 = tmp1. */
667 :
668 10 : sqr_ssa_name
669 10 : = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
670 :
671 10 : if (dump_file)
672 : {
673 10 : fprintf (dump_file, "Replacing original division\n");
674 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
675 10 : fprintf (dump_file, "with new division\n");
676 : }
677 10 : stmt
678 10 : = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
679 : gimple_assign_rhs1 (stmt), a);
680 10 : gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
681 10 : gsi_remove (def_gsi, true);
682 10 : *def_gsi = gsi_for_stmt (stmt);
683 10 : fold_stmt_inplace (def_gsi);
684 10 : update_stmt (stmt);
685 :
686 10 : if (dump_file)
687 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
688 :
689 20 : delete_div = false;
690 : gimple *sqr_stmt;
691 : unsigned int i;
692 20 : FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
693 : {
694 10 : gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
695 10 : gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
696 10 : update_stmt (sqr_stmt);
697 : }
698 : }
699 12 : if (!mult_stmts.is_empty ())
700 : {
701 : /* r2 = a * x. Transform this into:
702 : r2 = t (The original sqrt (a)). */
703 : unsigned int i;
704 24 : gimple *mult_stmt = NULL;
705 24 : FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
706 : {
707 12 : gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
708 :
709 12 : if (dump_file)
710 : {
711 10 : fprintf (dump_file, "Replacing squaring multiplication\n");
712 10 : print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
713 10 : fprintf (dump_file, "with assignment\n");
714 : }
715 12 : gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
716 12 : fold_stmt_inplace (&gsi2);
717 12 : update_stmt (mult_stmt);
718 12 : if (dump_file)
719 10 : print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
720 : }
721 : }
722 :
723 12 : if (has_other_use)
724 : {
725 : /* Using the two temporaries tmp1, tmp2 from above
726 : the original x is now:
727 : x = tmp1 * tmp2. */
728 10 : gcc_assert (orig_sqrt_ssa_name);
729 10 : gcc_assert (sqr_ssa_name);
730 :
731 10 : gimple *new_stmt
732 10 : = gimple_build_assign (x, MULT_EXPR,
733 : orig_sqrt_ssa_name, sqr_ssa_name);
734 10 : gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
735 10 : update_stmt (stmt);
736 : }
737 2 : else if (delete_div)
738 : {
739 : /* Remove the original division. */
740 2 : gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
741 2 : gsi_remove (&gsi2, true);
742 2 : release_defs (stmt);
743 : }
744 : else
745 0 : release_ssa_name (x);
746 : }
747 :
748 : /* Look for floating-point divisions among DEF's uses, and try to
749 : replace them by multiplications with the reciprocal. Add
750 : as many statements computing the reciprocal as needed.
751 :
752 : DEF must be a GIMPLE register of a floating-point type. */
753 :
754 : static void
755 208853 : execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
756 : {
757 208853 : use_operand_p use_p, square_use_p;
758 208853 : imm_use_iterator use_iter, square_use_iter;
759 208853 : tree square_def;
760 208853 : struct occurrence *occ;
761 208853 : int count = 0;
762 208853 : int threshold;
763 208853 : int square_recip_count = 0;
764 208853 : int sqrt_recip_count = 0;
765 :
766 208853 : gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
767 208853 : threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
768 :
769 : /* If DEF is a square (x * x), count the number of divisions by x.
770 : If there are more divisions by x than by (DEF * DEF), prefer to optimize
771 : the reciprocal of x instead of DEF. This improves cases like:
772 : def = x * x
773 : t0 = a / def
774 : t1 = b / def
775 : t2 = c / x
776 : Reciprocal optimization of x results in 1 division rather than 2 or 3. */
777 208853 : gimple *def_stmt = SSA_NAME_DEF_STMT (def);
778 :
779 208853 : if (is_gimple_assign (def_stmt)
780 162773 : && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
781 39565 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
782 248341 : && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
783 : {
784 665 : tree op0 = gimple_assign_rhs1 (def_stmt);
785 :
786 3379 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
787 : {
788 2049 : gimple *use_stmt = USE_STMT (use_p);
789 2049 : if (is_division_by (use_stmt, op0))
790 14 : sqrt_recip_count++;
791 665 : }
792 : }
793 :
794 761333 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
795 : {
796 343627 : gimple *use_stmt = USE_STMT (use_p);
797 343627 : if (is_division_by (use_stmt, def))
798 : {
799 633 : register_division_in (gimple_bb (use_stmt), 2);
800 633 : count++;
801 : }
802 :
803 343627 : if (is_square_of (use_stmt, def))
804 : {
805 1338 : square_def = gimple_assign_lhs (use_stmt);
806 4156 : FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
807 : {
808 1480 : gimple *square_use_stmt = USE_STMT (square_use_p);
809 1480 : if (is_division_by (square_use_stmt, square_def))
810 : {
811 : /* This is executed twice for each division by a square. */
812 66 : register_division_in (gimple_bb (square_use_stmt), 1);
813 66 : square_recip_count++;
814 : }
815 1338 : }
816 : }
817 208853 : }
818 :
819 : /* Square reciprocals were counted twice above. */
820 208853 : square_recip_count /= 2;
821 :
822 : /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
823 208853 : if (sqrt_recip_count > square_recip_count)
824 14 : goto out;
825 :
826 : /* Do the expensive part only if we can hope to optimize something. */
827 208839 : if (count + square_recip_count >= threshold && count >= 1)
828 : {
829 26 : gimple *use_stmt;
830 52 : for (occ = occ_head; occ; occ = occ->next)
831 : {
832 26 : compute_merit (occ);
833 26 : insert_reciprocals (def_gsi, occ, def, NULL, NULL,
834 : square_recip_count, threshold);
835 : }
836 :
837 185 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
838 : {
839 133 : if (is_division_by (use_stmt, def))
840 : {
841 345 : FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
842 115 : replace_reciprocal (use_p);
843 : }
844 23 : else if (square_recip_count > 0 && is_square_of (use_stmt, def))
845 : {
846 16 : FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
847 : {
848 : /* Find all uses of the square that are divisions and
849 : * replace them by multiplications with the inverse. */
850 8 : imm_use_iterator square_iterator;
851 8 : gimple *powmult_use_stmt = USE_STMT (use_p);
852 8 : tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
853 :
854 24 : FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
855 : square_iterator, powmult_def_name)
856 24 : FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
857 : {
858 8 : gimple *powmult_use_stmt = USE_STMT (square_use_p);
859 8 : if (is_division_by (powmult_use_stmt, powmult_def_name))
860 4 : replace_reciprocal_squares (square_use_p);
861 8 : }
862 : }
863 : }
864 26 : }
865 : }
866 :
867 208813 : out:
868 209460 : for (occ = occ_head; occ; )
869 607 : occ = free_bb (occ);
870 :
871 208853 : occ_head = NULL;
872 208853 : }
873 :
874 : /* Return an internal function that implements the reciprocal of CALL,
875 : or IFN_LAST if there is no such function that the target supports. */
876 :
877 : internal_fn
878 131 : internal_fn_reciprocal (gcall *call)
879 : {
880 131 : internal_fn ifn;
881 :
882 131 : switch (gimple_call_combined_fn (call))
883 : {
884 115 : CASE_CFN_SQRT:
885 115 : CASE_CFN_SQRT_FN:
886 115 : ifn = IFN_RSQRT;
887 115 : break;
888 :
889 : default:
890 : return IFN_LAST;
891 : }
892 :
893 115 : tree_pair types = direct_internal_fn_types (ifn, call);
894 115 : if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
895 : return IFN_LAST;
896 :
897 : return ifn;
898 : }
899 :
900 : /* Go through all the floating-point SSA_NAMEs, and call
901 : execute_cse_reciprocals_1 on each of them. */
902 : namespace {
903 :
/* Static descriptor for the "recip" pass: requires SSA form and asks
   for an SSA update after it runs. */
904 : const pass_data pass_data_cse_reciprocals =
905 : {
906 : GIMPLE_PASS, /* type */
907 : "recip", /* name */
908 : OPTGROUP_NONE, /* optinfo_flags */
909 : TV_TREE_RECIP, /* tv_id */
910 : PROP_ssa, /* properties_required */
911 : 0, /* properties_provided */
912 : 0, /* properties_destroyed */
913 : 0, /* todo_flags_start */
914 : TODO_update_ssa, /* todo_flags_finish */
915 : };
916 :
917 : class pass_cse_reciprocals : public gimple_opt_pass
918 : {
919 : public:
920 285722 : pass_cse_reciprocals (gcc::context *ctxt)
921 571444 : : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
922 : {}
923 :
924 : /* opt_pass methods: */
/* The pass only runs when optimizing and -freciprocal-math is in
   effect. */
925 1041484 : bool gate (function *) final override
926 : {
927 1041484 : return optimize && flag_reciprocal_math;
928 : }
929 : unsigned int execute (function *) final override;
930 :
931 : }; // class pass_cse_reciprocals
932 :
/* Main entry of the reciprocal CSE pass: walk every floating-point
   SSA definition in FUN and CSE divisions by it, then turn remaining
   a / func (b) patterns into a * rfunc (b) where the target provides
   a reciprocal function.  Always returns 0.  */

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  /* Pool backing the per-basic-block occurrence chains built while
     processing each SSA name.  */
  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* The pass uses bb->aux as scratch space; verify it starts clean.  */
  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  /* Process the default definitions of floating-point parameters.  */
  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      /* Process floating-point PHI results.  */
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* Process floating-point definitions by ordinary statements.  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    {
	      execute_cse_reciprocals_1 (&gsi, def);
	      /* Re-fetch the statement; the call above may have
		 replaced it in the sequence.  */
	      stmt = gsi_stmt (gsi);
	      if (flag_unsafe_math_optimizations
		  && is_gimple_assign (stmt)
		  && gimple_assign_lhs (stmt) == def
		  && !stmt_can_throw_internal (cfun, stmt)
		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
		optimize_recip_sqrt (&gsi, def);
	    }
	}

      /* The rfunc transformation below trades a division for a call
	 plus a multiplication; skip it when optimizing for size.  */
      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  /* Prefer an internal function; fall back to a
		     target-specific (BUILT_IN_MD) reciprocal builtin.  */
		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  reset_flow_sensitive_info (arg1);
		  /* If internal-ness changes we must build a fresh call
		     statement; otherwise the callee can be swapped
		     in place.  */
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      gimple_move_vops (stmt2, call);
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  /* Turn every remaining division by ARG1 into a
		     multiplication and refold.  */
		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}
1107 :
1108 : } // anon namespace
1109 :
1110 : gimple_opt_pass *
1111 285722 : make_pass_cse_reciprocals (gcc::context *ctxt)
1112 : {
1113 285722 : return new pass_cse_reciprocals (ctxt);
1114 : }
1115 :
1116 : /* If NAME is the result of a type conversion, look for other
1117 : equivalent dominating or dominated conversions, and replace all
1118 : uses with the earliest dominating name, removing the redundant
1119 : conversions. Return the prevailing name. */
1120 :
static tree
execute_cse_conv_1 (tree name, bool *cfg_changed)
{
  /* Default defs have no defining conversion; abnormal PHIs must not
     have their uses rewritten.  */
  if (SSA_NAME_IS_DEFAULT_DEF (name)
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return name;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);

  if (!gimple_assign_cast_p (def_stmt))
    return name;

  tree src = gimple_assign_rhs1 (def_stmt);

  if (TREE_CODE (src) != SSA_NAME)
    return name;

  imm_use_iterator use_iter;
  gimple *use_stmt;

  /* Find the earliest dominating def.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      /* Only consider casts of the same source to a compatible type.  */
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      bool use_dominates;
      if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
	{
	  /* Same block: USE_STMT dominates iff DEF_STMT appears at or
	     after it in the statement sequence.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
	    gsi_next (&gsi);
	  use_dominates = !gsi_end_p (gsi);
	}
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
			       gimple_bb (def_stmt)))
	use_dominates = false;
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
			       gimple_bb (use_stmt)))
	use_dominates = true;
      else
	continue;

      /* Keep NAME/DEF_STMT pointing at the dominating conversion.  */
      if (use_dominates)
	{
	  std::swap (name, lhs);
	  std::swap (def_stmt, use_stmt);
	}
    }

  /* Now go through all uses of SRC again, replacing the equivalent
     dominated conversions.  We may replace defs that were not
     dominated by the then-prevailing defs when we first visited
     them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      basic_block use_bb = gimple_bb (use_stmt);
      if (gimple_bb (def_stmt) == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
	{
	  sincos_stats.conv_removed++;

	  /* Redirect all uses of the redundant conversion to the
	     prevailing one and delete it; removing a throwing stmt
	     may require purging EH edges.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  replace_uses_by (lhs, name);
	  if (gsi_remove (&gsi, true)
	      && gimple_purge_dead_eh_edges (use_bb))
	    *cfg_changed = true;
	  release_defs (use_stmt);
	}
    }

  return name;
}
1215 :
1216 : /* Records an occurrence at statement USE_STMT in the vector of trees
1217 : STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1218 : is not yet initialized. Returns true if the occurrence was pushed on
1219 : the vector. Adjusts *TOP_BB to be the basic block dominating all
1220 : statements in the vector. */
1221 :
1222 : static bool
1223 1241 : maybe_record_sincos (vec<gimple *> *stmts,
1224 : basic_block *top_bb, gimple *use_stmt)
1225 : {
1226 1241 : basic_block use_bb = gimple_bb (use_stmt);
1227 1241 : if (*top_bb
1228 1241 : && (*top_bb == use_bb
1229 66 : || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1230 151 : stmts->safe_push (use_stmt);
1231 1090 : else if (!*top_bb
1232 1090 : || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1233 : {
1234 1070 : stmts->safe_push (use_stmt);
1235 1070 : *top_bb = use_bb;
1236 : }
1237 : else
1238 : return false;
1239 :
1240 : return true;
1241 : }
1242 :
1243 : /* Look for sin, cos and cexpi calls with the same argument NAME and
1244 : create a single call to cexpi CSEing the result in this case.
1245 : We first walk over all immediate uses of the argument collecting
1246 : statements that we can CSE in a vector and in a second pass replace
1247 : the statement rhs with a REALPART or IMAGPART expression on the
1248 : result of the cexpi call we insert before the use statement that
1249 : dominates all other candidates. */
1250 :
static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type = NULL_TREE;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  /* CSE equivalent conversions feeding NAME first, so that sin/cos
     calls on the converted value share one argument.  */
  name = execute_cse_conv_1 (name, &cfg_changed);

  /* First pass: collect all sin/cos/cexpi calls on NAME that can be
     dominated by a single insertion point (tracked in TOP_BB).  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	  continue;
	}

      auto stmt_cfn = gimple_call_combined_fn (use_stmt);
      tree t = mathfn_built_in_type (stmt_cfn);
      if (!t)
	{
	  /* It is possible to get IFN_{SIN,COS} calls, for which
	     mathfn_built_in_type will return NULL.  Those are normally only
	     present for vector operations.  We won't be able to CSE those
	     at the moment.  */
	  gcc_checking_assert (internal_fn_p (stmt_cfn));
	  return false;
	}

      if (!type)
	{
	  type = t;
	  t = TREE_TYPE (name);
	}
      /* This checks that NAME has the right type in the first round,
	 and, in subsequent rounds, that the built_in type is the same
	 type, or a compatible type.  */
      if (type != t && !types_compatible_p (type, t))
	return false;
    }
  /* Nothing to CSE unless at least two distinct flavors were seen.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined by a normal statement in TOP_BB: insert the
	 cexpi call right after its definition.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

	/* Replace call with a copy.  */
	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

	gsi = gsi_for_stmt (use_stmt);
	gsi_replace (&gsi, stmt, true);
	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	  cfg_changed = true;
    }

  return cfg_changed;
}
1374 :
1375 : /* To evaluate powi(x,n), the floating point value x raised to the
1376 : constant integer exponent n, we use a hybrid algorithm that
1377 : combines the "window method" with look-up tables. For an
1378 : introduction to exponentiation algorithms and "addition chains",
1379 : see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1380 : "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1381 : 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1382 : Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1383 :
1384 : /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1385 : multiplications to inline before calling the system library's pow
1386 : function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1387 : so this default never requires calling pow, powf or powl. */
1388 :
1389 : #ifndef POWI_MAX_MULTS
1390 : #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1391 : #endif
1392 :
1393 : /* The size of the "optimal power tree" lookup table. All
1394 : exponents less than this value are simply looked up in the
1395 : powi_table below. This threshold is also used to size the
1396 : cache of pseudo registers that hold intermediate results. */
1397 : #define POWI_TABLE_SIZE 256
1398 :
1399 : /* The size, in bits of the window, used in the "window method"
1400 : exponentiation algorithm. This is equivalent to a radix of
1401 : (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1402 : #define POWI_WINDOW_SIZE 3
1403 :
1404 : /* The following table is an efficient representation of an
1405 : "optimal power tree". For each value, i, the corresponding
1406 : value, j, in the table states than an optimal evaluation
1407 : sequence for calculating pow(x,i) can be found by evaluating
1408 : pow(x,j)*pow(x,i-j). An optimal power tree for the first
1409 : 100 integers is given in Knuth's "Seminumerical algorithms". */
1410 :
/* powi_table[i] is a factor j of the decomposition
   pow (x, i) == pow (x, j) * pow (x, i - j); see the comment above.  */
static const unsigned char powi_table[POWI_TABLE_SIZE] =
{
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
};
1446 :
1447 :
1448 : /* Return the number of multiplications required to calculate
1449 : powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1450 : subroutine of powi_cost. CACHE is an array indicating
1451 : which exponents have already been calculated. */
1452 :
1453 : static int
1454 1120 : powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1455 : {
1456 : /* If we've already calculated this exponent, then this evaluation
1457 : doesn't require any additional multiplications. */
1458 1861 : if (cache[n])
1459 1120 : return 0;
1460 :
1461 741 : cache[n] = true;
1462 741 : return powi_lookup_cost (n - powi_table[n], cache)
1463 741 : + powi_lookup_cost (powi_table[n], cache) + 1;
1464 : }
1465 :
1466 : /* Return the number of multiplications required to calculate
1467 : powi(x,n) for an arbitrary x, given the exponent N. This
1468 : function needs to be kept in sync with powi_as_mults below. */
1469 :
static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  /* x**0 needs no multiplications at all.  */
  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = absu_hwi (n);

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  /* Window method: reduce exponents too large for the lookup table by
     peeling POWI_WINDOW_SIZE bits at a time (odd values) or squaring
     (even values), counting the multiplications each step costs.  */
  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}
1508 :
1509 : /* Recursive subroutine of powi_as_mults. This function takes the
1510 : array, CACHE, of already calculated exponents and an exponent N and
1511 : returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1512 :
static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 unsigned HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  /* Reuse an already-emitted power when available.  */
  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      /* Small exponent: split per the optimal power tree.  Cache the
	 result before recursing so shared subtrees are reused.  */
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      /* Large odd exponent: peel off the low POWI_WINDOW_SIZE bits
	 (window method).  */
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      /* Large even exponent: square x**(n/2).  */
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
1550 :
1551 : /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1552 : This function needs to be kept in sync with powi_cost above. */
1553 :
tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  /* x**0 == 1, no statements needed.  */
  if (n == 0)
    return build_one_cst (type);

  /* Seed the cache with x**1 = ARG0.  */
  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}
1581 :
1582 : /* ARG0 and N are the two arguments to a powi builtin in GSI with
1583 : location info LOC. If the arguments are appropriate, create an
1584 : equivalent sequence of statements prior to GSI using an optimal
1585 : number of multiplications, and return an expession holding the
1586 : result. */
1587 :
1588 : static tree
1589 630 : gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1590 : tree arg0, HOST_WIDE_INT n)
1591 : {
1592 630 : if ((n >= -1 && n <= 2)
1593 630 : || (optimize_function_for_speed_p (cfun)
1594 351 : && powi_cost (n) <= POWI_MAX_MULTS))
1595 622 : return powi_as_mults (gsi, loc, arg0, n);
1596 :
1597 : return NULL_TREE;
1598 : }
1599 :
1600 : /* Build a gimple call statement that calls FN with argument ARG.
1601 : Set the lhs of the call statement to a fresh SSA name. Insert the
1602 : statement prior to GSI's current position, and return the fresh
1603 : SSA name. */
1604 :
1605 : static tree
1606 44 : build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1607 : tree fn, tree arg)
1608 : {
1609 44 : gcall *call_stmt;
1610 44 : tree ssa_target;
1611 :
1612 44 : call_stmt = gimple_build_call (fn, 1, arg);
1613 44 : ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1614 44 : gimple_set_lhs (call_stmt, ssa_target);
1615 44 : gimple_set_location (call_stmt, loc);
1616 44 : gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1617 :
1618 44 : return ssa_target;
1619 : }
1620 :
1621 : /* Build a gimple binary operation with the given CODE and arguments
1622 : ARG0, ARG1, assigning the result to a new SSA name for variable
1623 : TARGET. Insert the statement prior to GSI's current position, and
1624 : return the fresh SSA name.*/
1625 :
1626 : static tree
1627 905 : build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1628 : const char *name, enum tree_code code,
1629 : tree arg0, tree arg1)
1630 : {
1631 905 : tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1632 905 : gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1633 905 : gimple_set_location (stmt, loc);
1634 905 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1635 905 : return result;
1636 : }
1637 :
1638 : /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1639 : prior to GSI's current position, and return the fresh SSA name. */
1640 :
static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  /* gimple_convert folds trivial conversions and otherwise emits the
     cast statement before *GSI.  */
  return gimple_convert (gsi, true, GSI_SAME_STMT, loc, type, val);
}
1647 :
/* Bookkeeping for synthesizing a fractional power as a product of
   sqrt chains; filled in by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* factors[i] is true iff the term 0.5**(i + 1) appears in the sum.  */
  bool *factors;
  /* Largest 0.5 power used, i.e. the depth of the deepest sqrt chain.  */
  unsigned int deepest;
  /* Number of multiplications needed to combine the chains.  */
  unsigned int num_mults;
};
1654 :
1655 : /* Return true iff the real value C can be represented as a
1656 : sum of powers of 0.5 up to N. That is:
1657 : C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1658 : Record in INFO the various parameters of the synthesis algorithm such
1659 : as the factors a[i], the maximum 0.5 power and the number of
1660 : multiplications that will be required. */
1661 :
bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  /* Greedily subtract successive powers of 0.5 (factor = 0.5**(i+1))
     from the remainder, recording which terms were taken.  */
  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
         of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  /* The term fits: keep it and continue with the remainder.  */
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}
1702 :
1703 : /* Return the tree corresponding to FN being applied
1704 : to ARG N times at GSI and LOC.
1705 : Look up previous results from CACHE if need be.
1706 : cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1707 :
1708 : static tree
1709 63 : get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1710 : tree fn, location_t loc, tree *cache)
1711 : {
1712 63 : tree res = cache[n];
1713 63 : if (!res)
1714 : {
1715 40 : tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1716 40 : res = build_and_insert_call (gsi, loc, fn, prev);
1717 40 : cache[n] = res;
1718 : }
1719 :
1720 63 : return res;
1721 : }
1722 :
1723 : /* Print to STREAM the repeated application of function FNAME to ARG
1724 : N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1725 : "foo (foo (x))". */
1726 :
static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  /* Emit N opening applications, the argument, then N closing parens,
     producing e.g. "foo (foo (x))" for N == 2.  */
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, "%s (", fname);
  fprintf (stream, "%s", arg);
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, ")");
}
1740 :
1741 : /* Print to STREAM the fractional sequence of sqrt chains
1742 : applied to ARG, described by INFO. Used for the dump file. */
1743 :
1744 : static void
1745 7 : dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1746 : struct pow_synth_sqrt_info *info)
1747 : {
1748 29 : for (unsigned int i = 0; i < info->deepest; i++)
1749 : {
1750 22 : bool is_set = info->factors[i];
1751 22 : if (is_set)
1752 : {
1753 10 : print_nested_fn (stream, "sqrt", arg, i + 1);
1754 10 : if (i != info->deepest - 1)
1755 3 : fprintf (stream, " * ");
1756 : }
1757 : }
1758 7 : }
1759 :
1760 : /* Print to STREAM a representation of raising ARG to an integer
1761 : power N. Used for the dump file. */
1762 :
1763 : static void
1764 7 : dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1765 : {
1766 7 : if (n > 1)
1767 3 : fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1768 4 : else if (n == 1)
1769 3 : fprintf (stream, "%s", arg);
1770 7 : }
1771 :
1772 : /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1773 : square roots. Place at GSI and LOC. Limit the maximum depth
1774 : of the sqrt chains to MAX_DEPTH. Return the tree holding the
1775 : result of the expanded sequence or NULL_TREE if the expansion failed.
1776 :
1777 : This routine assumes that ARG1 is a real number with a fractional part
1778 : (the integer exponent case will have been handled earlier in
1779 : gimple_expand_builtin_pow).
1780 :
1781 : For ARG1 > 0.0:
1782 : * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1783 : FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1784 : FRAC_PART == ARG1 - WHOLE_PART:
1785 : Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1786 : POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1787 : if it can be expressed as such, that is if FRAC_PART satisfies:
1788 : FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1789 : where integer a[i] is either 0 or 1.
1790 :
1791 : Example:
1792 : POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1793 : --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1794 :
1795 : For ARG1 < 0.0 there are two approaches:
1796 : * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1797 : is calculated as above.
1798 :
1799 : Example:
1800 : POW (x, -5.625) == 1.0 / POW (x, 5.625)
1801 : --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1802 :
1803 : * (B) : WHOLE_PART := - ceil (abs (ARG1))
1804 : FRAC_PART := ARG1 - WHOLE_PART
1805 : and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1806 : Example:
1807 : POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1808 : --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1809 :
1810 : For ARG1 < 0.0 we choose between (A) and (B) depending on
1811 : how many multiplications we'd have to do.
1812 : So, for the example in (B): POW (x, -5.875), if we were to
1813 : follow algorithm (A) we would produce:
1814 : 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1815 : which contains more multiplications than approach (B).
1816 :
1817 : Hopefully, this approach will eliminate potentially expensive POW library
1818 : calls when unsafe floating point math is enabled and allow the compiler to
1819 : further optimise the multiplies, square roots and divides produced by this
1820 : function. */
1821 :
1822 : static tree
1823 25 : expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1824 : tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1825 : {
1826 25 : tree type = TREE_TYPE (arg0);
1827 25 : machine_mode mode = TYPE_MODE (type);
1828 25 : tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
 : /* True when a negative exponent will be synthesized as
 : 1.0 / POW (x, -ARG1) (approach (A) above) rather than as
 : POW (x, FRAC_PART) / POWI (x, WHOLE_PART) (approach (B)). */
1829 25 : bool one_over = true;
1830 :
 : /* A sqrt builtin for TYPE is required to build the chains. */
1831 25 : if (!sqrtfn)
1832 : return NULL_TREE;
1833 :
 : /* The exponent must be a compile-time real constant. */
1834 25 : if (TREE_CODE (arg1) != REAL_CST)
1835 : return NULL_TREE;
1836 :
1837 25 : REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1838 :
1839 25 : gcc_assert (max_depth > 0);
 : /* CACHE[i] memoizes the i-deep chain SQRT (SQRT (... SQRT (x)))
 : so each depth is materialized at most once (see get_fn_chain). */
1840 25 : tree *cache = XALLOCAVEC (tree, max_depth + 1);
1841 :
1842 25 : struct pow_synth_sqrt_info synth_info;
1843 25 : synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1844 25 : synth_info.deepest = 0;
1845 25 : synth_info.num_mults = 0;
1846 :
 : /* Work on the absolute value of the exponent; NEG_EXP records
 : whether the final result must be inverted. */
1847 25 : bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1848 25 : REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1849 :
1850 : /* The whole and fractional parts of exp. */
1851 25 : REAL_VALUE_TYPE whole_part;
1852 25 : REAL_VALUE_TYPE frac_part;
1853 :
1854 25 : real_floor (&whole_part, mode, &exp);
1855 25 : real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1856 :
1857 :
 : /* Ceiling-based decomposition of EXP, only used for approach (B). */
1858 25 : REAL_VALUE_TYPE ceil_whole = dconst0;
1859 25 : REAL_VALUE_TYPE ceil_fract = dconst0;
1860 :
1861 25 : if (neg_exp)
1862 : {
1863 10 : real_ceil (&ceil_whole, mode, &exp);
1864 10 : real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1865 : }
1866 :
 : /* FRAC_PART must be expressible as a sum of powers of 0.5 no deeper
 : than MAX_DEPTH; on success this also fills SYNTH_INFO. */
1867 25 : if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1868 : return NULL_TREE;
1869 :
1870 : /* Check whether it's more profitable to not use 1.0 / ... */
1871 18 : if (neg_exp)
1872 : {
1873 8 : struct pow_synth_sqrt_info alt_synth_info;
1874 8 : alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1875 8 : alt_synth_info.deepest = 0;
1876 8 : alt_synth_info.num_mults = 0;
1877 :
 : /* Switch to the ceiling decomposition (B) only when it is also
 : representable and strictly cheaper: chains no deeper and
 : fewer multiplications. */
1878 8 : if (representable_as_half_series_p (ceil_fract, max_depth,
1879 : &alt_synth_info)
1880 8 : && alt_synth_info.deepest <= synth_info.deepest
1881 16 : && alt_synth_info.num_mults < synth_info.num_mults)
1882 : {
1883 2 : whole_part = ceil_whole;
1884 2 : frac_part = ceil_fract;
1885 2 : synth_info.deepest = alt_synth_info.deepest;
1886 2 : synth_info.num_mults = alt_synth_info.num_mults;
1887 2 : memcpy (synth_info.factors, alt_synth_info.factors,
1888 : (max_depth + 1) * sizeof (bool));
1889 2 : one_over = false;
1890 : }
1891 : }
1892 :
 : /* The whole part must round-trip through a HOST_WIDE_INT so it can
 : be handed to the powi expander. */
1893 18 : HOST_WIDE_INT n = real_to_integer (&whole_part);
1894 18 : REAL_VALUE_TYPE cint;
1895 18 : real_from_integer (&cint, VOIDmode, n, SIGNED);
1896 :
1897 18 : if (!real_identical (&whole_part, &cint))
1898 : return NULL_TREE;
1899 :
 : /* Respect the overall budget on synthesized multiplications. */
1900 18 : if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1901 : return NULL_TREE;
1902 :
1903 18 : memset (cache, 0, (max_depth + 1) * sizeof (tree));
1904 :
1905 18 : tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1906 :
1907 : /* Calculate the integer part of the exponent. */
1908 18 : if (n > 1)
1909 : {
1910 6 : integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1911 6 : if (!integer_res)
1912 : return NULL_TREE;
1913 : }
1914 :
 : /* Describe the chosen expansion in the dump file. */
1915 18 : if (dump_file)
1916 : {
1917 7 : char string[64];
1918 :
1919 7 : real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1920 7 : fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1921 :
1922 7 : if (neg_exp)
1923 : {
1924 2 : if (one_over)
1925 : {
1926 1 : fprintf (dump_file, "1.0 / (");
1927 1 : dump_integer_part (dump_file, "x", n);
1928 1 : if (n > 0)
1929 1 : fprintf (dump_file, " * ");
1930 1 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1931 1 : fprintf (dump_file, ")");
1932 : }
1933 : else
1934 : {
1935 1 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1936 1 : fprintf (dump_file, " / (");
1937 1 : dump_integer_part (dump_file, "x", n);
1938 1 : fprintf (dump_file, ")");
1939 : }
1940 : }
1941 : else
1942 : {
1943 5 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1944 5 : if (n > 0)
1945 4 : fprintf (dump_file, " * ");
1946 5 : dump_integer_part (dump_file, "x", n);
1947 : }
1948 :
1949 7 : fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1950 : }
1951 :
1952 :
1953 18 : tree fract_res = NULL_TREE;
1954 18 : cache[0] = arg0;
1955 :
1956 : /* Calculate the fractional part of the exponent. */
1957 58 : for (unsigned i = 0; i < synth_info.deepest; i++)
1958 : {
 : /* FACTORS[i] set means the 0.5**(i+1) term participates, i.e. an
 : (i+1)-deep sqrt chain gets multiplied into the result. */
1959 40 : if (synth_info.factors[i])
1960 : {
1961 23 : tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1962 :
1963 23 : if (!fract_res)
1964 : fract_res = sqrt_chain;
1965 :
1966 : else
1967 5 : fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1968 : fract_res, sqrt_chain);
1969 : }
1970 : }
1971 :
1972 18 : tree res = NULL_TREE;
1973 :
 : /* Combine the integer and fractional pieces, inverting or dividing
 : as required for negative exponents. */
1974 18 : if (neg_exp)
1975 : {
1976 8 : if (one_over)
1977 : {
1978 6 : if (n > 0)
1979 4 : res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1980 : fract_res, integer_res);
1981 : else
1982 : res = fract_res;
1983 :
1984 6 : res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1985 : build_real (type, dconst1), res);
1986 : }
1987 : else
1988 : {
1989 2 : res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1990 : fract_res, integer_res);
1991 : }
1992 : }
1993 : else
1994 10 : res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1995 : fract_res, integer_res);
1996 : return res;
1997 : }
1998 :
1999 : /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2000 : with location info LOC. If possible, create an equivalent and
2001 : less expensive sequence of statements prior to GSI, and return an
2002 : expression holding the result. */
2003 :
2004 : static tree
2005 600 : gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2006 : tree arg0, tree arg1)
2007 : {
2008 600 : REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2009 600 : REAL_VALUE_TYPE c2, dconst3;
2010 600 : HOST_WIDE_INT n;
2011 600 : tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2012 600 : machine_mode mode;
2013 600 : bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2014 600 : bool hw_sqrt_exists, c_is_int, c2_is_int;
2015 :
 : /* Build the constant 0.25 (1.0 with its exponent lowered by two). */
2016 600 : dconst1_4 = dconst1;
2017 600 : SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2018 :
2019 : /* If the exponent isn't a constant, there's nothing of interest
2020 : to be done. */
2021 600 : if (TREE_CODE (arg1) != REAL_CST)
2022 : return NULL_TREE;
2023 :
2024 : /* Don't perform the operation if flag_signaling_nans is on
2025 : and the operand is a signaling NaN. */
2026 362 : if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2027 362 : && ((TREE_CODE (arg0) == REAL_CST
2028 0 : && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2029 1 : || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2030 0 : return NULL_TREE;
2031 :
 : /* Under -fmath-errno the pow call may be observed through errno,
 : so leave it alone. */
2032 362 : if (flag_errno_math)
2033 : return NULL_TREE;
2034 :
2035 : /* If the exponent is equivalent to an integer, expand to an optimal
2036 : multiplication sequence when profitable. */
2037 75 : c = TREE_REAL_CST (arg1);
2038 75 : n = real_to_integer (&c);
2039 75 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2040 75 : c_is_int = real_identical (&c, &cint);
2041 :
 : /* Exponents in [-1, 2] are always worthwhile; larger ones only with
 : unsafe math, when optimizing for speed and within the powi
 : multiplication budget. */
2042 75 : if (c_is_int
2043 75 : && ((n >= -1 && n <= 2)
2044 21 : || (flag_unsafe_math_optimizations
2045 11 : && speed_p
2046 11 : && powi_cost (n) <= POWI_MAX_MULTS)))
2047 30 : return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2048 :
2049 : /* Attempt various optimizations using sqrt and cbrt. */
2050 45 : type = TREE_TYPE (arg0);
2051 45 : mode = TYPE_MODE (type);
2052 45 : sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2053 :
2054 : /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2055 : unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2056 : sqrt(-0) = -0. */
2057 45 : if (sqrtfn
2058 45 : && real_equal (&c, &dconsthalf)
2059 52 : && !HONOR_SIGNED_ZEROS (mode))
2060 0 : return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2061 :
2062 45 : hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2063 :
2064 : /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2065 : optimizations since 1./3. is not exactly representable. If x
2066 : is negative and finite, the correct value of pow(x,1./3.) is
2067 : a NaN with the "invalid" exception raised, because the value
2068 : of 1./3. actually has an even denominator. The correct value
2069 : of cbrt(x) is a negative real value. */
2070 45 : cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2071 45 : dconst1_3 = real_value_truncate (mode, dconst_third ());
2072 :
2073 45 : if (flag_unsafe_math_optimizations
2074 25 : && cbrtfn
2075 25 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2076 70 : && real_equal (&c, &dconst1_3))
2077 0 : return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2078 :
2079 : /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2080 : if we don't have a hardware sqrt insn. */
2081 45 : dconst1_6 = dconst1_3;
2082 45 : SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2083 :
2084 45 : if (flag_unsafe_math_optimizations
2085 25 : && sqrtfn
2086 25 : && cbrtfn
2087 25 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2088 : && speed_p
2089 25 : && hw_sqrt_exists
2090 70 : && real_equal (&c, &dconst1_6))
2091 : {
2092 : /* sqrt(x) */
2093 0 : sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2094 :
2095 : /* cbrt(sqrt(x)) */
2096 0 : return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2097 : }
2098 :
2099 :
2100 : /* Attempt to expand the POW as a product of square root chains.
2101 : Expand the 0.25 case even when optimising for size. */
2102 45 : if (flag_unsafe_math_optimizations
2103 25 : && sqrtfn
2104 25 : && hw_sqrt_exists
2105 25 : && (speed_p || real_equal (&c, &dconst1_4))
2106 70 : && !HONOR_SIGNED_ZEROS (mode))
2107 : {
 : /* When optimizing for size the guard above restricted C to 0.25,
 : which needs only depth 2 (0.25 == 0.5**2). */
2108 50 : unsigned int max_depth = speed_p
2109 25 : ? param_max_pow_sqrt_depth
2110 : : 2;
2111 :
2112 25 : tree expand_with_sqrts
2113 25 : = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2114 :
2115 25 : if (expand_with_sqrts)
2116 : return expand_with_sqrts;
2117 : }
2118 :
 : /* C2_IS_INT records whether 2c is an integer, i.e. whether c is an
 : exact multiple of 0.5; used to constrain the cbrt expansion below. */
2119 27 : real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2120 27 : n = real_to_integer (&c2);
2121 27 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2122 27 : c2_is_int = real_identical (&c2, &cint);
2123 :
2124 : /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2125 :
2126 : powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2127 : 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2128 :
2129 : Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2130 : different from pow(x, 1./3.) due to rounding and behavior with
2131 : negative x, we need to constrain this transformation to unsafe
2132 : math and positive x or finite math. */
2133 27 : real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2134 27 : real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2135 27 : real_round (&c2, mode, &c2);
2136 27 : n = real_to_integer (&c2);
2137 27 : real_from_integer (&cint, VOIDmode, n, SIGNED);
 : /* C2 := round (3c) / 3; comparing it against C below verifies that
 : 3c really is an integer in MODE. */
2138 27 : real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2139 27 : real_convert (&c2, mode, &c2);
2140 :
2141 27 : if (flag_unsafe_math_optimizations
2142 7 : && cbrtfn
2143 7 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2144 7 : && real_identical (&c2, &c)
2145 4 : && !c2_is_int
2146 4 : && optimize_function_for_speed_p (cfun)
2147 31 : && powi_cost (n / 3) <= POWI_MAX_MULTS)
2148 : {
2149 4 : tree powi_x_ndiv3 = NULL_TREE;
2150 :
2151 : /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2152 : possible or profitable, give up. Skip the degenerate case when
2153 : abs(n) < 3, where the result is always 1. */
2154 4 : if (absu_hwi (n) >= 3)
2155 : {
2156 4 : powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2157 : abs_hwi (n / 3));
2158 4 : if (!powi_x_ndiv3)
2159 : return NULL_TREE;
2160 : }
2161 :
2162 : /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2163 : as that creates an unnecessary variable. Instead, just produce
2164 : either cbrt(x) or cbrt(x) * cbrt(x). */
2165 4 : cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2166 :
2167 4 : if (absu_hwi (n) % 3 == 1)
2168 : powi_cbrt_x = cbrt_x;
2169 : else
2170 2 : powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2171 : cbrt_x, cbrt_x);
2172 :
2173 : /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2174 4 : if (absu_hwi (n) < 3)
2175 : result = powi_cbrt_x;
2176 : else
2177 4 : result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2178 : powi_x_ndiv3, powi_cbrt_x);
2179 :
2180 : /* If n is negative, reciprocate the result. */
2181 4 : if (n < 0)
2182 1 : result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2183 : build_real (type, dconst1), result);
2184 :
2185 4 : return result;
2186 : }
2187 :
2188 : /* No optimizations succeeded. */
2189 : return NULL_TREE;
2190 : }
2191 :
2192 : /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2193 : on the SSA_NAME argument of each of them. */
2194 :
2195 : namespace {
2196 :
2197 : const pass_data pass_data_cse_sincos =
2198 : {
2199 : GIMPLE_PASS, /* type */
2200 : "sincos", /* name */
2201 : OPTGROUP_NONE, /* optinfo_flags */
2202 : TV_TREE_SINCOS, /* tv_id */
2203 : PROP_ssa, /* properties_required */
2204 : 0, /* properties_provided */
2205 : 0, /* properties_destroyed */
2206 : 0, /* todo_flags_start */
2207 : TODO_update_ssa, /* todo_flags_finish */
2208 : };
2209 :
2210 : class pass_cse_sincos : public gimple_opt_pass
2211 : {
2212 : public:
2213 285722 : pass_cse_sincos (gcc::context *ctxt)
2214 571444 : : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2215 : {}
2216 :
2217 : /* opt_pass methods: */
 : /* Run whenever any optimization level is enabled. */
2218 1041484 : bool gate (function *) final override
2219 : {
2220 1041484 : return optimize;
2221 : }
2222 :
2223 : unsigned int execute (function *) final override;
2224 :
2225 : }; // class pass_cse_sincos
2226 :
 : /* Walk every statement of FUN looking for sin/cos/cexpi calls whose
 : result is used, and hand their SSA argument to execute_cse_sincos_1
 : so multiple calls on the same argument can be combined. */
2227 : unsigned int
2228 1041459 : pass_cse_sincos::execute (function *fun)
2229 : {
2230 1041459 : basic_block bb;
2231 1041459 : bool cfg_changed = false;
2232 :
2233 1041459 : calculate_dominance_info (CDI_DOMINATORS);
2234 1041459 : memset (&sincos_stats, 0, sizeof (sincos_stats));
2235 :
2236 10992581 : FOR_EACH_BB_FN (bb, fun)
2237 : {
2238 9951122 : gimple_stmt_iterator gsi;
2239 :
2240 93578676 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2241 : {
2242 83627554 : gimple *stmt = gsi_stmt (gsi);
2243 :
 : /* Only calls with a used result are interesting. */
2244 83627554 : if (is_gimple_call (stmt)
2245 83627554 : && gimple_call_lhs (stmt))
2246 : {
2247 2024783 : tree arg;
2248 2024783 : switch (gimple_call_combined_fn (stmt))
2249 : {
2250 1031 : CASE_CFN_COS:
2251 1031 : CASE_CFN_SIN:
2252 1031 : CASE_CFN_CEXPI:
2253 1031 : arg = gimple_call_arg (stmt, 0);
2254 : /* Make sure we have either sincos or cexp. */
2255 1031 : if (!targetm.libc_has_function (function_c99_math_complex,
2256 1031 : TREE_TYPE (arg))
2257 1031 : && !targetm.libc_has_function (function_sincos,
2258 0 : TREE_TYPE (arg)))
2259 : break;
2260 :
2261 1031 : if (TREE_CODE (arg) == SSA_NAME)
2262 1031 : cfg_changed |= execute_cse_sincos_1 (arg);
2263 : break;
2264 : default:
2265 : break;
2266 : }
2267 : }
2268 : }
2269 : }
2270 :
2271 1041459 : statistics_counter_event (fun, "sincos statements inserted",
2272 : sincos_stats.inserted);
2273 1041459 : statistics_counter_event (fun, "conv statements removed",
2274 : sincos_stats.conv_removed);
2275 :
 : /* Request CFG cleanup when call replacement changed control flow. */
2276 1041459 : return cfg_changed ? TODO_cleanup_cfg : 0;
2277 : }
2278 :
2279 : } // anon namespace
2280 :
2281 : gimple_opt_pass *
2282 285722 : make_pass_cse_sincos (gcc::context *ctxt)
2283 : {
 : /* Factory used by the pass manager to instantiate the sincos pass. */
2284 285722 : return new pass_cse_sincos (ctxt);
2285 : }
2286 :
2287 : /* Expand powi(x,n) into an optimal number of multiplies, when n is a
2288 : constant. */
2289 : namespace {
2290 :
2291 : const pass_data pass_data_expand_pow =
2292 : {
2293 : GIMPLE_PASS, /* type */
2294 : "pow", /* name */
2295 : OPTGROUP_NONE, /* optinfo_flags */
2296 : TV_TREE_POW, /* tv_id */
2297 : PROP_ssa, /* properties_required */
2298 : PROP_gimple_opt_math, /* properties_provided */
2299 : 0, /* properties_destroyed */
2300 : 0, /* todo_flags_start */
2301 : TODO_update_ssa, /* todo_flags_finish */
2302 : };
2303 :
2304 : class pass_expand_pow : public gimple_opt_pass
2305 : {
2306 : public:
2307 285722 : pass_expand_pow (gcc::context *ctxt)
2308 571444 : : gimple_opt_pass (pass_data_expand_pow, ctxt)
2309 : {}
2310 :
2311 : /* opt_pass methods: */
 : /* Run whenever any optimization level is enabled. */
2312 1041484 : bool gate (function *) final override
2313 : {
2314 1041484 : return optimize;
2315 : }
2316 :
2317 : unsigned int execute (function *) final override;
2318 :
2319 : }; // class pass_expand_pow
2320 :
 : /* Walk every statement of FUN, replacing pow/powi calls that have a
 : used result with cheaper multiply/sqrt/cbrt sequences where the
 : expanders above succeed. */
2321 : unsigned int
2322 1041479 : pass_expand_pow::execute (function *fun)
2323 : {
2324 1041479 : basic_block bb;
2325 1041479 : bool cfg_changed = false;
2326 :
2327 1041479 : calculate_dominance_info (CDI_DOMINATORS);
2328 :
2329 10457839 : FOR_EACH_BB_FN (bb, fun)
2330 : {
2331 9416360 : gimple_stmt_iterator gsi;
2332 9416360 : bool cleanup_eh = false;
2333 :
2334 90968345 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2335 : {
2336 81551985 : gimple *stmt = gsi_stmt (gsi);
2337 :
2338 : /* Only the last stmt in a bb could throw, no need to call
2339 : gimple_purge_dead_eh_edges if we change something in the middle
2340 : of a basic block. */
2341 81551985 : cleanup_eh = false;
2342 :
2343 81551985 : if (is_gimple_call (stmt)
2344 81551985 : && gimple_call_lhs (stmt))
2345 : {
2346 1997672 : tree arg0, arg1, result;
2347 1997672 : HOST_WIDE_INT n;
2348 1997672 : location_t loc;
2349 :
2350 1997672 : switch (gimple_call_combined_fn (stmt))
2351 : {
2352 600 : CASE_CFN_POW:
2353 600 : arg0 = gimple_call_arg (stmt, 0);
2354 600 : arg1 = gimple_call_arg (stmt, 1);
2355 :
2356 600 : loc = gimple_location (stmt);
2357 600 : result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2358 :
 : /* On success, replace the call with a plain assignment of
 : the expanded result and drop the call's virtual def. */
2359 600 : if (result)
2360 : {
2361 52 : tree lhs = gimple_get_lhs (stmt);
2362 52 : gassign *new_stmt = gimple_build_assign (lhs, result);
2363 52 : gimple_set_location (new_stmt, loc);
2364 52 : unlink_stmt_vdef (stmt);
2365 52 : gsi_replace (&gsi, new_stmt, true);
2366 52 : cleanup_eh = true;
2367 104 : if (gimple_vdef (stmt))
2368 0 : release_ssa_name (gimple_vdef (stmt));
2369 : }
2370 : break;
2371 :
2372 812 : CASE_CFN_POWI:
2373 812 : arg0 = gimple_call_arg (stmt, 0);
2374 812 : arg1 = gimple_call_arg (stmt, 1);
2375 812 : loc = gimple_location (stmt);
2376 :
 : /* powi (-1.0, n) only depends on the parity of N:
 : emit (n & 1) ? -1.0 : 1.0. */
2377 812 : if (real_minus_onep (arg0))
2378 : {
2379 11 : tree t0, t1, cond, one, minus_one;
2380 11 : gassign *stmt;
2381 :
2382 11 : t0 = TREE_TYPE (arg0);
2383 11 : t1 = TREE_TYPE (arg1);
2384 11 : one = build_real (t0, dconst1);
2385 11 : minus_one = build_real (t0, dconstm1);
2386 :
2387 11 : cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2388 11 : stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2389 : arg1, build_int_cst (t1, 1));
2390 11 : gimple_set_location (stmt, loc);
2391 11 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2392 :
2393 11 : result = make_temp_ssa_name (t0, NULL, "powi");
2394 11 : stmt = gimple_build_assign (result, COND_EXPR, cond,
2395 : minus_one, one);
2396 11 : gimple_set_location (stmt, loc);
2397 11 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2398 : }
2399 : else
2400 : {
 : /* The exponent must fit a signed HOST_WIDE_INT. */
2401 801 : if (!tree_fits_shwi_p (arg1))
2402 : break;
2403 :
2404 590 : n = tree_to_shwi (arg1);
2405 590 : result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2406 : }
2407 :
2408 601 : if (result)
2409 : {
2410 593 : tree lhs = gimple_get_lhs (stmt);
2411 593 : gassign *new_stmt = gimple_build_assign (lhs, result);
2412 593 : gimple_set_location (new_stmt, loc);
2413 593 : unlink_stmt_vdef (stmt);
2414 593 : gsi_replace (&gsi, new_stmt, true);
2415 593 : cleanup_eh = true;
2416 81553171 : if (gimple_vdef (stmt))
2417 0 : release_ssa_name (gimple_vdef (stmt));
2418 : }
2419 : break;
2420 :
2421 211 : default:;
2422 : }
2423 : }
2424 : }
 : /* A replaced trailing call may have had EH edges; purge them. */
2425 9416360 : if (cleanup_eh)
2426 1 : cfg_changed |= gimple_purge_dead_eh_edges (bb);
2427 : }
2428 :
2429 1041479 : return cfg_changed ? TODO_cleanup_cfg : 0;
2430 : }
2431 :
2432 : } // anon namespace
2433 :
2434 : gimple_opt_pass *
2435 285722 : make_pass_expand_pow (gcc::context *ctxt)
2436 : {
 : /* Factory used by the pass manager to instantiate the pow pass. */
2437 285722 : return new pass_expand_pow (ctxt);
2438 : }
2439 :
2440 : /* Return true if stmt is a type conversion operation that can be stripped
2441 : when used in a widening multiply operation. */
2442 : static bool
2443 466879 : widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2444 : {
2445 466879 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2446 :
2447 466879 : if (TREE_CODE (result_type) == INTEGER_TYPE)
2448 : {
2449 466879 : tree op_type;
2450 466879 : tree inner_op_type;
2451 :
 : /* For integer results only integral conversions qualify. */
2452 466879 : if (!CONVERT_EXPR_CODE_P (rhs_code))
2453 : return false;
2454 :
2455 184462 : op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2456 :
2457 : /* If the type of OP has the same precision as the result, then
2458 : we can strip this conversion. The multiply operation will be
2459 : selected to create the correct extension as a by-product. */
2460 184462 : if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2461 : return true;
2462 :
2463 : /* We can also strip a conversion if it preserves the signed-ness of
2464 : the operation and doesn't narrow the range. */
2465 1162 : inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2466 :
2467 : /* If the inner-most type is unsigned, then we can strip any
2468 : intermediate widening operation. If it's signed, then the
2469 : intermediate widening operation must also be signed. */
2470 1162 : if ((TYPE_UNSIGNED (inner_op_type)
2471 1161 : || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2472 2323 : && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2473 : return true;
2474 :
2475 1162 : return false;
2476 : }
2477 :
 : /* For non-integer (fixed-point) results only FIXED_CONVERT_EXPR
 : may be stripped. */
2478 0 : return rhs_code == FIXED_CONVERT_EXPR;
2479 : }
2480 :
2481 : /* Return true if RHS is a suitable operand for a widening multiplication,
2482 : assuming a target type of TYPE.
2483 : There are two cases:
2484 :
2485 : - RHS makes some value at least twice as wide. Store that value
2486 : in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2487 :
2488 : - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2489 : but leave *TYPE_OUT untouched. */
2490 :
2491 : static bool
2492 915185 : is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2493 : tree *new_rhs_out)
2494 : {
2495 915185 : gimple *stmt;
2496 915185 : tree type1, rhs1;
2497 :
2498 915185 : if (TREE_CODE (rhs) == SSA_NAME)
2499 : {
2500 : /* Use tree_non_zero_bits to see if this operand is zero_extended
2501 : for unsigned widening multiplications or non-negative for
2502 : signed widening multiplications. */
 : /* Only applicable when TYPE's precision is even and a half-width
 : integer mode exists on the target. */
2503 759732 : if (TREE_CODE (type) == INTEGER_TYPE
2504 759732 : && (TYPE_PRECISION (type) & 1) == 0
2505 1519464 : && int_mode_for_size (TYPE_PRECISION (type) / 2, 1).exists ())
2506 : {
2507 753800 : unsigned int prec = TYPE_PRECISION (type);
2508 753800 : unsigned int hprec = prec / 2;
2509 753800 : wide_int bits = wide_int::from (tree_nonzero_bits (rhs), prec,
2510 1507600 : TYPE_SIGN (TREE_TYPE (rhs)));
 : /* Unsigned case: the whole upper half must be known zero. */
2511 753800 : if (TYPE_UNSIGNED (type)
2512 1311166 : && wi::bit_and (bits, wi::mask (hprec, true, prec)) == 0)
2513 : {
2514 147011 : *type_out = build_nonstandard_integer_type (hprec, true);
2515 : /* X & MODE_MASK can be simplified to (T)X. */
2516 147011 : stmt = SSA_NAME_DEF_STMT (rhs);
2517 294022 : if (is_gimple_assign (stmt)
2518 128097 : && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
2519 16913 : && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
2520 180237 : && wide_int::from (wi::to_wide (gimple_assign_rhs2 (stmt)),
2521 16613 : prec, TYPE_SIGN (TREE_TYPE (rhs)))
2522 196850 : == wi::mask (hprec, false, prec))
2523 14837 : *new_rhs_out = gimple_assign_rhs1 (stmt);
2524 : else
2525 132174 : *new_rhs_out = rhs;
2526 147011 : return true;
2527 : }
 : /* Signed case: bits HPREC-1 and above must be known zero, so the
 : value is non-negative and fits the signed half-width type. */
2528 606789 : else if (!TYPE_UNSIGNED (type)
2529 803223 : && wi::bit_and (bits, wi::mask (hprec - 1, true, prec)) == 0)
2530 : {
2531 24141 : *type_out = build_nonstandard_integer_type (hprec, false);
2532 24141 : *new_rhs_out = rhs;
2533 24141 : return true;
2534 : }
2535 753800 : }
2536 :
 : /* Otherwise look through a strippable widening conversion that
 : feeds RHS (see widening_mult_conversion_strippable_p). */
2537 588580 : stmt = SSA_NAME_DEF_STMT (rhs);
2538 588580 : if (is_gimple_assign (stmt))
2539 : {
2540 :
2541 466879 : if (widening_mult_conversion_strippable_p (type, stmt))
2542 : {
2543 183300 : rhs1 = gimple_assign_rhs1 (stmt);
2544 :
2545 183300 : if (TREE_CODE (rhs1) == INTEGER_CST)
2546 : {
2547 0 : *new_rhs_out = rhs1;
2548 0 : *type_out = NULL;
2549 0 : return true;
2550 : }
2551 : }
2552 : else
2553 : rhs1 = rhs;
2554 : }
2555 : else
2556 : rhs1 = rhs;
2557 :
2558 588580 : type1 = TREE_TYPE (rhs1);
2559 :
 : /* The unwidened operand must be at most half as wide as TYPE. */
2560 588580 : if (TREE_CODE (type1) != TREE_CODE (type)
2561 588580 : || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2562 : return false;
2563 :
2564 59481 : *new_rhs_out = rhs1;
2565 59481 : *type_out = type1;
2566 59481 : return true;
2567 : }
2568 :
 : /* A constant operand is accepted as-is; *TYPE_OUT stays NULL and the
 : caller picks a type based on the other operand. */
2569 155453 : if (TREE_CODE (rhs) == INTEGER_CST)
2570 : {
2571 155453 : *new_rhs_out = rhs;
2572 155453 : *type_out = NULL;
2573 155453 : return true;
2574 : }
2575 :
2576 : return false;
2577 : }
2578 :
2579 : /* Return true if STMT performs a widening multiplication, assuming the
2580 : output type is TYPE. If so, store the unwidened types of the operands
2581 : in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2582 : *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2583 : and *TYPE2_OUT would give the operands of the multiplication. */
2584 :
2585 : static bool
2586 719258 : is_widening_mult_p (gimple *stmt,
2587 : tree *type1_out, tree *rhs1_out,
2588 : tree *type2_out, tree *rhs2_out)
2589 : {
2590 719258 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2591 :
 : /* Only integer results without trapping overflow, or fixed-point
 : results, are candidates. */
2592 719258 : if (TREE_CODE (type) == INTEGER_TYPE)
2593 : {
2594 719258 : if (TYPE_OVERFLOW_TRAPS (type))
2595 : return false;
2596 : }
2597 0 : else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2598 : return false;
2599 :
 : /* Both multiplication operands must be suitable narrow operands. */
2600 719230 : if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2601 : rhs1_out))
2602 : return false;
2603 :
2604 195955 : if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2605 : rhs2_out))
2606 : return false;
2607 :
 : /* A NULL *TYPE?_OUT marks a constant operand: adopt the other
 : operand's unwidened type provided the constant fits in it. */
2608 190131 : if (*type1_out == NULL)
2609 : {
2610 0 : if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2611 : return false;
2612 0 : *type1_out = *type2_out;
2613 : }
2614 :
2615 190131 : if (*type2_out == NULL)
2616 : {
2617 155453 : if (!int_fits_type_p (*rhs2_out, *type1_out))
2618 : return false;
2619 150848 : *type2_out = *type1_out;
2620 : }
2621 :
2622 : /* Ensure that the larger of the two operands comes first. */
2623 185526 : if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2624 : {
2625 87 : std::swap (*type1_out, *type2_out);
2626 87 : std::swap (*rhs1_out, *rhs2_out);
2627 : }
2628 :
2629 : return true;
2630 : }
2631 :
2632 : /* Check to see if the CALL statement is an invocation of copysign
2633 : with 1. being the first argument. */
2634 : static bool
2635 162833 : is_copysign_call_with_1 (gimple *call)
2636 : {
 : /* CALL may be any defining statement; bail out unless it is a call. */
2637 167936 : gcall *c = dyn_cast <gcall *> (call);
2638 5156 : if (! c)
2639 : return false;
2640 :
2641 5156 : enum combined_fn code = gimple_call_combined_fn (c);
2642 :
2643 5156 : if (code == CFN_LAST)
2644 : return false;
2645 :
 : /* Handle the library builtin forms of copysign (all float widths). */
2646 4232 : if (builtin_fn_p (code))
2647 : {
2648 1200 : switch (as_builtin_fn (code))
2649 : {
2650 30 : CASE_FLT_FN (BUILT_IN_COPYSIGN):
2651 30 : CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2652 30 : return real_onep (gimple_call_arg (c, 0));
2653 : default:
2654 : return false;
2655 : }
2656 : }
2657 :
 : /* Handle the internal-function form, IFN_COPYSIGN. */
2658 3032 : if (internal_fn_p (code))
2659 : {
2660 3032 : switch (as_internal_fn (code))
2661 : {
2662 23 : case IFN_COPYSIGN:
2663 23 : return real_onep (gimple_call_arg (c, 0));
2664 : default:
2665 : return false;
2666 : }
2667 : }
2668 :
2669 : return false;
2670 : }
2671 :
2672 : /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2673 : This only happens when the xorsign optab is defined, if the
2674 : pattern is not a xorsign pattern or if expansion fails FALSE is
2675 : returned, otherwise TRUE is returned. */
2676 : static bool
2677 709513 : convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2678 : {
2679 709513 : tree treeop0, treeop1, lhs, type;
2680 709513 : location_t loc = gimple_location (stmt);
2681 709513 : lhs = gimple_assign_lhs (stmt);
2682 709513 : treeop0 = gimple_assign_rhs1 (stmt);
2683 709513 : treeop1 = gimple_assign_rhs2 (stmt);
2684 709513 : type = TREE_TYPE (lhs);
2685 709513 : machine_mode mode = TYPE_MODE (type);
2686 :
 : /* NOTE(review): punt under signaling-NaN semantics — presumably the
 : xorsign form is not equivalent there. */
2687 709513 : if (HONOR_SNANS (type))
2688 : return false;
2689 :
2690 708990 : if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2691 : {
 : /* Find which single-use operand is defined by copysign (1, y);
 : after this, CALL0 is that call and TREEOP1 the other factor. */
2692 212960 : gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2693 212960 : if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2694 : {
2695 212934 : call0 = SSA_NAME_DEF_STMT (treeop1);
2696 212934 : if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2697 212917 : return false;
2698 :
2699 : treeop1 = treeop0;
2700 : }
 : /* The target must implement xorsign for this mode. */
2701 43 : if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2702 : return false;
2703 :
 : /* TREEOP0 becomes Y, the sign-carrying second copysign argument. */
2704 43 : gcall *c = as_a<gcall*> (call0);
2705 43 : treeop0 = gimple_call_arg (c, 1);
2706 :
2707 43 : gcall *call_stmt
2708 43 : = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2709 43 : gimple_set_lhs (call_stmt, lhs);
2710 43 : gimple_set_location (call_stmt, loc);
2711 43 : gsi_replace (gsi, call_stmt, true);
2712 43 : return true;
2713 : }
2714 :
2715 : return false;
2716 : }
2717 :
2718 : /* Process a single gimple statement STMT, which has a MULT_EXPR as
2719 : its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2720 : value is true iff we converted the statement. */
2721 :
2722 : static bool
2723 719640 : convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2724 : {
2725 719640 : tree lhs, rhs1, rhs2, type, type1, type2;
2726 719640 : enum insn_code handler;
2727 719640 : scalar_int_mode to_mode, from_mode, actual_mode;
2728 719640 : optab op;
2729 719640 : int actual_precision;
2730 719640 : location_t loc = gimple_location (stmt);
2731 719640 : bool from_unsigned1, from_unsigned2;
2732 :
2733 719640 : lhs = gimple_assign_lhs (stmt);
2734 719640 : type = TREE_TYPE (lhs);
2735 719640 : if (TREE_CODE (type) != INTEGER_TYPE)
2736 : return false;
2737 :
2738 583498 : if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2739 : return false;
2740 :
2741 : /* If any one of rhs1 and rhs2 is subject to abnormal coalescing,
2742 : avoid the transform. */
2743 150133 : if ((TREE_CODE (rhs1) == SSA_NAME
2744 150133 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2745 300265 : || (TREE_CODE (rhs2) == SSA_NAME
2746 24180 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2747 : return false;
2748 :
2749 150132 : to_mode = SCALAR_INT_TYPE_MODE (type);
2750 150132 : from_mode = SCALAR_INT_TYPE_MODE (type1);
2751 150132 : if (to_mode == from_mode)
2752 : return false;
2753 :
2754 150128 : from_unsigned1 = TYPE_UNSIGNED (type1);
2755 150128 : from_unsigned2 = TYPE_UNSIGNED (type2);
2756 :
             : /* Pick the widening multiply optab matching the signedness of the
             : two (possibly mixed-sign) inputs. */
2757 150128 : if (from_unsigned1 && from_unsigned2)
2758 : op = umul_widen_optab;
2759 55466 : else if (!from_unsigned1 && !from_unsigned2)
2760 : op = smul_widen_optab;
2761 : else
2762 1917 : op = usmul_widen_optab;
2763 :
2764 150128 : handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2765 : &actual_mode);
2766 :
2767 150128 : if (handler == CODE_FOR_nothing)
2768 : {
2769 140001 : if (op != smul_widen_optab)
2770 : {
2771 : /* We can use a signed multiply with unsigned types as long as
2772 : there is a wider mode to use, or it is the smaller of the two
2773 : types that is unsigned. Note that type1 >= type2, always. */
2774 87958 : if ((TYPE_UNSIGNED (type1)
2775 86254 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2776 87958 : || (TYPE_UNSIGNED (type2)
2777 1704 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2778 : {
2779 87958 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2780 175916 : || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2781 87958 : return false;
2782 : }
2783 :
2784 0 : op = smul_widen_optab;
2785 0 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2786 : from_mode,
2787 : &actual_mode);
2788 :
2789 0 : if (handler == CODE_FOR_nothing)
2790 : return false;
2791 :
2792 : from_unsigned1 = from_unsigned2 = false;
2793 : }
2794 : else
2795 : {
2796 : /* Expand can synthesize smul_widen_optab if the target
2797 : supports umul_widen_optab. */
2798 52043 : op = umul_widen_optab;
2799 52043 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2800 : from_mode,
2801 : &actual_mode);
2802 52043 : if (handler == CODE_FOR_nothing)
2803 : return false;
2804 : }
2805 : }
2806 :
2807 : /* Ensure that the inputs to the handler are in the correct precision
2808 : for the opcode. This will be the full mode size. */
2809 10127 : actual_precision = GET_MODE_PRECISION (actual_mode);
             : /* Bail out if the result type is narrower than twice the precision
             : the handler actually operates on. */
2810 10127 : if (2 * actual_precision > TYPE_PRECISION (type))
2811 : return false;
2812 10127 : if (actual_precision != TYPE_PRECISION (type1)
2813 10127 : || from_unsigned1 != TYPE_UNSIGNED (type1))
2814 : {
2815 7 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2816 : {
2817 0 : if (TREE_CODE (rhs1) == INTEGER_CST)
2818 0 : rhs1 = fold_convert (type1, rhs1);
2819 : else
2820 0 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2821 : }
2822 7 : type1 = build_nonstandard_integer_type (actual_precision,
2823 : from_unsigned1);
2824 : }
2825 10127 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2826 : {
2827 9390 : if (TREE_CODE (rhs1) == INTEGER_CST)
2828 0 : rhs1 = fold_convert (type1, rhs1);
2829 : else
2830 9390 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2831 : }
2832 10127 : if (actual_precision != TYPE_PRECISION (type2)
2833 10127 : || from_unsigned2 != TYPE_UNSIGNED (type2))
2834 : {
2835 7 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2836 : {
2837 7 : if (TREE_CODE (rhs2) == INTEGER_CST)
2838 7 : rhs2 = fold_convert (type2, rhs2);
2839 : else
2840 0 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2841 : }
2842 7 : type2 = build_nonstandard_integer_type (actual_precision,
2843 : from_unsigned2);
2844 : }
2845 10127 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2846 : {
2847 9581 : if (TREE_CODE (rhs2) == INTEGER_CST)
2848 2076 : rhs2 = fold_convert (type2, rhs2);
2849 : else
2850 7505 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2851 : }
2852 :
             : /* Rewrite STMT in place into a WIDEN_MULT_EXPR. */
2853 10127 : gimple_assign_set_rhs1 (stmt, rhs1);
2854 10127 : gimple_assign_set_rhs2 (stmt, rhs2);
2855 10127 : gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2856 10127 : update_stmt (stmt);
2857 10127 : widen_mul_stats.widen_mults_inserted++;
2858 10127 : return true;
2859 : }
2860 :
2861 : /* Process a single gimple statement STMT, which is found at the
2862 : iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2863 : rhs (given by CODE), and try to convert it into a
2864 : WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2865 : is true iff we converted the statement. */
2866 :
2867 : static bool
2868 2579126 : convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2869 : enum tree_code code)
2870 : {
2871 2579126 : gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2872 2579126 : gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2873 2579126 : tree type, type1, type2, optype;
2874 2579126 : tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2875 2579126 : enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2876 2579126 : optab this_optab;
2877 2579126 : enum tree_code wmult_code;
2878 2579126 : enum insn_code handler;
2879 2579126 : scalar_mode to_mode, from_mode, actual_mode;
2880 2579126 : location_t loc = gimple_location (stmt);
2881 2579126 : int actual_precision;
2882 2579126 : bool from_unsigned1, from_unsigned2;
2883 :
2884 2579126 : lhs = gimple_assign_lhs (stmt);
2885 2579126 : type = TREE_TYPE (lhs);
2886 2579126 : if ((TREE_CODE (type) != INTEGER_TYPE
2887 397036 : && TREE_CODE (type) != FIXED_POINT_TYPE)
2888 2579126 : || !type_has_mode_precision_p (type))
2889 398379 : return false;
2890 :
2891 2180747 : if (code == MINUS_EXPR)
2892 : wmult_code = WIDEN_MULT_MINUS_EXPR;
2893 : else
2894 1941552 : wmult_code = WIDEN_MULT_PLUS_EXPR;
2895 :
2896 2180747 : rhs1 = gimple_assign_rhs1 (stmt);
2897 2180747 : rhs2 = gimple_assign_rhs2 (stmt);
2898 :
2899 2180747 : if (TREE_CODE (rhs1) == SSA_NAME)
2900 : {
2901 2148260 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2902 2148260 : if (is_gimple_assign (rhs1_stmt))
2903 1256598 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2904 : }
2905 :
2906 2180747 : if (TREE_CODE (rhs2) == SSA_NAME)
2907 : {
2908 787981 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2909 787981 : if (is_gimple_assign (rhs2_stmt))
2910 605589 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2911 : }
2912 :
2913 : /* Allow for one conversion statement between the multiply
2914 : and addition/subtraction statement. If there are more than
2915 : one conversions then we assume they would invalidate this
2916 : transformation. If that's not the case then they should have
2917 : been folded before now. */
2918 2180747 : if (CONVERT_EXPR_CODE_P (rhs1_code))
2919 : {
2920 420424 : conv1_stmt = rhs1_stmt;
2921 420424 : rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2922 420424 : if (TREE_CODE (rhs1) == SSA_NAME)
2923 : {
2924 355460 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2925 355460 : if (is_gimple_assign (rhs1_stmt))
2926 203305 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2927 : }
2928 : else
2929 : return false;
2930 : }
2931 2115783 : if (CONVERT_EXPR_CODE_P (rhs2_code))
2932 : {
2933 197460 : conv2_stmt = rhs2_stmt;
2934 197460 : rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2935 197460 : if (TREE_CODE (rhs2) == SSA_NAME)
2936 : {
2937 186105 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2938 186105 : if (is_gimple_assign (rhs2_stmt))
2939 116666 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2940 : }
2941 : else
2942 : return false;
2943 : }
2944 :
2945 : /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2946 : is_widening_mult_p, but we still need the rhs returns.
2947 :
2948 : It might also appear that it would be sufficient to use the existing
2949 : operands of the widening multiply, but that would limit the choice of
2950 : multiply-and-accumulate instructions.
2951 :
2952 : If the widened-multiplication result has more than one uses, it is
2953 : probably wiser not to do the conversion. Also restrict this operation
2954 : to single basic block to avoid moving the multiply to a different block
2955 : with a higher execution frequency. */
2956 2104428 : if (code == PLUS_EXPR
2957 1869983 : && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2958 : {
2959 138221 : if (!has_single_use (rhs1)
2960 76370 : || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2961 205212 : || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2962 : &type2, &mult_rhs2))
2963 117283 : return false;
2964 : add_rhs = rhs2;
2965 : conv_stmt = conv1_stmt;
2966 : }
2967 1966207 : else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2968 : {
2969 124708 : if (!has_single_use (rhs2)
2970 76088 : || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2971 193477 : || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2972 : &type2, &mult_rhs2))
2973 110253 : return false;
2974 : add_rhs = rhs1;
2975 : conv_stmt = conv2_stmt;
2976 : }
2977 : else
2978 : return false;
2979 :
2980 35393 : to_mode = SCALAR_TYPE_MODE (type);
2981 35393 : from_mode = SCALAR_TYPE_MODE (type1);
2982 35393 : if (to_mode == from_mode)
2983 : return false;
2984 :
2985 35390 : from_unsigned1 = TYPE_UNSIGNED (type1);
2986 35390 : from_unsigned2 = TYPE_UNSIGNED (type2);
2987 35390 : optype = type1;
2988 :
2989 : /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2990 35390 : if (from_unsigned1 != from_unsigned2)
2991 : {
2992 913 : if (!INTEGRAL_TYPE_P (type))
2993 : return false;
2994 : /* We can use a signed multiply with unsigned types as long as
2995 : there is a wider mode to use, or it is the smaller of the two
2996 : types that is unsigned. Note that type1 >= type2, always. */
2997 913 : if ((from_unsigned1
2998 56 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2999 913 : || (from_unsigned2
3000 857 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3001 : {
3002 1790 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3003 1826 : || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3004 877 : return false;
3005 : }
3006 :
3007 36 : from_unsigned1 = from_unsigned2 = false;
3008 36 : optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3009 : false);
3010 : }
3011 :
3012 : /* If there was a conversion between the multiply and addition
3013 : then we need to make sure it fits a multiply-and-accumulate.
3014 : There should be a single mode change which does not change the
3015 : value. */
3016 34513 : if (conv_stmt)
3017 : {
3018 : /* We use the original, unmodified data types for this. */
3019 737 : tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3020 737 : tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3021 737 : int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3022 737 : bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3023 :
3024 737 : if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3025 : {
3026 : /* Conversion is a truncate. */
3027 0 : if (TYPE_PRECISION (to_type) < data_size)
3028 : return false;
3029 : }
3030 737 : else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3031 : {
3032 : /* Conversion is an extend. Check it's the right sort. */
3033 382 : if (TYPE_UNSIGNED (from_type) != is_unsigned
3034 382 : && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3035 : return false;
3036 : }
3037 : /* else convert is a no-op for our purposes. */
3038 : }
3039 :
3040 : /* Verify that the machine can perform a widening multiply
3041 : accumulate in this mode/signedness combination, otherwise
3042 : this transformation is likely to pessimize code. */
3043 34194 : this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3044 34194 : handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3045 : from_mode, &actual_mode);
3046 :
3047 34194 : if (handler == CODE_FOR_nothing)
3048 : return false;
3049 :
3050 : /* Ensure that the inputs to the handler are in the correct precision
3051 : for the opcode. This will be the full mode size. */
3052 0 : actual_precision = GET_MODE_PRECISION (actual_mode);
3053 0 : if (actual_precision != TYPE_PRECISION (type1)
3054 0 : || from_unsigned1 != TYPE_UNSIGNED (type1))
3055 : {
3056 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3057 : {
3058 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3059 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3060 : else
3061 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3062 : }
3063 0 : type1 = build_nonstandard_integer_type (actual_precision,
3064 : from_unsigned1);
3065 : }
3066 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3067 : {
3068 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3069 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3070 : else
3071 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3072 : }
3073 0 : if (actual_precision != TYPE_PRECISION (type2)
3074 0 : || from_unsigned2 != TYPE_UNSIGNED (type2))
3075 : {
3076 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3077 : {
3078 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3079 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3080 : else
3081 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3082 : }
3083 0 : type2 = build_nonstandard_integer_type (actual_precision,
3084 : from_unsigned2);
3085 : }
3086 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3087 : {
3088 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3089 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3090 : else
3091 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3092 : }
3093 :
3094 0 : if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3095 0 : add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3096 :
             : /* Rewrite STMT in place into the widening multiply-accumulate. */
3097 0 : gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3098 : add_rhs);
3099 0 : update_stmt (gsi_stmt (*gsi));
3100 0 : widen_mul_stats.maccs_inserted++;
3101 0 : return true;
3102 : }
3103 :
3104 : /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3105 : OP2 and which we know is used in statements that can be, together with the
3106 : multiplication, converted to FMAs, perform the transformation. */
3107 :
3108 : static void
3109 17496 : convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3110 : {
3111 17496 : gimple *use_stmt;
3112 17496 : imm_use_iterator imm_iter;
3113 17496 : gcall *fma_stmt;
3114 :
             : /* Rewrite every use of MUL_RESULT (each known to be a suitable
             : addition/subtraction, possibly behind a cast or a negate) into an
             : (IFN_)FMA internal-function call. */
3115 52546 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3116 : {
3117 17554 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3118 17554 : tree addop, mulop1 = op1, result = mul_result;
3119 17554 : bool negate_p = false;
3120 17554 : gimple_seq seq = NULL;
3121 :
3122 17554 : if (is_gimple_debug (use_stmt))
3123 0 : continue;
3124 :
3125 : /* If the use is a type convert, look further into it if the operations
3126 : are the same under two's complement. */
3127 17554 : tree lhs_type;
3128 17554 : if (gimple_assign_cast_p (use_stmt)
3129 0 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3130 17554 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3131 : {
3132 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3133 0 : gimple *tmp_use;
3134 0 : use_operand_p tmp_use_p;
3135 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3136 : {
3137 0 : release_defs (use_stmt);
3138 0 : use_stmt = tmp_use;
3139 0 : result = cast_lhs;
3140 0 : gsi_remove (&gsi, true);
3141 0 : gsi = gsi_for_stmt (use_stmt);
3142 : }
3143 : }
3144 :
             : /* A negate of the multiplication result folds into the FMA
             : (yielding FNMA/FNMS); the negate statement itself becomes dead. */
3145 17554 : if (is_gimple_assign (use_stmt)
3146 17554 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3147 : {
3148 700 : result = gimple_assign_lhs (use_stmt);
3149 700 : use_operand_p use_p;
3150 700 : gimple *neguse_stmt;
3151 700 : single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3152 700 : gsi_remove (&gsi, true);
3153 700 : release_defs (use_stmt);
3154 :
3155 700 : use_stmt = neguse_stmt;
3156 700 : gsi = gsi_for_stmt (use_stmt);
3157 700 : negate_p = true;
3158 : }
3159 :
3160 17554 : tree cond, else_value, ops[3], len, bias;
3161 17554 : tree_code code;
3162 17554 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3163 : ops, &else_value,
3164 : &len, &bias))
3165 0 : gcc_unreachable ();
3166 17554 : addop = ops[0] == result ? ops[1] : ops[0];
3167 :
3168 17554 : if (code == MINUS_EXPR)
3169 : {
3170 5769 : if (ops[0] == result)
3171 : /* a * b - c -> a * b + (-c) */
3172 2910 : addop = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (addop), addop);
3173 : else
3174 : /* a - b * c -> (-b) * c + a */
3175 2859 : negate_p = !negate_p;
3176 : }
3177 :
3178 17554 : if (negate_p)
3179 3559 : mulop1 = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (mulop1), mulop1);
3180 :
3181 17554 : if (seq)
3182 5764 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3183 :
3184 : /* Ensure all the operands are of the same type. Use the type of the
3185 : addend as that's the statement being replaced. */
3186 17554 : op2 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3187 17554 : UNKNOWN_LOCATION, TREE_TYPE (addop), op2);
3188 17554 : mulop1 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3189 17554 : UNKNOWN_LOCATION, TREE_TYPE (addop), mulop1);
3190 :
3191 17554 : if (len)
3192 0 : fma_stmt
3193 0 : = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3194 : addop, else_value, len, bias)3195 17554 : else if (cond)
3196 94 : fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3197 : op2, addop, else_value);
3198 : else
3199 17460 : fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3200 17554 : gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3201 17554 : gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3202 : use_stmt));
3203 17554 : gsi_replace (&gsi, fma_stmt, true);
3204 : /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3205 : regardless of where the negation occurs. */
3206 17554 : gimple *orig_stmt = gsi_stmt (gsi);
3207 17554 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3208 : {
3209 5813 : if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3210 0 : gcc_unreachable ();
3211 5813 : update_stmt (gsi_stmt (gsi));
3212 : }
3213 :
3214 17554 : if (dump_file && (dump_flags & TDF_DETAILS))
3215 : {
3216 3 : fprintf (dump_file, "Generated FMA ");
3217 3 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3218 3 : fprintf (dump_file, "\n");
3219 : }
3220 :
3221 : /* If the FMA result is negated in a single use, fold the negation
3222 : too. */
3223 17554 : orig_stmt = gsi_stmt (gsi);
3224 17554 : use_operand_p use_p;
3225 17554 : gimple *neg_stmt;
3226 17554 : if (is_gimple_call (orig_stmt)
3227 17554 : && gimple_call_internal_p (orig_stmt)
3228 17554 : && gimple_call_lhs (orig_stmt)
3229 17554 : && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3230 17554 : && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3231 12536 : && is_gimple_assign (neg_stmt)
3232 9970 : && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3233 18907 : && !stmt_could_throw_p (cfun, neg_stmt))
3234 : {
3235 1353 : gsi = gsi_for_stmt (neg_stmt);
3236 1353 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3237 : {
3238 1353 : if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3239 0 : gcc_unreachable ();
3240 1353 : update_stmt (gsi_stmt (gsi));
3241 1353 : if (dump_file && (dump_flags & TDF_DETAILS))
3242 : {
3243 0 : fprintf (dump_file, "Folded FMA negation ");
3244 0 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3245 0 : fprintf (dump_file, "\n");
3246 : }
3247 : }
3248 : }
3249 :
3250 17554 : widen_mul_stats.fmas_inserted++;
3251 17496 : }
3252 17496 : }
3253 :
3254 : /* Data necessary to perform the actual transformation from a multiplication
3255 : and an addition to an FMA after decision is taken it should be done and to
3256 : then delete the multiplication statement from the function IL. */
3257 :
3258 : struct fma_transformation_info
3259 : {
             : /* The multiplication statement that will become dead and be removed. */
3260 : gimple *mul_stmt;
             : /* LHS of MUL_STMT, used by the addition/subtraction statements. */
3261 : tree mul_result;
             : /* The two multiplication operands. */
3262 : tree op1;
3263 : tree op2;
3264 : };
3265 :
3266 : /* Structure containing the current state of FMA deferring, i.e. whether we are
3267 : deferring, whether to continue deferring, and all data necessary to come
3268 : back and perform all deferred transformations. */
3269 :
3270 10219209 : class fma_deferring_state
3271 : {
3272 : public:
3273 : /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3274 : do any deferring. */
3275 :
3276 10219209 : fma_deferring_state (bool perform_deferring)
3277 10219209 : : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3278 10219209 : m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3279 :
3280 : /* List of FMA candidates for which we the transformation has been determined
3281 : possible but we at this point in BB analysis we do not consider them
3282 : beneficial. */
3283 : auto_vec<fma_transformation_info, 8> m_candidates;
3284 :
3285 : /* Set of results of multiplication that are part of an already deferred FMA
3286 : candidates. */
3287 : hash_set<tree> m_mul_result_set;
3288 :
3289 : /* The PHI that supposedly feeds back result of a FMA to another over loop
3290 : boundary. */
3291 : gphi *m_initial_phi;
3292 :
3293 : /* Result of the last produced FMA candidate or NULL if there has not been
3294 : one. */
3295 : tree m_last_result;
3296 :
3297 : /* If true, deferring might still be profitable. If false, transform all
3298 : candidates and no longer defer. */
3299 : bool m_deferring_p;
3300 : };
3301 :
3302 : /* Transform all deferred FMA candidates and mark STATE as no longer
3303 : deferring. */
3304 :
3305 : static void
3306 3618436 : cancel_fma_deferring (fma_deferring_state *state)
3307 : {
3308 3618436 : if (!state->m_deferring_p)
3309 : return;
3310 :
3311 2617478 : for (unsigned i = 0; i < state->m_candidates.length (); i++)
3312 : {
3313 918 : if (dump_file && (dump_flags & TDF_DETAILS))
3314 0 : fprintf (dump_file, "Generating deferred FMA\n");
3315 :
3316 918 : const fma_transformation_info &fti = state->m_candidates[i];
3317 918 : convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3318 :
             : /* The multiplication statement is now dead; remove it. */
3319 918 : gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3320 918 : gsi_remove (&gsi, true);
3321 918 : release_defs (fti.mul_stmt);
3322 : }
3323 2616560 : state->m_deferring_p = false;
3324 : }
3325 :
3326 : /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3327 : Otherwise return NULL. */
3328 :
3329 : static gphi *
3330 5242 : result_of_phi (tree op)
3331 : {
             : /* A non-SSA operand (e.g. a constant) cannot be a PHI result. */
3332 0 : if (TREE_CODE (op) != SSA_NAME)
3333 : return NULL;
3334 :
3335 5117 : return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3336 : }
3337 :
3338 : /* After processing statements of a BB and recording STATE, return true if the
3339 : initial phi is fed by the last FMA candidate result or one such result from
3340 : previously processed BBs marked in LAST_RESULT_SET. */
3341 :
3342 : static bool
3343 362 : last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3344 : hash_set<tree> *last_result_set)
3345 : {
3346 362 : ssa_op_iter iter;
3347 362 : use_operand_p use;
             : /* Scan every incoming argument of the recorded initial PHI. */
3348 898 : FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3349 : {
3350 630 : tree t = USE_FROM_PTR (use);
3351 630 : if (t == state->m_last_result
3352 630 : || last_result_set->contains (t))
3353 94 : return true;
3354 : }
3355 :
3356 : return false;
3357 : }
3358 :
3359 : /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3360 : with uses in additions and subtractions to form fused multiply-add
3361 : operations. Returns true if successful and MUL_STMT should be removed.
3362 : If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3363 : on MUL_COND, otherwise it is unconditional.
3364 :
3365 : If STATE indicates that we are deferring FMA transformation, that means
3366 : that we do not produce FMAs for basic blocks which look like:
3367 :
3368 : <bb 6>
3369 : # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3370 : _65 = _14 * _16;
3371 : accumulator_66 = _65 + accumulator_111;
3372 :
3373 : or its unrolled version, i.e. with several FMA candidates that feed result
3374 : of one into the addend of another. Instead, we add them to a list in STATE
3375 : and if we later discover an FMA candidate that is not part of such a chain,
3376 : we go back and perform all deferred past candidates. */
3377 :
3378 : static bool
3379 709599 : convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3380 : fma_deferring_state *state, tree mul_cond = NULL_TREE,
3381 : tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3382 : {
3383 709599 : tree mul_result = gimple_get_lhs (mul_stmt);
3384 : /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3385 : if the statement was left just for the side-effects. */
3386 709599 : if (!mul_result)
3387 : return false;
3388 709599 : tree type = TREE_TYPE (mul_result);
3389 709599 : gimple *use_stmt, *neguse_stmt;
3390 709599 : use_operand_p use_p;
3391 709599 : imm_use_iterator imm_iter;
3392 :
3393 612157 : if (FLOAT_TYPE_P (type)
3394 733933 : && flag_fp_contract_mode != FP_CONTRACT_FAST)
3395 : return false;
3396 :
3397 : /* We don't want to do bitfield reduction ops. */
3398 704459 : if (INTEGRAL_TYPE_P (type)
3399 704459 : && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3400 : return false;
3401 :
3402 : /* If the target doesn't support it, don't generate it. We assume that
3403 : if fma isn't available then fms, fnma or fnms are not either. */
3404 704285 : optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3405 704285 : if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3406 : return false;
3407 :
3408 : /* If the multiplication has zero uses, it is kept around probably because
3409 : of -fnon-call-exceptions. Don't optimize it away in that case,
3410 : it is DCE job. */
3411 23246 : if (has_zero_uses (mul_result))
3412 : return false;
3413 :
3414 23246 : bool check_defer
3415 23246 : = (state->m_deferring_p
3416 23246 : && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3417 23246 : param_avoid_fma_max_bits));
3418 23246 : bool defer = check_defer;
3419 23246 : bool seen_negate_p = false;
3420 :
3421 : /* There is no numerical difference between fused and unfused integer FMAs,
3422 : and the assumption below that FMA is as cheap as addition is unlikely
3423 : to be true, especially if the multiplication occurs multiple times on
3424 : the same chain. E.g., for something like:
3425 :
3426 : (((a * b) + c) >> 1) + (a * b)
3427 :
3428 : we do not want to duplicate the a * b into two additions, not least
3429 : because the result is not a natural FMA chain. */
3430 23246 : if (ANY_INTEGRAL_TYPE_P (type)
3431 23246 : && !has_single_use (mul_result))
3432 : return false;
3433 :
3434 23246 : if (!dbg_cnt (form_fma))
3435 : return false;
3436 :
3437 : /* Make sure that the multiplication statement becomes dead after
3438 : the transformation, thus that all uses are transformed to FMAs.
3439 : This means we assume that an FMA operation has the same cost
3440 : as an addition. */
3441 41630 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3442 : {
3443 24040 : tree result = mul_result;
3444 24040 : bool negate_p = false;
3445 :
3446 24040 : use_stmt = USE_STMT (use_p);
3447 :
3448 24040 : if (is_gimple_debug (use_stmt))
3449 278 : continue;
3450 :
3451 : /* If the use is a type convert, look further into it if the operations
3452 : are the same under two's complement. */
3453 23762 : tree lhs_type;
3454 23762 : if (gimple_assign_cast_p (use_stmt)
3455 295 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3456 24057 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3457 : {
3458 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3459 0 : gimple *tmp_use;
3460 0 : use_operand_p tmp_use_p;
3461 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3462 0 : use_stmt = tmp_use;
3463 0 : result = cast_lhs;
3464 : }
3465 :
3466 : /* For now restrict this operations to single basic blocks. In theory
3467 : we would want to support sinking the multiplication in
3468 : m = a*b;
3469 : if ()
3470 : ma = m + c;
3471 : else
3472 : d = m;
3473 : to form a fma in the then block and sink the multiplication to the
3474 : else block. */
3475 23762 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3476 5656 : return false;
3477 :
3478 : /* A negate on the multiplication leads to FNMA. */
3479 22911 : if (is_gimple_assign (use_stmt)
3480 22911 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3481 : {
3482 706 : ssa_op_iter iter;
3483 706 : use_operand_p usep;
3484 :
3485 : /* If (due to earlier missed optimizations) we have two
3486 : negates of the same value, treat them as equivalent
3487 : to a single negate with multiple uses. */
3488 706 : if (seen_negate_p)
3489 0 : return false;
3490 :
3491 706 : result = gimple_assign_lhs (use_stmt);
3492 :
3493 : /* Make sure the negate statement becomes dead with this
3494 : single transformation. */
3495 706 : if (!single_imm_use (gimple_assign_lhs (use_stmt),
3496 : &use_p, &neguse_stmt))
3497 : return false;
3498 :
3499 : /* Make sure the multiplication isn't also used on that stmt. */
3500 2836 : FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3501 1424 : if (USE_FROM_PTR (usep) == mul_result)
3502 : return false;
3503 :
3504 : /* Re-validate. */
3505 706 : use_stmt = neguse_stmt;
3506 706 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3507 : return false;
3508 :
3509 706 : negate_p = seen_negate_p = true;
3510 : }
3511 :
3512 22911 : tree cond, else_value, ops[3], len, bias;
3513 22911 : tree_code code;
3514 22911 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3515 : &else_value, &len, &bias))
3516 : return false;
3517 :
3518 : /* The multiplication result must be one of the addition operands. */
3519 20347 : if (ops[0] != result && ops[1] != result)
3520 : return false;
3521 :
3522 19770 : switch (code)
3523 : {
3524 5775 : case MINUS_EXPR:
3525 5775 : if (ops[1] == result)
3526 2859 : negate_p = !negate_p;
3527 : break;
3528 : case PLUS_EXPR:
3529 : break;
3530 : default:
3531 : /* FMA can only be formed from PLUS and MINUS. */
3532 : return false;
3533 : }
3534 :
3535 18128 : if (len)
3536 : {
3537 : /* For COND_LEN_* operations, we may have dummpy mask which is
3538 : the all true mask. Such TREE type may be mul_cond != cond
3539 : but we still consider they are equal. */
3540 0 : if (mul_cond && cond != mul_cond
3541 0 : && !(integer_truep (mul_cond) && integer_truep (cond)))
3542 0 : return false;
3543 :
3544 0 : if (else_value == result)
3545 : return false;
3546 :
3547 0 : if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3548 : opt_type))
3549 : return false;
3550 :
3551 0 : if (mul_len)
3552 : {
3553 0 : poly_int64 mul_value, value;
3554 0 : if (poly_int_tree_p (mul_len, &mul_value)
3555 0 : && poly_int_tree_p (len, &value)
3556 0 : && maybe_ne (mul_value, value))
3557 0 : return false;
3558 0 : else if (mul_len != len)
3559 : return false;
3560 :
3561 0 : if (wi::to_widest (mul_bias) != wi::to_widest (bias))
3562 : return false;
3563 : }
3564 : }
3565 : else
3566 : {
3567 18128 : if (mul_cond && cond != mul_cond)
3568 : return false;
3569 :
3570 18116 : if (cond)
3571 : {
3572 104 : if (cond == result || else_value == result)
3573 : return false;
3574 94 : if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3575 : opt_type))
3576 : return false;
3577 : }
3578 : }
3579 :
3580 : /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3581 : we'll visit later, we might be able to get a more profitable
3582 : match with fnma.
3583 : OTOH, if we don't, a negate / fma pair has likely lower latency
3584 : that a mult / subtract pair. */
3585 18106 : if (code == MINUS_EXPR
3586 5769 : && !negate_p
3587 2210 : && ops[0] == result
3588 2210 : && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3589 0 : && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3590 0 : && TREE_CODE (ops[1]) == SSA_NAME
3591 18106 : && has_single_use (ops[1]))
3592 : {
3593 0 : gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3594 0 : if (is_gimple_assign (stmt2)
3595 0 : && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3596 : return false;
3597 : }
3598 :
3599 : /* We can't handle a * b + a * b. */
3600 18106 : if (ops[0] == ops[1])
3601 : return false;
3602 : /* If deferring, make sure we are not looking at an instruction that
3603 : wouldn't have existed if we were not. */
3604 18106 : if (state->m_deferring_p
3605 18106 : && (state->m_mul_result_set.contains (ops[0])
3606 6408 : || state->m_mul_result_set.contains (ops[1])))
3607 0 : return false;
3608 :
3609 18106 : if (check_defer)
3610 : {
3611 6260 : tree use_lhs = gimple_get_lhs (use_stmt);
3612 6260 : if (state->m_last_result)
3613 : {
3614 1018 : if (ops[1] == state->m_last_result
3615 1018 : || ops[0] == state->m_last_result)
3616 : defer = true;
3617 : else
3618 6260 : defer = false;
3619 : }
3620 : else
3621 : {
3622 5242 : gcc_checking_assert (!state->m_initial_phi);
3623 5242 : gphi *phi;
3624 5242 : if (ops[0] == result)
3625 3282 : phi = result_of_phi (ops[1]);
3626 : else
3627 : {
3628 1960 : gcc_assert (ops[1] == result);
3629 1960 : phi = result_of_phi (ops[0]);
3630 : }
3631 :
3632 : if (phi)
3633 : {
3634 940 : state->m_initial_phi = phi;
3635 940 : defer = true;
3636 : }
3637 : else
3638 : defer = false;
3639 : }
3640 :
3641 6260 : state->m_last_result = use_lhs;
3642 6260 : check_defer = false;
3643 : }
3644 : else
3645 : defer = false;
3646 :
3647 : /* While it is possible to validate whether or not the exact form that
3648 : we've recognized is available in the backend, the assumption is that
3649 : if the deferring logic above did not trigger, the transformation is
3650 : never a loss. For instance, suppose the target only has the plain FMA
3651 : pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3652 : MUL+SUB for FMA+NEG, which is still two operations. Consider
3653 : -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3654 : form the two NEGs are independent and could be run in parallel. */
3655 5656 : }
3656 :
3657 17590 : if (defer)
3658 : {
3659 1012 : fma_transformation_info fti;
3660 1012 : fti.mul_stmt = mul_stmt;
3661 1012 : fti.mul_result = mul_result;
3662 1012 : fti.op1 = op1;
3663 1012 : fti.op2 = op2;
3664 1012 : state->m_candidates.safe_push (fti);
3665 1012 : state->m_mul_result_set.add (mul_result);
3666 :
3667 1012 : if (dump_file && (dump_flags & TDF_DETAILS))
3668 : {
3669 0 : fprintf (dump_file, "Deferred generating FMA for multiplication ");
3670 0 : print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3671 0 : fprintf (dump_file, "\n");
3672 : }
3673 :
3674 1012 : return false;
3675 : }
3676 : else
3677 : {
3678 16578 : if (state->m_deferring_p)
3679 4914 : cancel_fma_deferring (state);
3680 16578 : convert_mult_to_fma_1 (mul_result, op1, op2);
3681 16578 : return true;
3682 : }
3683 : }
3684 :
3685 :
3686 : /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3687 : a check for non-zero like:
3688 : _1 = x_4(D) * y_5(D);
3689 : *res_7(D) = _1;
3690 : if (x_4(D) != 0)
3691 : goto <bb 3>; [50.00%]
3692 : else
3693 : goto <bb 4>; [50.00%]
3694 :
3695 : <bb 3> [local count: 536870913]:
3696 : _2 = _1 / x_4(D);
3697 : _9 = _2 != y_5(D);
3698 : _10 = (int) _9;
3699 :
3700 : <bb 4> [local count: 1073741824]:
3701 : # iftmp.0_3 = PHI <_10(3), 0(2)>
3702 : then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3703 : optimize the x_4(D) != 0 condition to 1. */
3704 :
3705 : static void
3706 145 : maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3707 : gimple *div_stmt, bool *cfg_changed)
3708 : {
3709 145 : basic_block bb = gimple_bb (cond_stmt);
3710 290 : if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3711 51 : return;
3712 145 : edge pred_edge = single_pred_edge (bb);
3713 145 : basic_block pred_bb = pred_edge->src;
3714 145 : if (EDGE_COUNT (pred_bb->succs) != 2)
3715 : return;
3716 102 : edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3717 102 : edge other_succ_edge = NULL;
3718 102 : if (gimple_code (cond_stmt) == GIMPLE_COND)
3719 : {
3720 48 : if (EDGE_COUNT (bb->succs) != 2)
3721 : return;
3722 48 : other_succ_edge = EDGE_SUCC (bb, 0);
3723 48 : if (gimple_cond_code (cond_stmt) == NE_EXPR)
3724 : {
3725 24 : if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3726 24 : other_succ_edge = EDGE_SUCC (bb, 1);
3727 : }
3728 : else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3729 48 : other_succ_edge = EDGE_SUCC (bb, 0);
3730 48 : if (other_edge->dest != other_succ_edge->dest)
3731 : return;
3732 : }
3733 105 : else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3734 : return;
3735 202 : gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
3736 101 : if (zero_cond == NULL
3737 101 : || (gimple_cond_code (zero_cond)
3738 101 : != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3739 101 : || !integer_zerop (gimple_cond_rhs (zero_cond)))
3740 0 : return;
3741 101 : tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3742 101 : if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3743 : return;
3744 101 : if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3745 : {
3746 : /* Allow the divisor to be result of a same precision cast
3747 : from zero_cond_lhs. */
3748 53 : tree rhs2 = gimple_assign_rhs2 (div_stmt);
3749 53 : if (TREE_CODE (rhs2) != SSA_NAME)
3750 : return;
3751 53 : gimple *g = SSA_NAME_DEF_STMT (rhs2);
3752 53 : if (!gimple_assign_cast_p (g)
3753 53 : || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3754 53 : || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3755 106 : || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3756 53 : != TYPE_PRECISION (TREE_TYPE (rhs2))))
3757 : return;
3758 : }
3759 101 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
3760 101 : mul_stmts.quick_push (div_stmt);
3761 101 : if (is_gimple_debug (gsi_stmt (gsi)))
3762 0 : gsi_next_nondebug (&gsi);
3763 : unsigned cast_count = 0;
3764 635 : while (gsi_stmt (gsi) != cond_stmt)
3765 : {
3766 : /* If original mul_stmt has a single use, allow it in the same bb,
3767 : we are looking then just at __builtin_mul_overflow_p.
3768 : Though, in that case the original mul_stmt will be replaced
3769 : by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3770 : gimple *mul_stmt;
3771 : unsigned int i;
3772 2274 : bool ok = false;
3773 2274 : FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3774 : {
3775 2127 : if (gsi_stmt (gsi) == mul_stmt)
3776 : {
3777 : ok = true;
3778 : break;
3779 : }
3780 : }
3781 534 : if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3782 : ok = true;
3783 387 : if (!ok)
3784 51 : return;
3785 534 : gsi_next_nondebug (&gsi);
3786 : }
3787 101 : if (gimple_code (cond_stmt) == GIMPLE_COND)
3788 : {
3789 47 : basic_block succ_bb = other_edge->dest;
3790 75 : for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3791 28 : gsi_next (&gpi))
3792 : {
3793 35 : gphi *phi = gpi.phi ();
3794 35 : tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3795 35 : tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3796 35 : if (!operand_equal_p (v1, v2, 0))
3797 7 : return;
3798 : }
3799 : }
3800 : else
3801 : {
3802 54 : tree lhs = gimple_assign_lhs (cond_stmt);
3803 54 : if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3804 : return;
3805 54 : gsi_next_nondebug (&gsi);
3806 54 : if (!gsi_end_p (gsi))
3807 : {
3808 54 : if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3809 : return;
3810 54 : gimple *cast_stmt = gsi_stmt (gsi);
3811 54 : if (!gimple_assign_cast_p (cast_stmt))
3812 : return;
3813 54 : tree new_lhs = gimple_assign_lhs (cast_stmt);
3814 54 : gsi_next_nondebug (&gsi);
3815 54 : if (!gsi_end_p (gsi)
3816 54 : || !new_lhs
3817 54 : || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3818 108 : || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3819 : return;
3820 : lhs = new_lhs;
3821 : }
3822 54 : edge succ_edge = single_succ_edge (bb);
3823 54 : basic_block succ_bb = succ_edge->dest;
3824 54 : gsi = gsi_start_phis (succ_bb);
3825 54 : if (gsi_end_p (gsi))
3826 : return;
3827 54 : gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3828 54 : gsi_next (&gsi);
3829 54 : if (!gsi_end_p (gsi))
3830 : return;
3831 54 : if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3832 : return;
3833 54 : tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3834 54 : if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3835 : {
3836 0 : tree cond = gimple_assign_rhs1 (cond_stmt);
3837 0 : if (TREE_CODE (cond) == NE_EXPR)
3838 : {
3839 0 : if (!operand_equal_p (other_val,
3840 0 : gimple_assign_rhs3 (cond_stmt), 0))
3841 : return;
3842 : }
3843 0 : else if (!operand_equal_p (other_val,
3844 0 : gimple_assign_rhs2 (cond_stmt), 0))
3845 : return;
3846 : }
3847 54 : else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3848 : {
3849 25 : if (!integer_zerop (other_val))
3850 : return;
3851 : }
3852 29 : else if (!integer_onep (other_val))
3853 : return;
3854 : }
3855 94 : if (pred_edge->flags & EDGE_TRUE_VALUE)
3856 41 : gimple_cond_make_true (zero_cond);
3857 : else
3858 53 : gimple_cond_make_false (zero_cond);
3859 94 : update_stmt (zero_cond);
3860 94 : reset_flow_sensitive_info_in_bb (bb);
3861 94 : *cfg_changed = true;
3862 : }
3863 :
3864 : /* Helper function for arith_overflow_check_p. Return true
3865 : if VAL1 is equal to VAL2 cast to corresponding integral type
3866 : with other signedness or vice versa. */
3867 :
3868 : static bool
3869 382 : arith_cast_equal_p (tree val1, tree val2)
3870 : {
3871 382 : if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3872 65 : return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3873 317 : else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3874 : return false;
3875 280 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3876 280 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3877 : return true;
3878 168 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3879 168 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3880 120 : return true;
3881 : return false;
3882 : }
3883 :
3884 : /* Helper function of match_arith_overflow. Return 1
3885 : if USE_STMT is unsigned overflow check ovf != 0 for
3886 : STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3887 : and 0 otherwise. */
3888 :
static int
arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
			tree maxval, tree *other)
{
  enum tree_code ccode = ERROR_MARK;
  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  /* When CAST_STMT is given, the value being compared is the cast's
     result rather than STMT's lhs.  */
  tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree multop = NULL_TREE, divlhs = NULL_TREE;
  gimple *cur_use_stmt = use_stmt;

  if (code == MULT_EXPR)
    {
      /* For multiplication the overflow idiom is
	 r = a * b; d = r / a; d ==/!= b — USE_STMT must be that
	 TRUNC_DIV_EXPR dividing LHS by one of the factors.  */
      if (!is_gimple_assign (use_stmt))
	return 0;
      if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
	return 0;
      if (gimple_assign_rhs1 (use_stmt) != lhs)
	return 0;
      /* MULTOP records the factor NOT used as the divisor; with a cast
	 involved, compare modulo a same-value cast.  */
      if (cast_stmt)
	{
	  if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
	    multop = rhs2;
	  else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
	    multop = rhs1;
	  else
	    return 0;
	}
      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
	multop = rhs2;
      else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
	multop = rhs1;
      else
	return 0;
      if (stmt_ends_bb_p (use_stmt))
	return 0;
      divlhs = gimple_assign_lhs (use_stmt);
      if (!divlhs)
	return 0;
      /* The division result must have exactly one use — the comparison
	 we are looking for.  */
      use_operand_p use;
      if (!single_imm_use (divlhs, &use, &cur_use_stmt))
	return 0;
      if (cast_stmt && gimple_assign_cast_p (cur_use_stmt))
	{
	  /* Optionally look through a single same-precision cast to
	     unsigned of the division result.  */
	  tree cast_lhs = gimple_assign_lhs (cur_use_stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
	      && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
	      && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
		  == TYPE_PRECISION (TREE_TYPE (divlhs)))
	      && single_imm_use (cast_lhs, &use, &cur_use_stmt))
	    {
	      cast_stmt = NULL;
	      divlhs = cast_lhs;
	    }
	  else
	    return 0;
	}
    }
  /* Extract comparison code and operands from CUR_USE_STMT, which may
     be a GIMPLE_COND, a binary assignment or a COND_EXPR assignment
     with a comparison condition.  */
  if (gimple_code (cur_use_stmt) == GIMPLE_COND)
    {
      ccode = gimple_cond_code (cur_use_stmt);
      crhs1 = gimple_cond_lhs (cur_use_stmt);
      crhs2 = gimple_cond_rhs (cur_use_stmt);
    }
  else if (is_gimple_assign (cur_use_stmt))
    {
      if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
	{
	  ccode = gimple_assign_rhs_code (cur_use_stmt);
	  crhs1 = gimple_assign_rhs1 (cur_use_stmt);
	  crhs2 = gimple_assign_rhs2 (cur_use_stmt);
	}
      else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
	{
	  tree cond = gimple_assign_rhs1 (cur_use_stmt);
	  if (COMPARISON_CLASS_P (cond))
	    {
	      ccode = TREE_CODE (cond);
	      crhs1 = TREE_OPERAND (cond, 0);
	      crhs2 = TREE_OPERAND (cond, 1);
	    }
	  else
	    return 0;
	}
      else
	return 0;
    }
  else
    return 0;

  /* With MAXVAL, also recognize the lhs >> popcount (maxval) form of
     the overflow test; the shift result must feed an EQ/NE against
     zero (or a narrowing-enough cast).  */
  if (maxval
      && ccode == RSHIFT_EXPR
      && crhs1 == lhs
      && TREE_CODE (crhs2) == INTEGER_CST
      && wi::to_widest (crhs2) == TYPE_PRECISION (TREE_TYPE (maxval)))
    {
      tree shiftlhs = gimple_assign_lhs (use_stmt);
      if (!shiftlhs)
	return 0;
      use_operand_p use;
      if (!single_imm_use (shiftlhs, &use, &cur_use_stmt))
	return 0;
      if (gimple_code (cur_use_stmt) == GIMPLE_COND)
	{
	  ccode = gimple_cond_code (cur_use_stmt);
	  crhs1 = gimple_cond_lhs (cur_use_stmt);
	  crhs2 = gimple_cond_rhs (cur_use_stmt);
	}
      else if (is_gimple_assign (cur_use_stmt))
	{
	  if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
	    {
	      ccode = gimple_assign_rhs_code (cur_use_stmt);
	      crhs1 = gimple_assign_rhs1 (cur_use_stmt);
	      crhs2 = gimple_assign_rhs2 (cur_use_stmt);
	    }
	  else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
	    {
	      tree cond = gimple_assign_rhs1 (cur_use_stmt);
	      if (COMPARISON_CLASS_P (cond))
		{
		  ccode = TREE_CODE (cond);
		  crhs1 = TREE_OPERAND (cond, 0);
		  crhs2 = TREE_OPERAND (cond, 1);
		}
	      else
		return 0;
	    }
	  else
	    {
	      /* A conversion not widening past MAXVAL's precision is an
		 acceptable consumer of the shifted overflow bit.  */
	      enum tree_code sc = gimple_assign_rhs_code (cur_use_stmt);
	      tree castlhs = gimple_assign_lhs (cur_use_stmt);
	      if (!CONVERT_EXPR_CODE_P (sc)
		  || !castlhs
		  || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs))
		  || (TYPE_PRECISION (TREE_TYPE (castlhs))
		      > TYPE_PRECISION (TREE_TYPE (maxval))))
		return 0;
	      return 1;
	    }
	}
      else
	return 0;
      if ((ccode != EQ_EXPR && ccode != NE_EXPR)
	  || crhs1 != shiftlhs
	  || !integer_zerop (crhs2))
	return 0;
      return 1;
    }

  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
    return 0;

  /* Return 1 when the comparison is true exactly on overflow, -1 when
     it is true exactly on non-overflow, 0 when unrecognized.  */
  switch (ccode)
    {
    case GT_EXPR:
    case LE_EXPR:
      if (maxval)
	{
	  /* r = a + b; r > maxval or r <= maxval */
	  if (crhs1 == lhs
	      && TREE_CODE (crhs2) == INTEGER_CST
	      && tree_int_cst_equal (crhs2, maxval))
	    return ccode == GT_EXPR ? 1 : -1;
	  break;
	}
      /* r = a - b; r > a or r <= a
	 r = a + b; a > r or a <= r or b > r or b <= r.  */
      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
	      && crhs2 == lhs))
	return ccode == GT_EXPR ? 1 : -1;
      /* r = ~a; b > r or b <= r.  */
      if (code == BIT_NOT_EXPR && crhs2 == lhs)
	{
	  if (other)
	    *other = crhs1;
	  return ccode == GT_EXPR ? 1 : -1;
	}
      break;
    case LT_EXPR:
    case GE_EXPR:
      if (maxval)
	break;
      /* r = a - b; a < r or a >= r
	 r = a + b; r < a or r >= a or r < b or r >= b.  */
      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
	  || (code == PLUS_EXPR && crhs1 == lhs
	      && (crhs2 == rhs1 || crhs2 == rhs2)))
	return ccode == LT_EXPR ? 1 : -1;
      /* r = ~a; r < b or r >= b.  */
      if (code == BIT_NOT_EXPR && crhs1 == lhs)
	{
	  if (other)
	    *other = crhs2;
	  return ccode == LT_EXPR ? 1 : -1;
	}
      break;
    case EQ_EXPR:
    case NE_EXPR:
      /* r = a * b; _1 = r / a; _1 == b
	 r = a * b; _1 = r / b; _1 == a
	 r = a * b; _1 = r / a; _1 != b
	 r = a * b; _1 = r / b; _1 != a.  */
      if (code == MULT_EXPR)
	{
	  if (cast_stmt)
	    {
	      if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
		  || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
		{
		  use_stmt = cur_use_stmt;
		  return ccode == NE_EXPR ? 1 : -1;
		}
	    }
	  else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
		   || (crhs2 == divlhs && crhs1 == multop))
	    {
	      use_stmt = cur_use_stmt;
	      return ccode == NE_EXPR ? 1 : -1;
	    }
	}
      break;
    default:
      break;
    }
  return 0;
}
4119 :
4120 : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4121 : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4122 : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4123 : extern bool gimple_unsigned_integer_sat_mul (tree, tree*, tree (*)(tree));
4124 :
4125 : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4126 : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4127 : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
4128 :
4129 : static void
4130 159 : build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator *gsi,
4131 : internal_fn fn, tree lhs,
4132 : tree op_0, tree op_1)
4133 : {
4134 159 : if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4135 : {
4136 157 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4137 157 : gimple_call_set_lhs (call, lhs);
4138 157 : gsi_replace (gsi, call, /* update_eh_info */ true);
4139 : }
4140 159 : }
4141 :
4142 : static bool
4143 51 : build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator *gsi,
4144 : internal_fn fn, tree lhs,
4145 : tree op_0, tree op_1)
4146 : {
4147 51 : if (!direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4148 : return false;
4149 :
4150 43 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4151 43 : gimple_call_set_lhs (call, lhs);
4152 43 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4153 :
4154 43 : return true;
4155 : }
4156 :
4157 : /*
4158 : * Try to match saturation unsigned add with assign.
4159 : * _7 = _4 + _6;
4160 : * _8 = _4 > _7;
4161 : * _9 = (long unsigned int) _8;
4162 : * _10 = -_9;
4163 : * _12 = _7 | _10;
4164 : * =>
4165 : * _12 = .SAT_ADD (_4, _6);
4166 : *
4167 : * Try to match IMM=-1 saturation signed add with assign.
4168 : * <bb 2> [local count: 1073741824]:
4169 : * x.0_1 = (unsigned char) x_5(D);
4170 : * _3 = -x.0_1;
4171 : * _10 = (signed char) _3;
4172 : * _8 = x_5(D) & _10;
4173 : * if (_8 < 0)
4174 : * goto <bb 4>; [1.40%]
4175 : * else
4176 : * goto <bb 3>; [98.60%]
4177 : * <bb 3> [local count: 434070867]:
4178 : * _2 = x.0_1 + 255;
4179 : * <bb 4> [local count: 1073741824]:
4180 : * # _9 = PHI <_2(3), 128(2)>
4181 : * _4 = (int8_t) _9;
4182 : * =>
4183 : * _4 = .SAT_ADD (x_5, -1); */
4184 :
4185 : static void
4186 4850298 : match_saturation_add_with_assign (gimple_stmt_iterator *gsi, gassign *stmt)
4187 : {
4188 4850298 : tree ops[2];
4189 4850298 : tree lhs = gimple_assign_lhs (stmt);
4190 :
4191 4850298 : if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4192 4850298 : || gimple_signed_integer_sat_add (lhs, ops, NULL))
4193 34 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_ADD, lhs,
4194 : ops[0], ops[1]);
4195 4850298 : }
4196 :
4197 : /*
4198 : * Try to match saturation add with PHI.
4199 : * For unsigned integer:
4200 : * <bb 2> :
4201 : * _1 = x_3(D) + y_4(D);
4202 : * if (_1 >= x_3(D))
4203 : * goto <bb 3>; [INV]
4204 : * else
4205 : * goto <bb 4>; [INV]
4206 : *
4207 : * <bb 3> :
4208 : *
4209 : * <bb 4> :
4210 : * # _2 = PHI <255(2), _1(3)>
4211 : * =>
4212 : * <bb 4> [local count: 1073741824]:
4213 : * _2 = .SAT_ADD (x_4(D), y_5(D));
4214 : *
4215 : * For signed integer:
4216 : * x.0_1 = (long unsigned int) x_7(D);
4217 : * y.1_2 = (long unsigned int) y_8(D);
4218 : * _3 = x.0_1 + y.1_2;
4219 : * sum_9 = (int64_t) _3;
4220 : * _4 = x_7(D) ^ y_8(D);
4221 : * _5 = x_7(D) ^ sum_9;
4222 : * _15 = ~_4;
4223 : * _16 = _5 & _15;
4224 : * if (_16 < 0)
4225 : * goto <bb 3>; [41.00%]
4226 : * else
4227 : * goto <bb 4>; [59.00%]
4228 : * _11 = x_7(D) < 0;
4229 : * _12 = (long int) _11;
4230 : * _13 = -_12;
4231 : * _14 = _13 ^ 9223372036854775807;
4232 : * # _6 = PHI <_14(3), sum_9(2)>
4233 : * =>
4234 : * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
4235 :
4236 : static bool
4237 4227449 : match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
4238 : {
4239 4227449 : if (gimple_phi_num_args (phi) != 2)
4240 : return false;
4241 :
4242 3346223 : tree ops[2];
4243 3346223 : tree phi_result = gimple_phi_result (phi);
4244 :
4245 3346223 : if (!gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
4246 3346223 : && !gimple_signed_integer_sat_add (phi_result, ops, NULL))
4247 : return false;
4248 :
4249 21 : if (!TYPE_UNSIGNED (TREE_TYPE (ops[0])) && TREE_CODE (ops[1]) == INTEGER_CST)
4250 0 : ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4251 :
4252 21 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_ADD,
4253 : phi_result, ops[0],
4254 21 : ops[1]);
4255 : }
4256 :
4257 : /*
4258 : * Try to match saturation unsigned sub.
4259 : * _1 = _4 >= _5;
4260 : * _3 = _4 - _5;
4261 : * _6 = _1 ? _3 : 0;
4262 : * =>
4263 : * _6 = .SAT_SUB (_4, _5); */
4264 :
4265 : static void
4266 3328888 : match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
4267 : {
4268 3328888 : tree ops[2];
4269 3328888 : tree lhs = gimple_assign_lhs (stmt);
4270 :
4271 3328888 : if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
4272 125 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_SUB, lhs,
4273 : ops[0], ops[1]);
4274 3328888 : }
4275 :
4276 : /*
4277 : * Try to match saturation unsigned mul.
4278 : * _1 = (unsigned int) a_6(D);
4279 : * _2 = (unsigned int) b_7(D);
4280 : * x_8 = _1 * _2;
4281 : * overflow_9 = x_8 > 255;
4282 : * _3 = (unsigned char) overflow_9;
4283 : * _4 = -_3;
4284 : * _5 = (unsigned char) x_8;
4285 : * _10 = _4 | _5;
4286 : * =>
 * _10 = .SAT_MUL (a_6, b_7); */
4288 :
4289 : static void
4290 2567478 : match_unsigned_saturation_mul (gimple_stmt_iterator *gsi, gassign *stmt)
4291 : {
4292 2567478 : tree ops[2];
4293 2567478 : tree lhs = gimple_assign_lhs (stmt);
4294 :
4295 2567478 : if (gimple_unsigned_integer_sat_mul (lhs, ops, NULL))
4296 0 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_MUL, lhs,
4297 : ops[0], ops[1]);
4298 2567478 : }
4299 :
4300 : /* Try to match saturation unsigned mul, aka:
4301 : _6 = .MUL_OVERFLOW (a_4(D), b_5(D));
4302 : _2 = IMAGPART_EXPR <_6>;
4303 : if (_2 != 0)
4304 : goto <bb 4>; [35.00%]
4305 : else
4306 : goto <bb 3>; [65.00%]
4307 :
4308 : <bb 3> [local count: 697932184]:
4309 : _1 = REALPART_EXPR <_6>;
4310 :
4311 : <bb 4> [local count: 1073741824]:
4312 : # _3 = PHI <18446744073709551615(2), _1(3)>
4313 : =>
4314 : _3 = .SAT_MUL (a_4(D), b_5(D)); */
4315 :
4316 : static bool
4317 4227406 : match_saturation_mul (gimple_stmt_iterator *gsi, gphi *phi)
4318 : {
4319 4227406 : if (gimple_phi_num_args (phi) != 2)
4320 : return false;
4321 :
4322 3346180 : tree ops[2];
4323 3346180 : tree phi_result = gimple_phi_result (phi);
4324 :
4325 3346180 : if (!gimple_unsigned_integer_sat_mul (phi_result, ops, NULL))
4326 : return false;
4327 :
4328 0 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_MUL,
4329 : phi_result, ops[0],
4330 0 : ops[1]);
4331 : }
4332 :
4333 : /*
 * Try to match saturation sub (unsigned and signed).
4335 : * <bb 2> [local count: 1073741824]:
4336 : * if (x_2(D) > y_3(D))
4337 : * goto <bb 3>; [50.00%]
4338 : * else
4339 : * goto <bb 4>; [50.00%]
4340 : *
4341 : * <bb 3> [local count: 536870912]:
4342 : * _4 = x_2(D) - y_3(D);
4343 : *
4344 : * <bb 4> [local count: 1073741824]:
4345 : * # _1 = PHI <0(2), _4(3)>
4346 : * =>
4347 : * <bb 4> [local count: 1073741824]:
4348 : * _1 = .SAT_SUB (x_2(D), y_3(D)); */
4349 : static bool
4350 4227432 : match_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
4351 : {
4352 4227432 : if (gimple_phi_num_args (phi) != 2)
4353 : return false;
4354 :
4355 3346206 : tree ops[2];
4356 3346206 : tree phi_result = gimple_phi_result (phi);
4357 :
4358 3346206 : if (!gimple_unsigned_integer_sat_sub (phi_result, ops, NULL)
4359 3346206 : && !gimple_signed_integer_sat_sub (phi_result, ops, NULL))
4360 : return false;
4361 :
4362 30 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_SUB,
4363 : phi_result, ops[0],
4364 30 : ops[1]);
4365 : }
4366 :
4367 : /*
 * Try to match saturation unsigned trunc.
4369 : * uint16_t x_4(D);
4370 : * uint8_t _6;
4371 : * overflow_5 = x_4(D) > 255;
4372 : * _1 = (unsigned char) x_4(D);
4373 : * _2 = (unsigned char) overflow_5;
4374 : * _3 = -_2;
4375 : * _6 = _1 | _3;
4376 : * =>
4377 : * _6 = .SAT_TRUNC (x_4(D));
4378 : * */
4379 : static void
4380 2567478 : match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
4381 : {
4382 2567478 : tree ops[1];
4383 2567478 : tree lhs = gimple_assign_lhs (stmt);
4384 2567478 : tree type = TREE_TYPE (lhs);
4385 :
4386 2567478 : if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4387 2567578 : && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4388 100 : tree_pair (type, TREE_TYPE (ops[0])),
4389 : OPTIMIZE_FOR_BOTH))
4390 : {
4391 73 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4392 73 : gimple_call_set_lhs (call, lhs);
4393 73 : gsi_replace (gsi, call, /* update_eh_info */ true);
4394 : }
4395 2567478 : }
4396 :
4397 : /*
4398 : * Try to match saturation truncate.
4399 : * Aka:
4400 : * x.0_1 = (unsigned long) x_4(D);
4401 : * _2 = x.0_1 + 2147483648;
4402 : * if (_2 > 4294967295)
4403 : * goto <bb 4>; [50.00%]
4404 : * else
4405 : * goto <bb 3>; [50.00%]
4406 : * ;; succ: 4
4407 : * ;; 3
4408 : *
4409 : * ;; basic block 3, loop depth 0
4410 : * ;; pred: 2
4411 : * trunc_5 = (int32_t) x_4(D);
4412 : * goto <bb 5>; [100.00%]
4413 : * ;; succ: 5
4414 : *
4415 : * ;; basic block 4, loop depth 0
4416 : * ;; pred: 2
4417 : * _7 = x_4(D) < 0;
4418 : * _8 = (int) _7;
4419 : * _9 = -_8;
4420 : * _10 = _9 ^ 2147483647;
4421 : * ;; succ: 5
4422 : *
4423 : * ;; basic block 5, loop depth 0
4424 : * ;; pred: 3
4425 : * ;; 4
4426 : * # _3 = PHI <trunc_5(3), _10(4)>
4427 : * =>
4428 : * _6 = .SAT_TRUNC (x_4(D));
4429 : */
4430 :
4431 : static bool
4432 4227406 : match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi)
4433 : {
4434 4227406 : if (gimple_phi_num_args (phi) != 2)
4435 : return false;
4436 :
4437 3346180 : tree ops[1];
4438 3346180 : tree phi_result = gimple_phi_result (phi);
4439 3346180 : tree type = TREE_TYPE (phi_result);
4440 :
4441 3346180 : if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL)
4442 3346180 : && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL))
4443 : return false;
4444 :
4445 0 : if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4446 0 : tree_pair (type, TREE_TYPE (ops[0])),
4447 : OPTIMIZE_FOR_BOTH))
4448 : return false;
4449 :
4450 0 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4451 0 : gimple_call_set_lhs (call, phi_result);
4452 0 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4453 :
4454 0 : return true;
4455 : }
4456 :
4457 : /* Recognize for unsigned x
4458 : x = y - z;
4459 : if (x > y)
4460 : where there are other uses of x and replace it with
4461 : _7 = .SUB_OVERFLOW (y, z);
4462 : x = REALPART_EXPR <_7>;
4463 : _8 = IMAGPART_EXPR <_7>;
4464 : if (_8)
4465 : and similarly for addition.
4466 :
4467 : Also recognize:
4468 : yc = (type) y;
4469 : zc = (type) z;
4470 : x = yc + zc;
4471 : if (x > max)
4472 : where y and z have unsigned types with maximum max
4473 : and there are other uses of x and all of those cast x
4474 : back to that unsigned type and again replace it with
4475 : _7 = .ADD_OVERFLOW (y, z);
4476 : _9 = REALPART_EXPR <_7>;
4477 : _8 = IMAGPART_EXPR <_7>;
4478 : if (_8)
4479 : and replace (utype) x with _9.
4480 : Or with x >> popcount (max) instead of x > max.
4481 :
4482 : Also recognize:
4483 : x = ~z;
4484 : if (y > x)
4485 : and replace it with
4486 : _7 = .ADD_OVERFLOW (y, z);
4487 : _8 = IMAGPART_EXPR <_7>;
4488 : if (_8)
4489 :
4490 : And also recognize:
4491 : z = x * y;
4492 : if (x != 0)
4493 : goto <bb 3>; [50.00%]
4494 : else
4495 : goto <bb 4>; [50.00%]
4496 :
4497 : <bb 3> [local count: 536870913]:
4498 : _2 = z / x;
4499 : _9 = _2 != y;
4500 : _10 = (int) _9;
4501 :
4502 : <bb 4> [local count: 1073741824]:
4503 : # iftmp.0_3 = PHI <_10(3), 0(2)>
4504 : and replace it with
4505 : _7 = .MUL_OVERFLOW (x, y);
4506 : z = IMAGPART_EXPR <_7>;
4507 : _8 = IMAGPART_EXPR <_7>;
4508 : _9 = _8 != 0;
4509 : iftmp.0_3 = (int) _9; */
4510 :
4511 : static bool
4512 3317499 : match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4513 : enum tree_code code, bool *cfg_changed)
4514 : {
4515 3317499 : tree lhs = gimple_assign_lhs (stmt);
4516 3317499 : tree type = TREE_TYPE (lhs);
4517 3317499 : use_operand_p use_p;
4518 3317499 : imm_use_iterator iter;
4519 3317499 : bool use_seen = false;
4520 3317499 : bool ovf_use_seen = false;
4521 3317499 : gimple *use_stmt;
4522 3317499 : gimple *add_stmt = NULL;
4523 3317499 : bool add_first = false;
4524 3317499 : gimple *cond_stmt = NULL;
4525 3317499 : gimple *cast_stmt = NULL;
4526 3317499 : tree cast_lhs = NULL_TREE;
4527 :
4528 3317499 : gcc_checking_assert (code == PLUS_EXPR
4529 : || code == MINUS_EXPR
4530 : || code == MULT_EXPR
4531 : || code == BIT_NOT_EXPR);
4532 3317499 : if (!INTEGRAL_TYPE_P (type)
4533 2798510 : || !TYPE_UNSIGNED (type)
4534 1961370 : || has_zero_uses (lhs)
4535 3317499 : || (code != PLUS_EXPR
4536 1961031 : && code != MULT_EXPR
4537 173712 : && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4538 150015 : TYPE_MODE (type)) == CODE_FOR_nothing))
4539 1358411 : return false;
4540 :
4541 1959088 : tree rhs1 = gimple_assign_rhs1 (stmt);
4542 1959088 : tree rhs2 = gimple_assign_rhs2 (stmt);
4543 7398268 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4544 : {
4545 3486433 : use_stmt = USE_STMT (use_p);
4546 3486433 : if (is_gimple_debug (use_stmt))
4547 586429 : continue;
4548 :
4549 2900004 : tree other = NULL_TREE;
4550 2900004 : if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4551 : {
4552 6833 : if (code == BIT_NOT_EXPR)
4553 : {
4554 187 : gcc_assert (other);
4555 187 : if (TREE_CODE (other) != SSA_NAME)
4556 0 : return false;
4557 187 : if (rhs2 == NULL)
4558 187 : rhs2 = other;
4559 : else
4560 : return false;
4561 187 : cond_stmt = use_stmt;
4562 : }
4563 : ovf_use_seen = true;
4564 : }
4565 : else
4566 : {
4567 2893171 : use_seen = true;
4568 2893171 : if (code == MULT_EXPR
4569 2893171 : && cast_stmt == NULL
4570 2893171 : && gimple_assign_cast_p (use_stmt))
4571 : {
4572 31813 : cast_lhs = gimple_assign_lhs (use_stmt);
4573 63626 : if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4574 31272 : && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4575 60572 : && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4576 28759 : == TYPE_PRECISION (TREE_TYPE (lhs))))
4577 : cast_stmt = use_stmt;
4578 : else
4579 : cast_lhs = NULL_TREE;
4580 : }
4581 : }
4582 2900004 : if (ovf_use_seen && use_seen)
4583 : break;
4584 0 : }
4585 :
4586 1959088 : if (!ovf_use_seen
4587 1959088 : && code == MULT_EXPR
4588 448702 : && cast_stmt)
4589 : {
4590 28375 : if (TREE_CODE (rhs1) != SSA_NAME
4591 28375 : || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4592 : return false;
4593 92693 : FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4594 : {
4595 35943 : use_stmt = USE_STMT (use_p);
4596 35943 : if (is_gimple_debug (use_stmt))
4597 1053 : continue;
4598 :
4599 34890 : if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4600 : NULL_TREE, NULL))
4601 35943 : ovf_use_seen = true;
4602 28375 : }
4603 28375 : }
4604 : else
4605 : {
4606 : cast_stmt = NULL;
4607 : cast_lhs = NULL_TREE;
4608 : }
4609 :
4610 1959088 : tree maxval = NULL_TREE;
4611 1959088 : if (!ovf_use_seen
4612 13668 : || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4613 6459 : || (code == PLUS_EXPR
4614 6189 : && optab_handler (uaddv4_optab,
4615 6189 : TYPE_MODE (type)) == CODE_FOR_nothing)
4616 1972455 : || (code == MULT_EXPR
4617 221 : && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4618 148 : TYPE_MODE (type)) == CODE_FOR_nothing
4619 3 : && (use_seen
4620 3 : || cast_stmt
4621 0 : || !can_mult_highpart_p (TYPE_MODE (type), true))))
4622 : {
4623 1952484 : if (code != PLUS_EXPR)
4624 : return false;
4625 1356052 : if (TREE_CODE (rhs1) != SSA_NAME
4626 1356052 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4627 : return false;
4628 325388 : rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4629 325388 : tree type1 = TREE_TYPE (rhs1);
4630 325388 : if (!INTEGRAL_TYPE_P (type1)
4631 175260 : || !TYPE_UNSIGNED (type1)
4632 36962 : || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4633 341487 : || (TYPE_PRECISION (type1)
4634 32198 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4635 314172 : return false;
4636 11216 : if (TREE_CODE (rhs2) == INTEGER_CST)
4637 : {
4638 4149 : if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4639 4149 : TYPE_PRECISION (type1),
4640 8298 : UNSIGNED), 0))
4641 : return false;
4642 1620 : rhs2 = fold_convert (type1, rhs2);
4643 : }
4644 : else
4645 : {
4646 7067 : if (TREE_CODE (rhs2) != SSA_NAME
4647 7067 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4648 : return false;
4649 3018 : rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4650 3018 : tree type2 = TREE_TYPE (rhs2);
4651 3018 : if (!INTEGRAL_TYPE_P (type2)
4652 1189 : || !TYPE_UNSIGNED (type2)
4653 416 : || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4654 3390 : || (TYPE_PRECISION (type2)
4655 744 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4656 2659 : return false;
4657 : }
4658 1979 : if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4659 : type = type1;
4660 : else
4661 5 : type = TREE_TYPE (rhs2);
4662 :
4663 1979 : if (TREE_CODE (type) != INTEGER_TYPE
4664 3958 : || optab_handler (uaddv4_optab,
4665 1979 : TYPE_MODE (type)) == CODE_FOR_nothing)
4666 0 : return false;
4667 :
4668 1979 : maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4669 : UNSIGNED));
4670 1979 : ovf_use_seen = false;
4671 1979 : use_seen = false;
4672 1979 : basic_block use_bb = NULL;
4673 4030 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4674 : {
4675 1991 : use_stmt = USE_STMT (use_p);
4676 1991 : if (is_gimple_debug (use_stmt))
4677 8 : continue;
4678 :
4679 1983 : if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4680 : {
4681 12 : ovf_use_seen = true;
4682 12 : use_bb = gimple_bb (use_stmt);
4683 : }
4684 : else
4685 : {
4686 1971 : if (!gimple_assign_cast_p (use_stmt)
4687 1971 : || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4688 : return false;
4689 113 : tree use_lhs = gimple_assign_lhs (use_stmt);
4690 226 : if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4691 226 : || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4692 113 : > TYPE_PRECISION (type)))
4693 : return false;
4694 : use_seen = true;
4695 : }
4696 1919 : }
4697 60 : if (!ovf_use_seen)
4698 : return false;
4699 12 : if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4700 : {
4701 2 : if (!use_seen)
4702 : return false;
4703 2 : tree new_rhs1 = make_ssa_name (type);
4704 2 : gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4705 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4706 2 : rhs1 = new_rhs1;
4707 : }
4708 10 : else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4709 : {
4710 2 : if (!use_seen)
4711 : return false;
4712 2 : tree new_rhs2 = make_ssa_name (type);
4713 2 : gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4714 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4715 2 : rhs2 = new_rhs2;
4716 : }
4717 8 : else if (!use_seen)
4718 : {
4719 : /* If there are no uses of the wider addition, check if
4720 : forwprop has not created a narrower addition.
4721 : Require it to be in the same bb as the overflow check. */
4722 18 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4723 : {
4724 10 : use_stmt = USE_STMT (use_p);
4725 10 : if (is_gimple_debug (use_stmt))
4726 0 : continue;
4727 :
4728 10 : if (use_stmt == stmt)
4729 0 : continue;
4730 :
4731 10 : if (!is_gimple_assign (use_stmt)
4732 10 : || gimple_bb (use_stmt) != use_bb
4733 20 : || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4734 2 : continue;
4735 :
4736 8 : if (gimple_assign_rhs1 (use_stmt) == rhs1)
4737 : {
4738 8 : if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4739 : rhs2, 0))
4740 0 : continue;
4741 : }
4742 0 : else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4743 : {
4744 0 : if (gimple_assign_rhs1 (use_stmt) != rhs2)
4745 0 : continue;
4746 : }
4747 : else
4748 0 : continue;
4749 :
4750 8 : add_stmt = use_stmt;
4751 8 : break;
4752 8 : }
4753 8 : if (add_stmt == NULL)
4754 : return false;
4755 :
4756 : /* If stmt and add_stmt are in the same bb, we need to find out
4757 : which one is earlier. If they are in different bbs, we've
4758 : checked add_stmt is in the same bb as one of the uses of the
4759 : stmt lhs, so stmt needs to dominate add_stmt too. */
4760 8 : if (gimple_bb (stmt) == gimple_bb (add_stmt))
4761 : {
4762 8 : gimple_stmt_iterator gsif = *gsi;
4763 8 : gimple_stmt_iterator gsib = *gsi;
4764 8 : int i;
4765 : /* Search both forward and backward from stmt and have a small
4766 : upper bound. */
4767 20 : for (i = 0; i < 128; i++)
4768 : {
4769 20 : if (!gsi_end_p (gsib))
4770 : {
4771 18 : gsi_prev_nondebug (&gsib);
4772 18 : if (gsi_stmt (gsib) == add_stmt)
4773 : {
4774 : add_first = true;
4775 : break;
4776 : }
4777 : }
4778 2 : else if (gsi_end_p (gsif))
4779 : break;
4780 18 : if (!gsi_end_p (gsif))
4781 : {
4782 18 : gsi_next_nondebug (&gsif);
4783 18 : if (gsi_stmt (gsif) == add_stmt)
4784 : break;
4785 : }
4786 : }
4787 8 : if (i == 128)
4788 0 : return false;
4789 8 : if (add_first)
4790 2 : *gsi = gsi_for_stmt (add_stmt);
4791 : }
4792 : }
4793 : }
4794 :
4795 6616 : if (code == BIT_NOT_EXPR)
4796 170 : *gsi = gsi_for_stmt (cond_stmt);
4797 :
4798 6616 : auto_vec<gimple *, 8> mul_stmts;
4799 6616 : if (code == MULT_EXPR && cast_stmt)
4800 : {
4801 75 : type = TREE_TYPE (cast_lhs);
4802 75 : gimple *g = SSA_NAME_DEF_STMT (rhs1);
4803 75 : if (gimple_assign_cast_p (g)
4804 38 : && useless_type_conversion_p (type,
4805 38 : TREE_TYPE (gimple_assign_rhs1 (g)))
4806 113 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4807 : rhs1 = gimple_assign_rhs1 (g);
4808 : else
4809 : {
4810 37 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4811 37 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4812 37 : rhs1 = gimple_assign_lhs (g);
4813 37 : mul_stmts.quick_push (g);
4814 : }
4815 75 : if (TREE_CODE (rhs2) == INTEGER_CST)
4816 32 : rhs2 = fold_convert (type, rhs2);
4817 : else
4818 : {
4819 43 : g = SSA_NAME_DEF_STMT (rhs2);
4820 43 : if (gimple_assign_cast_p (g)
4821 22 : && useless_type_conversion_p (type,
4822 22 : TREE_TYPE (gimple_assign_rhs1 (g)))
4823 65 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4824 : rhs2 = gimple_assign_rhs1 (g);
4825 : else
4826 : {
4827 21 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4828 21 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4829 21 : rhs2 = gimple_assign_lhs (g);
4830 21 : mul_stmts.quick_push (g);
4831 : }
4832 : }
4833 : }
4834 6616 : tree ctype = build_complex_type (type);
4835 13087 : gcall *g = gimple_build_call_internal (code == MULT_EXPR
4836 : ? IFN_MUL_OVERFLOW
4837 : : code != MINUS_EXPR
4838 6471 : ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4839 : 2, rhs1, rhs2);
4840 6616 : tree ctmp = make_ssa_name (ctype);
4841 6616 : gimple_call_set_lhs (g, ctmp);
4842 6616 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4843 6616 : tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4844 6616 : gassign *g2;
4845 6616 : if (code != BIT_NOT_EXPR)
4846 : {
4847 6446 : g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4848 : build1 (REALPART_EXPR, type, ctmp));
4849 6446 : if (maxval || cast_stmt)
4850 : {
4851 87 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4852 87 : if (add_first)
4853 2 : *gsi = gsi_for_stmt (stmt);
4854 : }
4855 : else
4856 6359 : gsi_replace (gsi, g2, true);
4857 6446 : if (code == MULT_EXPR)
4858 : {
4859 145 : mul_stmts.quick_push (g);
4860 145 : mul_stmts.quick_push (g2);
4861 145 : if (cast_stmt)
4862 : {
4863 75 : g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4864 75 : gsi_replace (gsi, g2, true);
4865 75 : mul_stmts.quick_push (g2);
4866 : }
4867 : }
4868 : }
4869 6616 : tree ovf = make_ssa_name (type);
4870 6616 : g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4871 : build1 (IMAGPART_EXPR, type, ctmp));
4872 6616 : if (code != BIT_NOT_EXPR)
4873 6446 : gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4874 : else
4875 170 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4876 6616 : if (code == MULT_EXPR)
4877 145 : mul_stmts.quick_push (g2);
4878 :
4879 35337 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4880 : {
4881 22180 : if (is_gimple_debug (use_stmt))
4882 5376 : continue;
4883 :
4884 16804 : gimple *orig_use_stmt = use_stmt;
4885 16804 : int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4886 : maxval, NULL);
4887 16804 : if (ovf_use == 0)
4888 : {
4889 10154 : gcc_assert (code != BIT_NOT_EXPR);
4890 10154 : if (maxval)
4891 : {
4892 4 : tree use_lhs = gimple_assign_lhs (use_stmt);
4893 4 : gimple_assign_set_rhs1 (use_stmt, new_lhs);
4894 4 : if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4895 4 : TREE_TYPE (new_lhs)))
4896 4 : gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4897 4 : update_stmt (use_stmt);
4898 : }
4899 10154 : continue;
4900 10154 : }
4901 6650 : if (gimple_code (use_stmt) == GIMPLE_COND)
4902 : {
4903 4425 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
4904 4425 : gimple_cond_set_lhs (cond_stmt, ovf);
4905 4425 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4906 4576 : gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4907 : }
4908 : else
4909 : {
4910 2225 : gcc_checking_assert (is_gimple_assign (use_stmt));
4911 2225 : if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4912 : {
4913 2225 : if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
4914 : {
4915 6 : g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
4916 : ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4917 : ovf, build_int_cst (type, 0));
4918 6 : gimple_stmt_iterator gsiu = gsi_for_stmt (use_stmt);
4919 6 : gsi_insert_before (&gsiu, g2, GSI_SAME_STMT);
4920 6 : gimple_assign_set_rhs_with_ops (&gsiu, NOP_EXPR,
4921 : gimple_assign_lhs (g2));
4922 6 : update_stmt (use_stmt);
4923 6 : use_operand_p use;
4924 6 : single_imm_use (gimple_assign_lhs (use_stmt), &use,
4925 : &use_stmt);
4926 6 : if (gimple_code (use_stmt) == GIMPLE_COND)
4927 : {
4928 0 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
4929 0 : gimple_cond_set_lhs (cond_stmt, ovf);
4930 0 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4931 : }
4932 : else
4933 : {
4934 6 : gcc_checking_assert (is_gimple_assign (use_stmt));
4935 6 : if (gimple_assign_rhs_class (use_stmt)
4936 : == GIMPLE_BINARY_RHS)
4937 : {
4938 0 : gimple_assign_set_rhs1 (use_stmt, ovf);
4939 0 : gimple_assign_set_rhs2 (use_stmt,
4940 : build_int_cst (type, 0));
4941 : }
4942 6 : else if (gimple_assign_cast_p (use_stmt))
4943 6 : gimple_assign_set_rhs1 (use_stmt, ovf);
4944 : else
4945 : {
4946 0 : tree_code sc = gimple_assign_rhs_code (use_stmt);
4947 0 : gcc_checking_assert (sc == COND_EXPR);
4948 0 : tree cond = gimple_assign_rhs1 (use_stmt);
4949 0 : cond = build2 (TREE_CODE (cond),
4950 : boolean_type_node, ovf,
4951 : build_int_cst (type, 0));
4952 0 : gimple_assign_set_rhs1 (use_stmt, cond);
4953 : }
4954 : }
4955 6 : update_stmt (use_stmt);
4956 6 : gsi_remove (&gsiu, true);
4957 6 : gsiu = gsi_for_stmt (g2);
4958 6 : gsi_remove (&gsiu, true);
4959 6 : continue;
4960 6 : }
4961 : else
4962 : {
4963 2219 : gimple_assign_set_rhs1 (use_stmt, ovf);
4964 2219 : gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4965 2368 : gimple_assign_set_rhs_code (use_stmt,
4966 : ovf_use == 1
4967 : ? NE_EXPR : EQ_EXPR);
4968 : }
4969 : }
4970 : else
4971 : {
4972 0 : gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4973 : == COND_EXPR);
4974 0 : tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4975 : boolean_type_node, ovf,
4976 : build_int_cst (type, 0));
4977 0 : gimple_assign_set_rhs1 (use_stmt, cond);
4978 : }
4979 : }
4980 6644 : update_stmt (use_stmt);
4981 6644 : if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4982 : {
4983 145 : gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4984 145 : maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4985 : cfg_changed);
4986 145 : use_operand_p use;
4987 145 : gimple *cast_stmt;
4988 145 : if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
4989 : &cast_stmt)
4990 145 : && gimple_assign_cast_p (cast_stmt))
4991 : {
4992 2 : gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4993 2 : gsi_remove (&gsi3, true);
4994 2 : release_ssa_name (gimple_assign_lhs (cast_stmt));
4995 : }
4996 145 : gsi_remove (&gsi2, true);
4997 145 : release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4998 : }
4999 6616 : }
5000 6616 : if (maxval)
5001 : {
5002 12 : gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
5003 12 : gsi_remove (&gsi2, true);
5004 12 : if (add_stmt)
5005 : {
5006 8 : gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
5007 : new_lhs);
5008 8 : gsi2 = gsi_for_stmt (add_stmt);
5009 8 : gsi_replace (&gsi2, g, true);
5010 : }
5011 : }
5012 6604 : else if (code == BIT_NOT_EXPR)
5013 : {
5014 170 : *gsi = gsi_for_stmt (stmt);
5015 170 : gsi_remove (gsi, true);
5016 170 : release_ssa_name (lhs);
5017 170 : return true;
5018 : }
5019 : return false;
5020 6616 : }
5021 :
5022 : /* Helper of match_uaddc_usubc. Look through an integral cast
5023 : which should preserve [0, 1] range value (unless source has
5024 : 1-bit signed type) and the cast has single use. */
5025 :
5026 : static gimple *
5027 2064480 : uaddc_cast (gimple *g)
5028 : {
5029 2064480 : if (!gimple_assign_cast_p (g))
5030 : return g;
5031 492791 : tree op = gimple_assign_rhs1 (g);
5032 492791 : if (TREE_CODE (op) == SSA_NAME
5033 417175 : && INTEGRAL_TYPE_P (TREE_TYPE (op))
5034 290115 : && (TYPE_PRECISION (TREE_TYPE (op)) > 1
5035 5513 : || TYPE_UNSIGNED (TREE_TYPE (op)))
5036 782906 : && has_single_use (gimple_assign_lhs (g)))
5037 175737 : return SSA_NAME_DEF_STMT (op);
5038 : return g;
5039 : }
5040 :
5041 : /* Helper of match_uaddc_usubc. Look through a NE_EXPR
5042 : comparison with 0 which also preserves [0, 1] value range. */
5043 :
5044 : static gimple *
5045 2064639 : uaddc_ne0 (gimple *g)
5046 : {
5047 2064639 : if (is_gimple_assign (g)
5048 1263605 : && gimple_assign_rhs_code (g) == NE_EXPR
5049 53407 : && integer_zerop (gimple_assign_rhs2 (g))
5050 5281 : && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
5051 2069920 : && has_single_use (gimple_assign_lhs (g)))
5052 5025 : return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
5053 : return g;
5054 : }
5055 :
5056 : /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
5057 : operand. */
5058 :
5059 : static bool
5060 2065470 : uaddc_is_cplxpart (gimple *g, tree_code part)
5061 : {
5062 2065470 : return (is_gimple_assign (g)
5063 1263102 : && gimple_assign_rhs_code (g) == part
5064 2067809 : && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
5065 : }
5066 :
5067 : /* Try to match e.g.
5068 : _29 = .ADD_OVERFLOW (_3, _4);
5069 : _30 = REALPART_EXPR <_29>;
5070 : _31 = IMAGPART_EXPR <_29>;
5071 : _32 = .ADD_OVERFLOW (_30, _38);
5072 : _33 = REALPART_EXPR <_32>;
5073 : _34 = IMAGPART_EXPR <_32>;
5074 : _35 = _31 + _34;
5075 : as
5076 : _36 = .UADDC (_3, _4, _38);
5077 : _33 = REALPART_EXPR <_36>;
5078 : _35 = IMAGPART_EXPR <_36>;
5079 : or
5080 : _22 = .SUB_OVERFLOW (_6, _5);
5081 : _23 = REALPART_EXPR <_22>;
5082 : _24 = IMAGPART_EXPR <_22>;
5083 : _25 = .SUB_OVERFLOW (_23, _37);
5084 : _26 = REALPART_EXPR <_25>;
5085 : _27 = IMAGPART_EXPR <_25>;
5086 : _28 = _24 | _27;
5087 : as
5088 : _29 = .USUBC (_6, _5, _37);
5089 : _26 = REALPART_EXPR <_29>;
5090 : _288 = IMAGPART_EXPR <_29>;
5091 : provided _38 or _37 above have [0, 1] range
5092 : and _3, _4 and _30 or _6, _5 and _23 are unsigned
5093 : integral types with the same precision. Whether + or | or ^ is
5094 : used on the IMAGPART_EXPR results doesn't matter, with one of
5095 : added or subtracted operands in [0, 1] range at most one
5096 : .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
5097 :
5098 : static bool
5099 2784124 : match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
5100 : {
5101 2784124 : tree rhs[4];
5102 2784124 : rhs[0] = gimple_assign_rhs1 (stmt);
5103 2784124 : rhs[1] = gimple_assign_rhs2 (stmt);
5104 2784124 : rhs[2] = NULL_TREE;
5105 2784124 : rhs[3] = NULL_TREE;
5106 2784124 : tree type = TREE_TYPE (rhs[0]);
5107 2784124 : if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
5108 : return false;
5109 :
5110 1646949 : auto_vec<gimple *, 2> temp_stmts;
5111 1646949 : if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
5112 : {
5113 : /* If overflow flag is ignored on the MSB limb, we can end up with
5114 : the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
5115 : or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
5116 : thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
5117 : the limb below the MSB, but also create another .UADDC/.USUBC call
5118 : for the last limb.
5119 :
5120 : First look through assignments with the same rhs code as CODE,
5121 : with the exception that subtraction of a constant is canonicalized
5122 : into addition of its negation. rhs[0] will be minuend for
5123 : subtractions and one of addends for addition, all other assigned
5124 : rhs[i] operands will be subtrahends or other addends. */
5125 1528069 : while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
5126 : {
5127 1503226 : gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
5128 1503226 : if (has_single_use (rhs[0])
5129 505790 : && is_gimple_assign (g)
5130 1949446 : && (gimple_assign_rhs_code (g) == code
5131 413853 : || (code == MINUS_EXPR
5132 54613 : && gimple_assign_rhs_code (g) == PLUS_EXPR
5133 17209 : && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
5134 : {
5135 45691 : tree r2 = gimple_assign_rhs2 (g);
5136 45691 : if (gimple_assign_rhs_code (g) != code)
5137 : {
5138 13324 : r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
5139 13324 : if (!r2)
5140 : break;
5141 : }
5142 45691 : rhs[0] = gimple_assign_rhs1 (g);
5143 45691 : tree &r = rhs[2] ? rhs[3] : rhs[2];
5144 45691 : r = r2;
5145 45691 : temp_stmts.quick_push (g);
5146 : }
5147 : else
5148 : break;
5149 : }
5150 4447134 : for (int i = 1; i <= 2; ++i)
5151 3006983 : while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
5152 : {
5153 528504 : gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
5154 528504 : if (has_single_use (rhs[i])
5155 265538 : && is_gimple_assign (g)
5156 776259 : && gimple_assign_rhs_code (g) == PLUS_EXPR)
5157 : {
5158 42227 : rhs[i] = gimple_assign_rhs1 (g);
5159 42227 : if (rhs[2])
5160 8164 : rhs[3] = gimple_assign_rhs2 (g);
5161 : else
5162 34063 : rhs[2] = gimple_assign_rhs2 (g);
5163 42227 : temp_stmts.quick_push (g);
5164 : }
5165 : else
5166 : break;
5167 : }
5168 : /* If there are just 3 addends or one minuend and two subtrahends,
5169 : check for UADDC or USUBC being pattern recognized earlier.
5170 : Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
5171 : got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
5172 : etc. */
5173 1482378 : if (rhs[2] && !rhs[3])
5174 : {
5175 303304 : for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
5176 176071 : if (TREE_CODE (rhs[i]) == SSA_NAME)
5177 : {
5178 137513 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5179 137513 : im = uaddc_ne0 (im);
5180 137513 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5181 : {
5182 : /* We found one of the 3 addends or 2 subtrahends to be
5183 : __imag__ of something, verify it is .UADDC/.USUBC. */
5184 215 : tree rhs1 = gimple_assign_rhs1 (im);
5185 215 : gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
5186 215 : tree ovf_lhs = NULL_TREE;
5187 215 : tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
5188 235 : if (gimple_call_internal_p (ovf, code == PLUS_EXPR
5189 : ? IFN_ADD_OVERFLOW
5190 : : IFN_SUB_OVERFLOW))
5191 : {
5192 : /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
5193 : This is for the case of 2 chained .UADDC/.USUBC,
5194 : where the first one uses 0 carry-in and the second
5195 : one ignores the carry-out.
5196 : So, something like:
5197 : _16 = .ADD_OVERFLOW (_1, _2);
5198 : _17 = REALPART_EXPR <_16>;
5199 : _18 = IMAGPART_EXPR <_16>;
5200 : _15 = _3 + _4;
5201 : _12 = _15 + _18;
5202 : where the first 3 statements come from the lower
5203 : limb addition and the last 2 from the higher limb
5204 : which ignores carry-out. */
5205 197 : ovf_lhs = gimple_call_lhs (ovf);
5206 197 : tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
5207 197 : ovf_arg1 = gimple_call_arg (ovf, 0);
5208 197 : ovf_arg2 = gimple_call_arg (ovf, 1);
5209 :                  /* In that case we need to punt if the types
5210 :                     mismatch.  */
5211 197 : if (!types_compatible_p (type, ovf_lhs_type)
5212 197 : || !types_compatible_p (type, TREE_TYPE (ovf_arg1))
5213 391 : || !types_compatible_p (type,
5214 194 : TREE_TYPE (ovf_arg2)))
5215 : ovf_lhs = NULL_TREE;
5216 : else
5217 : {
5218 479 : for (int i = (code == PLUS_EXPR ? 1 : 0);
5219 479 : i >= 0; --i)
5220 : {
5221 339 : tree r = gimple_call_arg (ovf, i);
5222 339 : if (TREE_CODE (r) != SSA_NAME)
5223 0 : continue;
5224 339 : if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
5225 : REALPART_EXPR))
5226 : {
5227 : /* Punt if one of the args which isn't
5228 : subtracted isn't __real__; that could
5229 : then prevent better match later.
5230 : Consider:
5231 : _3 = .ADD_OVERFLOW (_1, _2);
5232 : _4 = REALPART_EXPR <_3>;
5233 : _5 = IMAGPART_EXPR <_3>;
5234 : _7 = .ADD_OVERFLOW (_4, _6);
5235 : _8 = REALPART_EXPR <_7>;
5236 : _9 = IMAGPART_EXPR <_7>;
5237 : _12 = _10 + _11;
5238 : _13 = _12 + _9;
5239 : _14 = _13 + _5;
5240 : We want to match this when called on
5241 : the last stmt as a pair of .UADDC calls,
5242 : but without this check we could turn
5243 : that prematurely on _13 = _12 + _9;
5244 : stmt into .UADDC with 0 carry-in just
5245 : on the second .ADD_OVERFLOW call and
5246 : another replacing the _12 and _13
5247 : additions. */
5248 : ovf_lhs = NULL_TREE;
5249 : break;
5250 : }
5251 : }
5252 : }
5253 190 : if (ovf_lhs)
5254 : {
5255 140 : use_operand_p use_p;
5256 140 : imm_use_iterator iter;
5257 140 : tree re_lhs = NULL_TREE;
5258 560 : FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
5259 : {
5260 280 : gimple *use_stmt = USE_STMT (use_p);
5261 280 : if (is_gimple_debug (use_stmt))
5262 0 : continue;
5263 280 : if (use_stmt == im)
5264 140 : continue;
5265 140 : if (!uaddc_is_cplxpart (use_stmt,
5266 : REALPART_EXPR))
5267 : {
5268 : ovf_lhs = NULL_TREE;
5269 : break;
5270 : }
5271 140 : re_lhs = gimple_assign_lhs (use_stmt);
5272 140 : }
5273 140 : if (ovf_lhs && re_lhs)
5274 : {
5275 502 : FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
5276 : {
5277 281 : gimple *use_stmt = USE_STMT (use_p);
5278 281 : if (is_gimple_debug (use_stmt))
5279 102 : continue;
5280 179 : internal_fn ifn
5281 179 : = gimple_call_internal_fn (ovf);
5282 : /* Punt if the __real__ of lhs is used
5283 : in the same .*_OVERFLOW call.
5284 : Consider:
5285 : _3 = .ADD_OVERFLOW (_1, _2);
5286 : _4 = REALPART_EXPR <_3>;
5287 : _5 = IMAGPART_EXPR <_3>;
5288 : _7 = .ADD_OVERFLOW (_4, _6);
5289 : _8 = REALPART_EXPR <_7>;
5290 : _9 = IMAGPART_EXPR <_7>;
5291 : _12 = _10 + _11;
5292 : _13 = _12 + _5;
5293 : _14 = _13 + _9;
5294 : We want to match this when called on
5295 : the last stmt as a pair of .UADDC calls,
5296 : but without this check we could turn
5297 : that prematurely on _13 = _12 + _5;
5298 : stmt into .UADDC with 0 carry-in just
5299 : on the first .ADD_OVERFLOW call and
5300 : another replacing the _12 and _13
5301 : additions. */
5302 179 : if (gimple_call_internal_p (use_stmt, ifn))
5303 : {
5304 : ovf_lhs = NULL_TREE;
5305 : break;
5306 : }
5307 140 : }
5308 : }
5309 : }
5310 : }
5311 140 : if ((ovf_lhs
5312 143 : || gimple_call_internal_p (ovf,
5313 : code == PLUS_EXPR
5314 : ? IFN_UADDC : IFN_USUBC))
5315 241 : && (optab_handler (code == PLUS_EXPR
5316 : ? uaddc5_optab : usubc5_optab,
5317 87 : TYPE_MODE (type))
5318 : != CODE_FOR_nothing))
5319 : {
5320 : /* And in that case build another .UADDC/.USUBC
5321 : call for the most significand limb addition.
5322 : Overflow bit is ignored here. */
5323 63 : if (i != 2)
5324 63 : std::swap (rhs[i], rhs[2]);
5325 63 : gimple *g
5326 77 : = gimple_build_call_internal (code == PLUS_EXPR
5327 : ? IFN_UADDC
5328 : : IFN_USUBC,
5329 : 3, rhs[0], rhs[1],
5330 : rhs[2]);
5331 63 : tree nlhs = make_ssa_name (build_complex_type (type));
5332 63 : gimple_call_set_lhs (g, nlhs);
5333 63 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5334 63 : tree ilhs = gimple_assign_lhs (stmt);
5335 63 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5336 : build1 (REALPART_EXPR,
5337 63 : TREE_TYPE (ilhs),
5338 : nlhs));
5339 63 : gsi_replace (gsi, g, true);
5340 : /* And if it is initialized from result of __imag__
5341 : of .{ADD,SUB}_OVERFLOW call, replace that
5342 : call with .U{ADD,SUB}C call with the same arguments,
5343 : just 0 added as third argument. This isn't strictly
5344 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5345 : produce the same result, but may result in better
5346 : generated code on some targets where the backend can
5347 : better prepare in how the result will be used. */
5348 63 : if (ovf_lhs)
5349 : {
5350 57 : tree zero = build_zero_cst (type);
5351 57 : g = gimple_build_call_internal (code == PLUS_EXPR
5352 : ? IFN_UADDC
5353 : : IFN_USUBC,
5354 : 3, ovf_arg1,
5355 : ovf_arg2, zero);
5356 57 : gimple_call_set_lhs (g, ovf_lhs);
5357 57 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
5358 57 : gsi_replace (&gsi2, g, true);
5359 : }
5360 63 : return true;
5361 : }
5362 : }
5363 : }
5364 : return false;
5365 : }
5366 1418730 : if (code == MINUS_EXPR && !rhs[2])
5367 : return false;
5368 285 : if (code == MINUS_EXPR)
5369 : /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
5370 : So, for MINUS_EXPR swap the single added rhs operand (others are
5371 : subtracted) to rhs[3]. */
5372 285 : std::swap (rhs[0], rhs[3]);
5373 : }
5374 : /* Walk from both operands of STMT (for +/- even sometimes from
5375 : all the 4 addends or 3 subtrahends), see through casts and != 0
5376 : statements which would preserve [0, 1] range of values and
5377 : check which is initialized from __imag__. */
5378 7359362 : gimple *im1 = NULL, *im2 = NULL;
5379 14717586 : for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
5380 5887596 : if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
5381 : {
5382 1926875 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5383 1926875 : im = uaddc_ne0 (im);
5384 1926875 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5385 : {
5386 1604 : if (im1 == NULL)
5387 : {
5388 1211 : im1 = im;
5389 1211 : if (i != 0)
5390 330 : std::swap (rhs[0], rhs[i]);
5391 : }
5392 : else
5393 : {
5394 393 : im2 = im;
5395 393 : if (i != 1)
5396 23 : std::swap (rhs[1], rhs[i]);
5397 : break;
5398 : }
5399 : }
5400 : }
5401 : /* If we don't find at least two, punt. */
5402 1472159 : if (!im2)
5403 : return false;
5404 : /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
5405 : either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
5406 : uaddc5/usubc5 named pattern for the corresponding mode. */
5407 393 : gimple *ovf1
5408 393 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
5409 393 : gimple *ovf2
5410 393 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
5411 393 : internal_fn ifn;
5412 393 : if (!is_gimple_call (ovf1)
5413 393 : || !gimple_call_internal_p (ovf1)
5414 393 : || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
5415 60 : && ifn != IFN_SUB_OVERFLOW)
5416 370 : || !gimple_call_internal_p (ovf2, ifn)
5417 399 : || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
5418 366 : TYPE_MODE (type)) == CODE_FOR_nothing
5419 94 : || (rhs[2]
5420 17 : && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
5421 15 : TYPE_MODE (type)) == CODE_FOR_nothing)
5422 94 : || !types_compatible_p (type,
5423 94 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf1))))
5424 486 : || !types_compatible_p (type,
5425 93 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf2)))))
5426 300 : return false;
5427 93 : tree arg1, arg2, arg3 = NULL_TREE;
5428 93 : gimple *re1 = NULL, *re2 = NULL;
5429 : /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
5430 : should be initialized from __real__ of the other of the two calls.
5431 : Though, for .SUB_OVERFLOW, it has to be the first argument, not the
5432 : second one. */
5433 340 : for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
5434 349 : for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
5435 : {
5436 288 : tree arg = gimple_call_arg (ovf, i);
5437 288 : if (TREE_CODE (arg) != SSA_NAME)
5438 2 : continue;
5439 286 : re1 = SSA_NAME_DEF_STMT (arg);
5440 286 : if (uaddc_is_cplxpart (re1, REALPART_EXPR)
5441 379 : && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
5442 93 : == (ovf == ovf1 ? ovf2 : ovf1)))
5443 : {
5444 93 : if (ovf == ovf1)
5445 : {
5446 : /* Make sure ovf2 is the .*_OVERFLOW call with argument
5447 : initialized from __real__ of ovf1. */
5448 20 : std::swap (rhs[0], rhs[1]);
5449 20 : std::swap (im1, im2);
5450 20 : std::swap (ovf1, ovf2);
5451 : }
5452 93 : arg3 = gimple_call_arg (ovf, 1 - i);
5453 93 : i = -1;
5454 93 : break;
5455 : }
5456 : }
5457 93 : if (!arg3)
5458 : return false;
5459 93 : arg1 = gimple_call_arg (ovf1, 0);
5460 93 : arg2 = gimple_call_arg (ovf1, 1);
5461 93 : if (!types_compatible_p (type, TREE_TYPE (arg1)))
5462 : return false;
5463 93 : int kind[2] = { 0, 0 };
5464 93 : tree arg_im[2] = { NULL_TREE, NULL_TREE };
5465 : /* At least one of arg2 and arg3 should have type compatible
5466 : with arg1/rhs[0], and the other one should have value in [0, 1]
5467 : range. If both are in [0, 1] range and type compatible with
5468 : arg1/rhs[0], try harder to find after looking through casts,
5469 : != 0 comparisons which one is initialized to __imag__ of
5470 : .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
5471 279 : for (int i = 0; i < 2; ++i)
5472 : {
5473 186 : tree arg = i == 0 ? arg2 : arg3;
5474 186 : if (types_compatible_p (type, TREE_TYPE (arg)))
5475 161 : kind[i] = 1;
5476 372 : if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
5477 372 : || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
5478 25 : && !TYPE_UNSIGNED (TREE_TYPE (arg))))
5479 0 : continue;
5480 186 : if (tree_zero_one_valued_p (arg))
5481 51 : kind[i] |= 2;
5482 186 : if (TREE_CODE (arg) == SSA_NAME)
5483 : {
5484 184 : gimple *g = SSA_NAME_DEF_STMT (arg);
5485 184 : if (gimple_assign_cast_p (g))
5486 : {
5487 30 : tree op = gimple_assign_rhs1 (g);
5488 30 : if (TREE_CODE (op) == SSA_NAME
5489 30 : && INTEGRAL_TYPE_P (TREE_TYPE (op)))
5490 30 : g = SSA_NAME_DEF_STMT (op);
5491 : }
5492 184 : g = uaddc_ne0 (g);
5493 184 : if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
5494 124 : continue;
5495 60 : arg_im[i] = gimple_assign_lhs (g);
5496 60 : g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
5497 60 : if (!is_gimple_call (g) || !gimple_call_internal_p (g))
5498 0 : continue;
5499 60 : switch (gimple_call_internal_fn (g))
5500 : {
5501 60 : case IFN_ADD_OVERFLOW:
5502 60 : case IFN_SUB_OVERFLOW:
5503 60 : case IFN_UADDC:
5504 60 : case IFN_USUBC:
5505 60 : break;
5506 0 : default:
5507 0 : continue;
5508 : }
5509 60 : kind[i] |= 4;
5510 : }
5511 : }
5512 : /* Make arg2 the one with compatible type and arg3 the one
5513 : with [0, 1] range. If both is true for both operands,
5514 : prefer as arg3 result of __imag__ of some ifn. */
5515 93 : if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
5516 : {
5517 1 : std::swap (arg2, arg3);
5518 1 : std::swap (kind[0], kind[1]);
5519 1 : std::swap (arg_im[0], arg_im[1]);
5520 : }
5521 93 : if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
5522 : return false;
5523 69 : if (!has_single_use (gimple_assign_lhs (im1))
5524 67 : || !has_single_use (gimple_assign_lhs (im2))
5525 67 : || !has_single_use (gimple_assign_lhs (re1))
5526 136 : || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
5527 : return false;
5528 : /* Check that ovf2's result is used in __real__ and set re2
5529 : to that statement. */
5530 67 : use_operand_p use_p;
5531 67 : imm_use_iterator iter;
5532 67 : tree lhs = gimple_call_lhs (ovf2);
5533 267 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5534 : {
5535 133 : gimple *use_stmt = USE_STMT (use_p);
5536 133 : if (is_gimple_debug (use_stmt))
5537 0 : continue;
5538 133 : if (use_stmt == im2)
5539 67 : continue;
5540 66 : if (re2)
5541 : return false;
5542 66 : if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
5543 : return false;
5544 : re2 = use_stmt;
5545 0 : }
5546 : /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5547 67 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
5548 67 : gimple *g;
5549 67 : if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
5550 : arg3 = arg_im[1];
5551 67 : if ((kind[1] & 1) == 0)
5552 : {
5553 25 : if (TREE_CODE (arg3) == INTEGER_CST)
5554 0 : arg3 = fold_convert (type, arg3);
5555 : else
5556 : {
5557 25 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
5558 25 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5559 25 : arg3 = gimple_assign_lhs (g);
5560 : }
5561 : }
5562 89 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5563 : ? IFN_UADDC : IFN_USUBC,
5564 : 3, arg1, arg2, arg3);
5565 67 : tree nlhs = make_ssa_name (TREE_TYPE (lhs));
5566 67 : gimple_call_set_lhs (g, nlhs);
5567 67 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5568 : /* In the case where stmt is | or ^ of two overflow flags
5569 : or addition of those, replace stmt with __imag__ of the above
5570 : added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5571 : arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5572 67 : tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
5573 67 : g = gimple_build_assign (ilhs, IMAGPART_EXPR,
5574 67 : build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
5575 67 : if (rhs[2])
5576 : {
5577 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5578 : /* Remove some further statements which can't be kept in the IL because
5579 : they can use SSA_NAMEs whose setter is going to be removed too. */
5580 75 : for (gimple *g2 : temp_stmts)
5581 : {
5582 30 : gsi2 = gsi_for_stmt (g2);
5583 30 : gsi_remove (&gsi2, true);
5584 30 : release_defs (g2);
5585 : }
5586 : }
5587 : else
5588 52 : gsi_replace (gsi, g, true);
5589 : /* Remove some statements which can't be kept in the IL because they
5590 : use SSA_NAME whose setter is going to be removed too. */
5591 67 : tree rhs1 = rhs[1];
5592 103 : for (int i = 0; i < 2; i++)
5593 85 : if (rhs1 == gimple_assign_lhs (im2))
5594 : break;
5595 : else
5596 : {
5597 36 : g = SSA_NAME_DEF_STMT (rhs1);
5598 36 : rhs1 = gimple_assign_rhs1 (g);
5599 36 : gsi2 = gsi_for_stmt (g);
5600 36 : gsi_remove (&gsi2, true);
5601 36 : release_defs (g);
5602 : }
5603 67 : gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
5604 67 : gsi2 = gsi_for_stmt (im2);
5605 67 : gsi_remove (&gsi2, true);
5606 67 : release_defs (im2);
5607 : /* Replace the re2 statement with __real__ of the newly added
5608 : .UADDC/.USUBC call. */
5609 67 : if (re2)
5610 : {
5611 66 : gsi2 = gsi_for_stmt (re2);
5612 66 : tree rlhs = gimple_assign_lhs (re2);
5613 66 : g = gimple_build_assign (rlhs, REALPART_EXPR,
5614 66 : build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
5615 66 : gsi_replace (&gsi2, g, true);
5616 : }
5617 67 : if (rhs[2])
5618 : {
5619 : /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5620 : arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5621 : replace stmt with __real__ of another .UADDC/.USUBC call which
5622 : handles the most significant limb. Overflow flag from this is
5623 : ignored. */
5624 17 : g = gimple_build_call_internal (code == PLUS_EXPR
5625 : ? IFN_UADDC : IFN_USUBC,
5626 : 3, rhs[3], rhs[2], ilhs);
5627 15 : nlhs = make_ssa_name (TREE_TYPE (lhs));
5628 15 : gimple_call_set_lhs (g, nlhs);
5629 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5630 15 : ilhs = gimple_assign_lhs (stmt);
5631 15 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5632 15 : build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
5633 15 : gsi_replace (gsi, g, true);
5634 : }
5635 67 : if (TREE_CODE (arg3) == SSA_NAME)
5636 : {
5637 : /* When pattern recognizing the second least significant limb
5638 : above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5639 : check if the [0, 1] range argument (i.e. carry in) isn't the
5640 : result of another .{ADD,SUB}_OVERFLOW call (one handling the
5641 : least significant limb). Again look through casts and != 0. */
5642 67 : gimple *im3 = SSA_NAME_DEF_STMT (arg3);
5643 92 : for (int i = 0; i < 2; ++i)
5644 : {
5645 92 : gimple *im4 = uaddc_cast (im3);
5646 92 : if (im4 == im3)
5647 : break;
5648 : else
5649 25 : im3 = im4;
5650 : }
5651 67 : im3 = uaddc_ne0 (im3);
5652 67 : if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
5653 : {
5654 60 : gimple *ovf3
5655 60 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
5656 60 : if (gimple_call_internal_p (ovf3, ifn))
5657 : {
5658 25 : lhs = gimple_call_lhs (ovf3);
5659 25 : arg1 = gimple_call_arg (ovf3, 0);
5660 25 : arg2 = gimple_call_arg (ovf3, 1);
5661 25 : if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
5662 25 : && types_compatible_p (type, TREE_TYPE (arg1))
5663 50 : && types_compatible_p (type, TREE_TYPE (arg2)))
5664 : {
5665 : /* And if it is initialized from result of __imag__
5666 : of .{ADD,SUB}_OVERFLOW call, replace that
5667 : call with .U{ADD,SUB}C call with the same arguments,
5668 : just 0 added as third argument. This isn't strictly
5669 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5670 : produce the same result, but may result in better
5671 : generated code on some targets where the backend can
5672 : better prepare in how the result will be used. */
5673 25 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5674 : ? IFN_UADDC : IFN_USUBC,
5675 : 3, arg1, arg2,
5676 : build_zero_cst (type));
5677 25 : gimple_call_set_lhs (g, lhs);
5678 25 : gsi2 = gsi_for_stmt (ovf3);
5679 25 : gsi_replace (&gsi2, g, true);
5680 : }
5681 : }
5682 : }
5683 : }
5684 : return true;
5685 1646949 : }
5686 :
/* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
   (x & (x - 1)) > x - 1 or (x & (x - 1)) <= x - 1 if .POPCOUNT
   isn't a direct optab.  Also handle `<=`/`>` to be
   `x & (x - 1) !=/== x`.  */

static void
match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
{
  tree clhs, crhs;
  enum tree_code code;
  /* Whether the comparison is <= 1 / > 1 ("at most one bit set")
     rather than == 1 / != 1 ("exactly one bit set").  */
  bool was_le = false;
  /* STMT is either a GIMPLE_COND or a comparison assignment; extract
     the comparison operands and code either way.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      clhs = gimple_cond_lhs (stmt);
      crhs = gimple_cond_rhs (stmt);
      code = gimple_cond_code (stmt);
    }
  else
    {
      clhs = gimple_assign_rhs1 (stmt);
      crhs = gimple_assign_rhs2 (stmt);
      code = gimple_assign_rhs_code (stmt);
    }
  if (code != LE_EXPR && code != GT_EXPR
      && code != EQ_EXPR && code != NE_EXPR)
    return;
  if (code == LE_EXPR || code == GT_EXPR)
    was_le = true;
  /* Only comparisons of an SSA name against the constant 1 qualify.  */
  if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
    return;
  gimple *call = SSA_NAME_DEF_STMT (clhs);
  /* CLHS may be defined by any statement; non-.POPCOUNT definitions
     (presumably including non-calls, for which gimple_call_combined_fn
     is expected to yield no CFN_POPCOUNT value — confirm in its
     definition) fall through to the default case below.  */
  combined_fn cfn = gimple_call_combined_fn (call);
  switch (cfn)
    {
    CASE_CFN_POPCOUNT:
      break;
    default:
      return;
    }
  /* The popcount result must feed only this comparison, otherwise the
     call has to stay around anyway.  */
  if (!has_single_use (clhs))
    return;
  tree arg = gimple_call_arg (call, 0);
  tree type = TREE_TYPE (arg);
  if (!INTEGRAL_TYPE_P (type))
    return;
  bool nonzero_arg = tree_expr_nonzero_p (arg);
  if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
    {
      /* Tell expand_POPCOUNT the popcount result is only used in equality
	 comparison with one, so that it can decide based on rtx costs.  */
      gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
					      was_le ? integer_minus_one_node
					      : (nonzero_arg ? integer_zero_node
						 : integer_one_node));
      gimple_call_set_lhs (g, gimple_call_lhs (call));
      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
      gsi_replace (&gsi2, g, true);
      return;
    }
  /* No direct .POPCOUNT support; open-code the single-bit test.
     First build argm1 = arg + (-1), i.e. arg - 1.  */
  tree argm1 = make_ssa_name (type);
  gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
				   build_int_cst (type, -1));
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  /* popcount (x) <= 1 is (x & (x - 1)) == 0, and if x is known nonzero
     popcount (x) == 1 is also (x & (x - 1)) == 0.  In the general
     == 1 / != 1 case use (x ^ (x - 1)) >/<= x - 1 instead.  */
  g = gimple_build_assign (make_ssa_name (type),
			   (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR,
			   arg, argm1);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  tree_code cmpcode;
  if (was_le)
    {
      /* Compare the AND result against zero.  */
      argm1 = build_zero_cst (type);
      cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR;
    }
  else if (nonzero_arg)
    {
      /* Same comparison code, but against zero.  */
      argm1 = build_zero_cst (type);
      cmpcode = code;
    }
  else
    /* Compare the XOR result against x - 1 (still in ARGM1).  */
    cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR;
  /* Retarget the original comparison at the freshly built value.  */
  if (gcond *cond = dyn_cast <gcond *> (stmt))
    {
      gimple_cond_set_lhs (cond, gimple_assign_lhs (g));
      gimple_cond_set_rhs (cond, argm1);
      gimple_cond_set_code (cond, cmpcode);
    }
  else
    {
      gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (g));
      gimple_assign_set_rhs2 (stmt, argm1);
      gimple_assign_set_rhs_code (stmt, cmpcode);
    }
  update_stmt (stmt);
  /* The popcount call is now dead; remove it and release its defs.  */
  gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
  gsi_remove (&gsi2, true);
  release_defs (call);
}
5784 :
5785 : /* Return true if target has support for divmod. */
5786 :
5787 : static bool
5788 28888 : target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
5789 : {
5790 : /* If target supports hardware divmod insn, use it for divmod. */
5791 28888 : if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
5792 : return true;
5793 :
5794 : /* Check if libfunc for divmod is available. */
5795 2596 : rtx libfunc = optab_libfunc (divmod_optab, mode);
5796 2596 : if (libfunc != NULL_RTX)
5797 : {
5798 : /* If optab_handler exists for div_optab, perhaps in a wider mode,
5799 : we don't want to use the libfunc even if it exists for given mode. */
5800 : machine_mode div_mode;
5801 10814 : FOR_EACH_MODE_FROM (div_mode, mode)
5802 8218 : if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5803 : return false;
5804 :
5805 2596 : return targetm.expand_divmod_libfunc != NULL;
5806 : }
5807 :
5808 : return false;
5809 : }
5810 :
5811 : /* Check if stmt is candidate for divmod transform. */
5812 :
5813 : static bool
5814 47725 : divmod_candidate_p (gassign *stmt)
5815 : {
5816 47725 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5817 47725 : machine_mode mode = TYPE_MODE (type);
5818 47725 : optab divmod_optab, div_optab;
5819 :
5820 47725 : if (TYPE_UNSIGNED (type))
5821 : {
5822 : divmod_optab = udivmod_optab;
5823 : div_optab = udiv_optab;
5824 : }
5825 : else
5826 : {
5827 20111 : divmod_optab = sdivmod_optab;
5828 20111 : div_optab = sdiv_optab;
5829 : }
5830 :
5831 47725 : tree op1 = gimple_assign_rhs1 (stmt);
5832 47725 : tree op2 = gimple_assign_rhs2 (stmt);
5833 :
5834 : /* Disable the transform if either is a constant, since division-by-constant
5835 : may have specialized expansion. */
5836 47725 : if (CONSTANT_CLASS_P (op1))
5837 : return false;
5838 :
5839 43954 : if (CONSTANT_CLASS_P (op2))
5840 : {
5841 17322 : if (integer_pow2p (op2))
5842 : return false;
5843 :
5844 15176 : if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5845 16258 : && element_precision (type) <= BITS_PER_WORD)
5846 : return false;
5847 :
5848 : /* If the divisor is not power of 2 and the precision wider than
5849 : HWI, expand_divmod punts on that, so in that case it is better
5850 : to use divmod optab or libfunc. Similarly if choose_multiplier
5851 : might need pre/post shifts of BITS_PER_WORD or more. */
5852 : }
5853 :
5854 : /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5855 : expand using the [su]divv optabs. */
5856 28888 : if (TYPE_OVERFLOW_TRAPS (type))
5857 : return false;
5858 :
5859 28888 : if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5860 : return false;
5861 :
5862 : return true;
5863 : }
5864 :
/* This function looks for:
     t1 = a TRUNC_DIV_EXPR b;
     t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
     complex_tmp = DIVMOD (a, b);
     t1 = REALPART_EXPR (complex_tmp);
     t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */
static bool
convert_to_divmod (gassign *stmt)
{
  /* Throwing statements can't be moved/merged; also bail if the
     operation doesn't qualify for the transform at all.  */
  if (stmt_can_throw_internal (cfun, stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  Walk all uses of OP1 and look for
     matching TRUNC_DIV_EXPR / TRUNC_MOD_EXPR statements.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (stmt_can_throw_internal (cfun, use_stmt))
	    continue;

	  basic_block bb = gimple_bb (use_stmt);

	  if (bb == top_bb)
	    {
	      /* Within the same block, UIDs give statement order; pick
		 the earliest one.  */
	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
		top_stmt = use_stmt;
	    }
	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	    {
	      /* A statement in a dominating block is "higher".  */
	      top_bb = bb;
	      top_stmt = use_stmt;
	    }
	}
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (use_stmt == top_stmt
	      || stmt_can_throw_internal (cfun, use_stmt)
	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
	    continue;

	  stmts.safe_push (use_stmt);
	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
	    div_seen = true;
	}
    }

  /* Only a lone modulo was found: DIVMOD would not pay off.  */
  if (!div_seen)
    return false;

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  The result is a complex value
     holding quotient (real part) and remainder (imaginary part).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
	{
	case TRUNC_DIV_EXPR:
	  new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
	  break;

	case TRUNC_MOD_EXPR:
	  new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
	  break;

	default:
	  gcc_unreachable ();
	}

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}
6008 :
6009 : /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
6010 : its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
6011 : value is true iff we converted the statement. */
6012 :
6013 : static bool
6014 168128 : convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
6015 : {
6016 168128 : tree lhs = gimple_assign_lhs (stmt);
6017 168128 : tree stype = TREE_TYPE (lhs);
6018 168128 : tree sarg0 = gimple_assign_rhs1 (stmt);
6019 168128 : tree sarg1 = gimple_assign_rhs2 (stmt);
6020 :
6021 168128 : if (TREE_CODE (stype) != INTEGER_TYPE
6022 161418 : || TREE_CODE (sarg1) != INTEGER_CST
6023 146333 : || TREE_CODE (sarg0) != SSA_NAME
6024 146332 : || !tree_fits_uhwi_p (sarg1)
6025 314460 : || !has_single_use (sarg0))
6026 : return false;
6027 :
6028 40883 : gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
6029 37899 : if (!def)
6030 : return false;
6031 :
6032 37899 : enum tree_code mcode = gimple_assign_rhs_code (def);
6033 37899 : if (mcode == NOP_EXPR)
6034 : {
6035 5883 : tree tmp = gimple_assign_rhs1 (def);
6036 5883 : if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
6037 : return false;
6038 169026 : def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
6039 1740 : if (!def)
6040 : return false;
6041 1740 : mcode = gimple_assign_rhs_code (def);
6042 : }
6043 :
6044 33756 : if (mcode != WIDEN_MULT_EXPR
6045 33756 : || gimple_bb (def) != gimple_bb (stmt))
6046 : return false;
6047 844 : tree mtype = TREE_TYPE (gimple_assign_lhs (def));
6048 844 : if (TREE_CODE (mtype) != INTEGER_TYPE
6049 844 : || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
6050 : return false;
6051 :
6052 844 : tree mop1 = gimple_assign_rhs1 (def);
6053 844 : tree mop2 = gimple_assign_rhs2 (def);
6054 844 : tree optype = TREE_TYPE (mop1);
6055 844 : bool unsignedp = TYPE_UNSIGNED (optype);
6056 844 : unsigned int prec = TYPE_PRECISION (optype);
6057 :
6058 844 : if (unsignedp != TYPE_UNSIGNED (mtype)
6059 844 : || TYPE_PRECISION (mtype) != 2 * prec)
6060 : return false;
6061 :
6062 844 : unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
6063 844 : if (bits < prec || bits >= 2 * prec)
6064 : return false;
6065 :
6066 : /* For the time being, require operands to have the same sign. */
6067 842 : if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
6068 : return false;
6069 :
6070 842 : machine_mode mode = TYPE_MODE (optype);
6071 842 : optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
6072 842 : if (optab_handler (tab, mode) == CODE_FOR_nothing)
6073 : return false;
6074 :
6075 842 : location_t loc = gimple_location (stmt);
6076 842 : tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
6077 : MULT_HIGHPART_EXPR, mop1, mop2);
6078 842 : tree highpart2 = highpart1;
6079 842 : tree ntype = optype;
6080 :
6081 842 : if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
6082 : {
6083 16 : ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
6084 7 : : signed_type_for (optype);
6085 16 : highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
6086 : }
6087 842 : if (bits > prec)
6088 29 : highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
6089 : RSHIFT_EXPR, highpart2,
6090 29 : build_int_cst (ntype, bits - prec));
6091 :
6092 842 : gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
6093 842 : gsi_replace (gsi, new_stmt, true);
6094 :
6095 842 : widen_mul_stats.highpart_mults_inserted++;
6096 842 : return true;
6097 : }
6098 :
6099 : /* If target has spaceship<MODE>3 expander, pattern recognize
6100 : <bb 2> [local count: 1073741824]:
6101 : if (a_2(D) == b_3(D))
6102 : goto <bb 6>; [34.00%]
6103 : else
6104 : goto <bb 3>; [66.00%]
6105 :
6106 : <bb 3> [local count: 708669601]:
6107 : if (a_2(D) < b_3(D))
6108 : goto <bb 6>; [1.04%]
6109 : else
6110 : goto <bb 4>; [98.96%]
6111 :
6112 : <bb 4> [local count: 701299439]:
6113 : if (a_2(D) > b_3(D))
6114 : goto <bb 5>; [48.89%]
6115 : else
6116 : goto <bb 6>; [51.11%]
6117 :
6118 : <bb 5> [local count: 342865295]:
6119 :
6120 : <bb 6> [local count: 1073741824]:
6121 : and turn it into:
6122 : <bb 2> [local count: 1073741824]:
6123 : _1 = .SPACESHIP (a_2(D), b_3(D), 0);
6124 : if (_1 == 0)
6125 : goto <bb 6>; [34.00%]
6126 : else
6127 : goto <bb 3>; [66.00%]
6128 :
6129 : <bb 3> [local count: 708669601]:
6130 : if (_1 == -1)
6131 : goto <bb 6>; [1.04%]
6132 : else
6133 : goto <bb 4>; [98.96%]
6134 :
6135 : <bb 4> [local count: 701299439]:
6136 : if (_1 == 1)
6137 : goto <bb 5>; [48.89%]
6138 : else
6139 : goto <bb 6>; [51.11%]
6140 :
6141 : <bb 5> [local count: 342865295]:
6142 :
6143 : <bb 6> [local count: 1073741824]:
6144 : so that the backend can emit optimal comparison and
6145 : conditional jump sequence. If the
6146 : <bb 6> [local count: 1073741824]:
6147 : above has a single PHI like:
6148 : # _27 = PHI<0(2), -1(3), -128(4), 1(5)>
6149 : then replace it with effectively
6150 : _1 = .SPACESHIP (a_2(D), b_3(D), -128);
6151 : _27 = _1; */
6152 :
6153 : static void
6154 4161016 : optimize_spaceship (gcond *stmt)
6155 : {
6156 4161016 : enum tree_code code = gimple_cond_code (stmt);
6157 4161016 : if (code != EQ_EXPR && code != NE_EXPR)
6158 4160824 : return;
6159 3367023 : tree arg1 = gimple_cond_lhs (stmt);
6160 3367023 : tree arg2 = gimple_cond_rhs (stmt);
6161 3367023 : if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
6162 3256781 : && !INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
6163 2607949 : || optab_handler (spaceship_optab,
6164 2607949 : TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
6165 5933754 : || operand_equal_p (arg1, arg2, 0))
6166 801611 : return;
6167 :
6168 2565412 : basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
6169 2565412 : edge em1 = NULL, e1 = NULL, e2 = NULL;
6170 2565412 : bb1 = EDGE_SUCC (bb0, 1)->dest;
6171 2565412 : if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
6172 1546278 : bb1 = EDGE_SUCC (bb0, 0)->dest;
6173 :
6174 7644219 : gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
6175 1123157 : if (g == NULL
6176 1123157 : || !single_pred_p (bb1)
6177 707570 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6178 589207 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6179 470844 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6180 1672 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6181 599180 : || !cond_only_block_p (bb1))
6182 2556131 : return;
6183 :
6184 9281 : enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6185 9281 : ? LT_EXPR : GT_EXPR);
6186 9281 : switch (gimple_cond_code (g))
6187 : {
6188 : case LT_EXPR:
6189 : case LE_EXPR:
6190 : break;
6191 7813 : case GT_EXPR:
6192 7813 : case GE_EXPR:
6193 7813 : ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
6194 : break;
6195 : default:
6196 : return;
6197 : }
6198 :
6199 27753 : for (int i = 0; i < 2; ++i)
6200 : {
6201 : /* With NaNs, </<=/>/>= are false, so we need to look for the
6202 : third comparison on the false edge from whatever non-equality
6203 : comparison the second comparison is. */
6204 18556 : if (HONOR_NANS (TREE_TYPE (arg1))
6205 18556 : && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
6206 199 : continue;
6207 :
6208 18357 : bb2 = EDGE_SUCC (bb1, i)->dest;
6209 54746 : g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
6210 12571 : if (g == NULL
6211 12571 : || !single_pred_p (bb2)
6212 17611 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6213 9924 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6214 2237 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6215 11 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6216 85 : || !cond_only_block_p (bb2)
6217 10009 : || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
6218 18272 : continue;
6219 :
6220 85 : enum tree_code ccode2
6221 85 : = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
6222 85 : switch (gimple_cond_code (g))
6223 : {
6224 : case LT_EXPR:
6225 : case LE_EXPR:
6226 : break;
6227 55 : case GT_EXPR:
6228 55 : case GE_EXPR:
6229 55 : ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
6230 : break;
6231 1 : default:
6232 1 : continue;
6233 : }
6234 84 : if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
6235 0 : continue;
6236 :
6237 168 : if ((ccode == LT_EXPR)
6238 84 : ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
6239 : {
6240 55 : em1 = EDGE_SUCC (bb1, 1 - i);
6241 55 : e1 = EDGE_SUCC (bb2, 0);
6242 55 : e2 = EDGE_SUCC (bb2, 1);
6243 55 : if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
6244 0 : std::swap (e1, e2);
6245 : }
6246 : else
6247 : {
6248 29 : e1 = EDGE_SUCC (bb1, 1 - i);
6249 29 : em1 = EDGE_SUCC (bb2, 0);
6250 29 : e2 = EDGE_SUCC (bb2, 1);
6251 29 : if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
6252 : std::swap (em1, e2);
6253 : }
6254 : break;
6255 : }
6256 :
6257 9252 : if (em1 == NULL)
6258 : {
6259 18394 : if ((ccode == LT_EXPR)
6260 9197 : ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
6261 : {
6262 3131 : em1 = EDGE_SUCC (bb1, 1);
6263 3131 : e1 = EDGE_SUCC (bb1, 0);
6264 3131 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6265 : }
6266 : else
6267 : {
6268 6066 : em1 = EDGE_SUCC (bb1, 0);
6269 6066 : e1 = EDGE_SUCC (bb1, 1);
6270 6066 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6271 : }
6272 : }
6273 :
6274 : /* Check if there is a single bb into which all failed conditions
6275 : jump to (perhaps through an empty block) and if it results in
6276 : a single integral PHI which just sets it to -1, 0, 1, X
6277 : (or -1, 0, 1 when NaNs can't happen). In that case use 1 rather
6278 : than 0 as last .SPACESHIP argument to tell backends it might
6279 : consider different code generation and just cast the result
6280 : of .SPACESHIP to the PHI result. X above is some value
6281 : other than -1, 0, 1, for libstdc++ -128, for libc++ -127. */
6282 9281 : tree arg3 = integer_zero_node;
6283 9281 : edge e = EDGE_SUCC (bb0, 0);
6284 9281 : if (e->dest == bb1)
6285 6616 : e = EDGE_SUCC (bb0, 1);
6286 9281 : basic_block bbp = e->dest;
6287 9281 : gphi *phi = NULL;
6288 9281 : for (gphi_iterator psi = gsi_start_phis (bbp);
6289 11260 : !gsi_end_p (psi); gsi_next (&psi))
6290 : {
6291 3596 : gphi *gp = psi.phi ();
6292 3596 : tree res = gimple_phi_result (gp);
6293 :
6294 3596 : if (phi != NULL
6295 3237 : || virtual_operand_p (res)
6296 2258 : || !INTEGRAL_TYPE_P (TREE_TYPE (res))
6297 5722 : || TYPE_PRECISION (TREE_TYPE (res)) < 2)
6298 : {
6299 : phi = NULL;
6300 : break;
6301 : }
6302 1979 : phi = gp;
6303 : }
6304 9281 : if (phi
6305 1620 : && integer_zerop (gimple_phi_arg_def_from_edge (phi, e))
6306 9809 : && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3))
6307 : {
6308 107 : HOST_WIDE_INT argval
6309 107 : = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? -128 : -1;
6310 630 : for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i)
6311 : {
6312 228 : edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2;
6313 228 : if (e3->dest != bbp)
6314 : {
6315 106 : if (!empty_block_p (e3->dest)
6316 96 : || !single_succ_p (e3->dest)
6317 202 : || single_succ (e3->dest) != bbp)
6318 : {
6319 : phi = NULL;
6320 : break;
6321 : }
6322 : e3 = single_succ_edge (e3->dest);
6323 : }
6324 218 : tree a = gimple_phi_arg_def_from_edge (phi, e3);
6325 218 : if (TREE_CODE (a) != INTEGER_CST
6326 218 : || (i == 0 && !integer_onep (a))
6327 430 : || (i == 1 && !integer_all_onesp (a)))
6328 : {
6329 : phi = NULL;
6330 : break;
6331 : }
6332 212 : if (i == 2)
6333 : {
6334 30 : tree minv = TYPE_MIN_VALUE (signed_char_type_node);
6335 30 : tree maxv = TYPE_MAX_VALUE (signed_char_type_node);
6336 30 : widest_int w = widest_int::from (wi::to_wide (a), SIGNED);
6337 41 : if ((w >= -1 && w <= 1)
6338 26 : || w < wi::to_widest (minv)
6339 56 : || w > wi::to_widest (maxv))
6340 : {
6341 4 : phi = NULL;
6342 4 : break;
6343 : }
6344 26 : argval = w.to_shwi ();
6345 26 : }
6346 : }
6347 107 : if (phi)
6348 87 : arg3 = build_int_cst (integer_type_node,
6349 103 : TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval);
6350 : }
6351 :
6352 : /* For integral <=> comparisons only use .SPACESHIP if it is turned
6353 : into an integer (-1, 0, 1). */
6354 9281 : if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) && arg3 == integer_zero_node)
6355 : return;
6356 :
6357 279 : gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 3, arg1, arg2, arg3);
6358 279 : tree lhs = make_ssa_name (integer_type_node);
6359 279 : gimple_call_set_lhs (gc, lhs);
6360 279 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6361 279 : gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
6362 :
6363 471 : wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node));
6364 471 : wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node));
6365 279 : if (HONOR_NANS (TREE_TYPE (arg1)))
6366 : {
6367 199 : if (arg3 == integer_zero_node)
6368 173 : wmin = wi::shwi (-128, TYPE_PRECISION (integer_type_node));
6369 26 : else if (tree_int_cst_sgn (arg3) < 0)
6370 19 : wmin = wi::to_wide (arg3);
6371 : else
6372 7 : wmax = wi::to_wide (arg3);
6373 : }
6374 471 : int_range<1> vr (TREE_TYPE (lhs), wmin, wmax);
6375 279 : set_range_info (lhs, vr);
6376 :
6377 279 : if (arg3 != integer_zero_node)
6378 : {
6379 87 : tree type = TREE_TYPE (gimple_phi_result (phi));
6380 87 : if (!useless_type_conversion_p (type, integer_type_node))
6381 : {
6382 63 : tree tem = make_ssa_name (type);
6383 63 : gimple *gcv = gimple_build_assign (tem, NOP_EXPR, lhs);
6384 63 : gsi_insert_before (&gsi, gcv, GSI_SAME_STMT);
6385 63 : lhs = tem;
6386 : }
6387 87 : SET_PHI_ARG_DEF_ON_EDGE (phi, e, lhs);
6388 87 : gimple_cond_set_lhs (stmt, boolean_false_node);
6389 87 : gimple_cond_set_rhs (stmt, boolean_false_node);
6390 161 : gimple_cond_set_code (stmt, (e->flags & EDGE_TRUE_VALUE)
6391 : ? EQ_EXPR : NE_EXPR);
6392 87 : update_stmt (stmt);
6393 87 : return;
6394 : }
6395 :
6396 192 : gimple_cond_set_lhs (stmt, lhs);
6397 192 : gimple_cond_set_rhs (stmt, integer_zero_node);
6398 192 : update_stmt (stmt);
6399 :
6400 384 : gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
6401 192 : gimple_cond_set_lhs (cond, lhs);
6402 192 : if (em1->src == bb1 && e2 != em1)
6403 : {
6404 113 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6405 119 : gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
6406 : ? EQ_EXPR : NE_EXPR);
6407 : }
6408 : else
6409 : {
6410 79 : gcc_assert (e1->src == bb1 && e2 != e1);
6411 79 : gimple_cond_set_rhs (cond, integer_one_node);
6412 79 : gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
6413 : ? EQ_EXPR : NE_EXPR);
6414 : }
6415 192 : update_stmt (cond);
6416 :
6417 192 : if (e2 != e1 && e2 != em1)
6418 : {
6419 116 : cond = as_a <gcond *> (*gsi_last_bb (bb2));
6420 58 : gimple_cond_set_lhs (cond, lhs);
6421 58 : if (em1->src == bb2)
6422 29 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6423 : else
6424 : {
6425 29 : gcc_assert (e1->src == bb2);
6426 29 : gimple_cond_set_rhs (cond, integer_one_node);
6427 : }
6428 58 : gimple_cond_set_code (cond,
6429 58 : (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
6430 58 : update_stmt (cond);
6431 : }
6432 : }
6433 :
6434 :
6435 : /* Find integer multiplications where the operands are extended from
6436 : smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
6437 : or MULT_HIGHPART_EXPR where appropriate. */
6438 :
6439 : namespace {
6440 :
/* Pass registration data for the widening_mul pass.  It requires SSA
   form and requests an incremental SSA update once the pass finishes
   (TODO_update_ssa), since the transformations introduce new SSA
   names.  */
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
6453 :
6454 : class pass_optimize_widening_mul : public gimple_opt_pass
6455 : {
6456 : public:
6457 285722 : pass_optimize_widening_mul (gcc::context *ctxt)
6458 571444 : : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
6459 : {}
6460 :
6461 : /* opt_pass methods: */
6462 1041484 : bool gate (function *) final override
6463 : {
6464 1041484 : return flag_expensive_optimizations && optimize;
6465 : }
6466 :
6467 : unsigned int execute (function *) final override;
6468 :
6469 : }; // class pass_optimize_widening_mul
6470 :
6471 : /* Walker class to perform the transformation in reverse dominance order. */
6472 :
class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */

  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */

  void after_dom_children (basic_block) final override;

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};
6495 :
/* Process basic block BB during the dominator walk: match saturation
   arithmetic rooted at PHIs, then scan BB's statements for widening
   multiplies, FMA candidates, copysign expansion, arithmetic-overflow
   patterns, divmod, highpart multiplies, single-bit tests and
   .SPACESHIP opportunities, dispatching on the statement's code.  */

void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* FMA deferring is active only when the target limits FMA chains
     via param_avoid_fma_max_bits.  */
  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);

  /* First try the PHI-based saturation matchers.  The next iterator is
     computed up front because a successful match removes the current
     PHI node.  */
  for (gphi_iterator psi_next, psi = gsi_start_phis (bb); !gsi_end_p (psi);
       psi = psi_next)
    {
      psi_next = psi;
      gsi_next (&psi_next);

      /* Insertion point for any statements the matchers emit.  */
      gimple_stmt_iterator gsi = gsi_after_labels (bb);
      gphi *phi = psi.phi ();

      if (match_saturation_add (&gsi, phi)
	  || match_saturation_sub (&gsi, phi)
	  || match_saturation_trunc (&gsi, phi)
	  || match_saturation_mul (&gsi, phi))
	remove_phi_node (&psi, /* release_lhs_p */ false);
    }

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
	{
	  code = gimple_assign_rhs_code (stmt);
	  switch (code)
	    {
	    case MULT_EXPR:
	      /* Try widening mult, then copysign expansion, then FMA
		 formation; a successful FMA match consumes STMT, so
		 remove it and continue at the next statement.  */
	      if (!convert_mult_to_widen (stmt, &gsi)
		  && !convert_expand_mult_copysign (stmt, &gsi)
		  && convert_mult_to_fma (stmt,
					  gimple_assign_rhs1 (stmt),
					  gimple_assign_rhs2 (stmt),
					  &fma_state))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
	      break;

	    case PLUS_EXPR:
	      match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
	      /* fall-through */
	    case MINUS_EXPR:
	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
		{
		  match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
		  /* Only try uaddc/usubc if match_arith_overflow left
		     STMT in place at GSI.  */
		  if (gsi_stmt (gsi) == stmt)
		    match_uaddc_usubc (&gsi, stmt, code);
		}
	      break;

	    case BIT_NOT_EXPR:
	      /* A successful overflow match replaces STMT; skip the
		 gsi_next in that case.  */
	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
		continue;
	      break;

	    case TRUNC_MOD_EXPR:
	      convert_to_divmod (as_a<gassign *> (stmt));
	      break;

	    case RSHIFT_EXPR:
	      convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
	      break;

	    case BIT_IOR_EXPR:
	      match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
	      match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
	      match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
	      /* fall-through */
	    case BIT_XOR_EXPR:
	      match_uaddc_usubc (&gsi, stmt, code);
	      break;

	    case EQ_EXPR:
	    case NE_EXPR:
	    case LE_EXPR:
	    case GT_EXPR:
	      match_single_bit_test (&gsi, stmt);
	      break;

	    case COND_EXPR:
	    case BIT_AND_EXPR:
	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
	      break;

	    case NOP_EXPR:
	      match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
	      match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
	      match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
	      break;

	    default:;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  switch (gimple_call_combined_fn (stmt))
	    {
	    case CFN_COND_MUL:
	      /* Conditional multiply: the multiplicands are args 1 and
		 2, the mask/condition is arg 0.  */
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0)))

		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_LEN_MUL:
	      /* As above, but with the length and bias operands (args
		 4 and 5) passed through as well.  */
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0),
				       gimple_call_arg (stmt, 4),
				       gimple_call_arg (stmt, 5)))

		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_LAST:
	      /* A call with no recognized combined function cancels
		 any pending FMA deferring.  */
	      cancel_fma_deferring (&fma_state);
	      break;

	    default:
	      break;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_COND)
	{
	  match_single_bit_test (&gsi, stmt);
	  optimize_spaceship (as_a <gcond *> (stmt));
	}
      gsi_next (&gsi);
    }
  /* At the end of the block, keep deferring a surviving FMA candidate
     only if it feeds the initial PHI of its chain; otherwise cancel
     the deferring.  */
  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
						 &m_last_result_set))
	cancel_fma_deferring (&fma_state);
      else
	m_last_result_set.add (fma_state.m_last_result);
    }
}
6661 :
6662 :
6663 : unsigned int
6664 964208 : pass_optimize_widening_mul::execute (function *fun)
6665 : {
6666 964208 : bool cfg_changed = false;
6667 :
6668 964208 : memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
6669 964208 : calculate_dominance_info (CDI_DOMINATORS);
6670 964208 : renumber_gimple_stmt_uids (cfun);
6671 :
6672 964208 : math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6673 :
6674 964208 : statistics_counter_event (fun, "widening multiplications inserted",
6675 : widen_mul_stats.widen_mults_inserted);
6676 964208 : statistics_counter_event (fun, "widening maccs inserted",
6677 : widen_mul_stats.maccs_inserted);
6678 964208 : statistics_counter_event (fun, "fused multiply-adds inserted",
6679 : widen_mul_stats.fmas_inserted);
6680 964208 : statistics_counter_event (fun, "divmod calls inserted",
6681 : widen_mul_stats.divmod_calls_inserted);
6682 964208 : statistics_counter_event (fun, "highpart multiplications inserted",
6683 : widen_mul_stats.highpart_mults_inserted);
6684 :
6685 964208 : return cfg_changed ? TODO_cleanup_cfg : 0;
6686 : }
6687 :
6688 : } // anon namespace
6689 :
/* Factory: allocate a new instance of the widening_mul pass in
   context CTXT.  Caller (the pass manager) takes ownership.  */

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}
|