Line data Source code
1 : /* Global, SSA-based optimizations using mathematical identities.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 : operations. These are common in sequences such as this one:
22 :
23 : modulus = sqrt(x*x + y*y + z*z);
24 : x = x / modulus;
25 : y = y / modulus;
26 : z = z / modulus;
27 :
28 : that can be optimized to
29 :
30 : modulus = sqrt(x*x + y*y + z*z);
31 : rmodulus = 1.0 / modulus;
32 : x = x * rmodulus;
33 : y = y * rmodulus;
34 : z = z * rmodulus;
35 :
36 : We do this for loop invariant divisors, and with this pass whenever
37 : we notice that a division has the same divisor multiple times.
38 :
39 : Of course, like in PRE, we don't insert a division if a dominator
40 : already has one. However, this cannot be done as an extension of
41 : PRE for several reasons.
42 :
43 : First of all, with some experiments it was found out that the
44 : transformation is not always useful if there are only two divisions
45 : by the same divisor. This is probably because modern processors
46 : can pipeline the divisions; on older, in-order processors it should
47 : still be effective to optimize two divisions by the same number.
48 : We make this a param, and it shall be called N in the remainder of
49 : this comment.
50 :
51 : Second, if trapping math is active, we have less freedom on where
52 : to insert divisions: we can only do so in basic blocks that already
53 : contain one. (If divisions don't trap, instead, we can insert
54 : divisions elsewhere, which will be in blocks that are common dominators
55 : of those that have the division).
56 :
57 : We really don't want to compute the reciprocal unless a division will
58 : be found. To do this, we won't insert the division in a basic block
59 : that has less than N divisions *post-dominating* it.
60 :
61 : The algorithm constructs a subset of the dominator tree, holding the
62 : blocks containing the divisions and the common dominators to them,
63 : and walks it twice. The first walk is in post-order, and it annotates
64 : each block with the number of divisions that post-dominate it: this
65 : gives information on where divisions can be inserted profitably.
66 : The second walk is in pre-order, and it inserts divisions as explained
67 : above, and replaces divisions by multiplications.
68 :
69 : In the best case, the cost of the pass is O(n_statements). In the
70 : worst-case, the cost is due to creating the dominator tree subset,
71 : with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 : for n_statements / n_basic_blocks statements. So, the amortized cost
73 : of creating the dominator tree subset is O(n_basic_blocks) and the
74 : worst-case cost of the pass is O(n_statements * n_basic_blocks).
75 :
76 : More practically, the cost will be small because there are few
77 : divisions, and they tend to be in the same basic block, so insert_bb
78 : is called very few times.
79 :
80 : If we did this using domwalk.cc, an efficient implementation would have
81 : to work on all the variables in a single pass, because we could not
82 : work on just a subset of the dominator tree, as we do now, and the
83 : cost would also be something like O(n_statements * n_basic_blocks).
84 : The data structures would be more complex in order to work on all the
85 : variables in a single pass. */
86 :
87 : #include "config.h"
88 : #include "system.h"
89 : #include "coretypes.h"
90 : #include "backend.h"
91 : #include "target.h"
92 : #include "rtl.h"
93 : #include "tree.h"
94 : #include "gimple.h"
95 : #include "predict.h"
96 : #include "alloc-pool.h"
97 : #include "tree-pass.h"
98 : #include "ssa.h"
99 : #include "optabs-tree.h"
100 : #include "gimple-pretty-print.h"
101 : #include "alias.h"
102 : #include "fold-const.h"
103 : #include "gimple-iterator.h"
104 : #include "gimple-fold.h"
105 : #include "stor-layout.h"
106 : #include "tree-cfg.h"
107 : #include "tree-dfa.h"
108 : #include "tree-ssa.h"
109 : #include "builtins.h"
110 : #include "internal-fn.h"
111 : #include "case-cfn-macros.h"
112 : #include "optabs-libfuncs.h"
113 : #include "tree-eh.h"
114 : #include "targhooks.h"
115 : #include "domwalk.h"
116 : #include "tree-ssa-math-opts.h"
117 : #include "dbgcnt.h"
118 : #include "cfghooks.h"
119 :
120 : /* This structure represents one basic block that either computes a
121 : division, or is a common dominator for basic block that compute a
122 : division. */
123 : struct occurrence {
124 : /* The basic block represented by this structure. */
125 : basic_block bb = basic_block();
126 :
127 : /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 : inserted in BB. */
129 : tree recip_def = tree();
130 :
131 : /* If non-NULL, the SSA_NAME holding the definition for a squared
132 : reciprocal inserted in BB. */
133 : tree square_recip_def = tree();
134 :
135 : /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 : was inserted in BB. */
137 : gimple *recip_def_stmt = nullptr;
138 :
139 : /* Pointer to a list of "struct occurrence"s for blocks dominated
140 : by BB. */
141 : struct occurrence *children = nullptr;
142 :
143 : /* Pointer to the next "struct occurrence"s in the list of blocks
144 : sharing a common dominator. */
145 : struct occurrence *next = nullptr;
146 :
147 : /* The number of divisions that are in BB before compute_merit. The
148 : number of divisions that are in BB or post-dominate it after
149 : compute_merit. */
150 : int num_divisions = 0;
151 :
152 : /* True if the basic block has a division, false if it is a common
153 : dominator for basic blocks that do. If it is false and trapping
154 : math is active, BB is not a candidate for inserting a reciprocal. */
155 : bool bb_has_division = false;
156 :
157 : /* Construct a struct occurrence for basic block BB, and whose
158 : children list is headed by CHILDREN. */
159 585 : occurrence (basic_block bb, struct occurrence *children)
160 585 : : bb (bb), children (children)
161 : {
162 585 : bb->aux = this;
163 : }
164 :
165 : /* Destroy a struct occurrence and remove it from its basic block. */
166 585 : ~occurrence ()
167 : {
168 585 : bb->aux = nullptr;
169 585 : }
170 :
171 : /* Allocate memory for a struct occurrence from OCC_POOL. */
172 : static void* operator new (size_t);
173 :
174 : /* Return memory for a struct occurrence to OCC_POOL. */
175 : static void operator delete (void*, size_t);
176 : };
177 :
/* Counters for the reciprocal transformations.  */
static struct
{
  /* How many 1.0/X operations were inserted.  */
  int rdivs_inserted;

  /* How many 1.0/FUNC operations were inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

/* Counters for the sincos transformations.  */
static struct
{
  /* How many cexpi calls were inserted.  */
  int inserted;

  /* How many conversions were removed.  */
  int conv_removed;

} sincos_stats;

/* Counters for the widening-multiplication transformations.  */
static struct
{
  /* How many widening multiplication operations were inserted.  */
  int widen_mults_inserted;

  /* How many integer multiply-and-accumulate operations were
     inserted.  */
  int maccs_inserted;

  /* How many floating-point fused multiply-add operations were
     inserted.  */
  int fmas_inserted;

  /* How many divmod calls were inserted.  */
  int divmod_calls_inserted;

  /* How many highpart multiplication operations were inserted.  */
  int highpart_mults_inserted;
} widen_mul_stats;
214 :
215 : /* The instance of "struct occurrence" representing the highest
216 : interesting block in the dominator tree. */
217 : static struct occurrence *occ_head;
218 :
219 : /* Allocation pool for getting instances of "struct occurrence". */
220 : static object_allocator<occurrence> *occ_pool;
221 :
222 585 : void* occurrence::operator new (size_t n)
223 : {
224 585 : gcc_assert (n == sizeof(occurrence));
225 585 : return occ_pool->allocate_raw ();
226 : }
227 :
228 585 : void occurrence::operator delete (void *occ, size_t n)
229 : {
230 585 : gcc_assert (n == sizeof(occurrence));
231 585 : occ_pool->remove_raw (occ);
232 585 : }
233 :
234 : /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 : list of "struct occurrence"s, one per basic block, having IDOM as
236 : their common dominator.
237 :
238 : We try to insert NEW_OCC as deep as possible in the tree, and we also
239 : insert any other block that is a common dominator for BB and one
240 : block already in the tree. */
241 :
242 : static void
243 573 : insert_bb (struct occurrence *new_occ, basic_block idom,
244 : struct occurrence **p_head)
245 : {
246 579 : struct occurrence *occ, **p_occ;
247 :
248 605 : for (p_occ = p_head; (occ = *p_occ) != NULL; )
249 : {
250 32 : basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 32 : basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 32 : if (dom == bb)
253 : {
254 : /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 : from its list. */
256 7 : *p_occ = occ->next;
257 7 : occ->next = new_occ->children;
258 7 : new_occ->children = occ;
259 :
260 : /* Try the next block (it may as well be dominated by BB). */
261 : }
262 :
263 25 : else if (dom == occ_bb)
264 : {
265 : /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 6 : insert_bb (new_occ, dom, &occ->children);
267 6 : return;
268 : }
269 :
270 19 : else if (dom != idom)
271 : {
272 12 : gcc_assert (!dom->aux);
273 :
274 : /* There is a dominator between IDOM and BB, add it and make
275 : two children out of NEW_OCC and OCC. First, remove OCC from
276 : its list. */
277 12 : *p_occ = occ->next;
278 12 : new_occ->next = occ;
279 12 : occ->next = NULL;
280 :
281 : /* None of the previous blocks has DOM as a dominator: if we tail
282 : recursed, we would reexamine them uselessly. Just switch BB with
283 : DOM, and go on looking for blocks dominated by DOM. */
284 12 : new_occ = new occurrence (dom, new_occ);
285 : }
286 :
287 : else
288 : {
289 : /* Nothing special, go on with the next element. */
290 7 : p_occ = &occ->next;
291 : }
292 : }
293 :
294 : /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 573 : new_occ->next = *p_head;
296 573 : *p_head = new_occ;
297 : }
298 :
299 : /* Register that we found a division in BB.
300 : IMPORTANCE is a measure of how much weighting to give
301 : that division. Use IMPORTANCE = 2 to register a single
302 : division. If the division is going to be found multiple
303 : times use 1 (as it is with squares). */
304 :
305 : static inline void
306 674 : register_division_in (basic_block bb, int importance)
307 : {
308 674 : struct occurrence *occ;
309 :
310 674 : occ = (struct occurrence *) bb->aux;
311 674 : if (!occ)
312 : {
313 573 : occ = new occurrence (bb, NULL);
314 573 : insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
315 : }
316 :
317 674 : occ->bb_has_division = true;
318 674 : occ->num_divisions += importance;
319 674 : }
320 :
321 :
322 : /* Compute the number of divisions that postdominate each block in OCC and
323 : its children. */
324 :
325 : static void
326 31 : compute_merit (struct occurrence *occ)
327 : {
328 31 : struct occurrence *occ_child;
329 31 : basic_block dom = occ->bb;
330 :
331 60 : for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
332 : {
333 29 : basic_block bb;
334 29 : if (occ_child->children)
335 5 : compute_merit (occ_child);
336 :
337 29 : if (flag_exceptions)
338 6 : bb = single_noncomplex_succ (dom);
339 : else
340 : bb = dom;
341 :
342 29 : if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 12 : occ->num_divisions += occ_child->num_divisions;
344 : }
345 31 : }
346 :
347 :
348 : /* Return whether USE_STMT is a floating-point division by DEF. */
349 : static inline bool
350 345155 : is_division_by (gimple *use_stmt, tree def)
351 : {
352 345155 : return is_gimple_assign (use_stmt)
353 235819 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 1219 : && gimple_assign_rhs2 (use_stmt) == def
355 : /* Do not recognize x / x as valid division, as we are getting
356 : confused later by replacing all immediate uses x in such
357 : a stmt. */
358 851 : && gimple_assign_rhs1 (use_stmt) != def
359 346006 : && !stmt_can_throw_internal (cfun, use_stmt);
360 : }
361 :
362 : /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 : static inline bool
364 341397 : is_mult_by (gimple *use_stmt, tree def, tree a)
365 : {
366 341397 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 341397 : && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
368 : {
369 78114 : tree op0 = gimple_assign_rhs1 (use_stmt);
370 78114 : tree op1 = gimple_assign_rhs2 (use_stmt);
371 :
372 78114 : return (op0 == def && op1 == a)
373 78114 : || (op0 == a && op1 == def);
374 : }
375 : return 0;
376 : }
377 :
378 : /* Return whether USE_STMT is DEF * DEF. */
379 : static inline bool
380 341352 : is_square_of (gimple *use_stmt, tree def)
381 : {
382 5 : return is_mult_by (use_stmt, def, def);
383 : }
384 :
385 : /* Return whether USE_STMT is a floating-point division by
386 : DEF * DEF. */
387 : static inline bool
388 180 : is_division_by_square (gimple *use_stmt, tree def)
389 : {
390 180 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 173 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 7 : && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 187 : && !stmt_can_throw_internal (cfun, use_stmt))
394 : {
395 7 : tree denominator = gimple_assign_rhs2 (use_stmt);
396 7 : if (TREE_CODE (denominator) == SSA_NAME)
397 7 : return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
398 : }
399 : return 0;
400 : }
401 :
402 : /* Walk the subset of the dominator tree rooted at OCC, setting the
403 : RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 : the given basic block. The field may be left NULL, of course,
405 : if it is not possible or profitable to do the optimization.
406 :
407 : DEF_BSI is an iterator pointing at the statement defining DEF.
408 : If RECIP_DEF is set, a dominator already has a computation that can
409 : be used.
410 :
411 : If should_insert_square_recip is set, then this also inserts
412 : the square of the reciprocal immediately after the definition
413 : of the reciprocal. */
414 :
415 : static void
416 55 : insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 : tree def, tree recip_def, tree square_recip_def,
418 : int should_insert_square_recip, int threshold)
419 : {
420 55 : tree type;
421 55 : gassign *new_stmt, *new_square_stmt;
422 55 : gimple_stmt_iterator gsi;
423 55 : struct occurrence *occ_child;
424 :
425 55 : if (!recip_def
426 39 : && (occ->bb_has_division || !flag_trapping_math)
427 : /* Divide by two as all divisions are counted twice in
428 : the costing loop. */
429 35 : && occ->num_divisions / 2 >= threshold)
430 : {
431 : /* Make a variable with the replacement and substitute it. */
432 24 : type = TREE_TYPE (def);
433 24 : recip_def = create_tmp_reg (type, "reciptmp");
434 24 : new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 : build_one_cst (type), def);
436 :
437 24 : if (should_insert_square_recip)
438 : {
439 4 : square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 4 : new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 : recip_def, recip_def);
442 : }
443 :
444 24 : if (occ->bb_has_division)
445 : {
446 : /* Case 1: insert before an existing division. */
447 21 : gsi = gsi_after_labels (occ->bb);
448 200 : while (!gsi_end_p (gsi)
449 200 : && (!is_division_by (gsi_stmt (gsi), def))
450 380 : && (!is_division_by_square (gsi_stmt (gsi), def)))
451 179 : gsi_next (&gsi);
452 :
453 21 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 21 : if (should_insert_square_recip)
455 3 : gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
456 : }
457 3 : else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
458 : {
459 : /* Case 2: insert right after the definition. Note that this will
460 : never happen if the definition statement can throw, because in
461 : that case the sole successor of the statement's basic block will
462 : dominate all the uses as well. */
463 2 : gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 2 : if (should_insert_square_recip)
465 1 : gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
466 : }
467 : else
468 : {
469 : /* Case 3: insert in a basic block not containing defs/uses. */
470 1 : gsi = gsi_after_labels (occ->bb);
471 1 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 1 : if (should_insert_square_recip)
473 0 : gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
474 : }
475 :
476 24 : reciprocal_stats.rdivs_inserted++;
477 :
478 24 : occ->recip_def_stmt = new_stmt;
479 : }
480 :
481 55 : occ->recip_def = recip_def;
482 55 : occ->square_recip_def = square_recip_def;
483 84 : for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 29 : insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 : square_recip_def, should_insert_square_recip,
486 : threshold);
487 55 : }
488 :
489 : /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 : Take as argument the use for (x * x). */
491 : static inline void
492 4 : replace_reciprocal_squares (use_operand_p use_p)
493 : {
494 4 : gimple *use_stmt = USE_STMT (use_p);
495 4 : basic_block bb = gimple_bb (use_stmt);
496 4 : struct occurrence *occ = (struct occurrence *) bb->aux;
497 :
498 8 : if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 8 : && occ->recip_def)
500 : {
501 4 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 4 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 4 : gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 4 : SET_USE (use_p, occ->square_recip_def);
505 4 : fold_stmt_inplace (&gsi);
506 4 : update_stmt (use_stmt);
507 : }
508 4 : }
509 :
510 :
511 : /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 : possible. */
513 :
514 : static inline void
515 115 : replace_reciprocal (use_operand_p use_p)
516 : {
517 115 : gimple *use_stmt = USE_STMT (use_p);
518 115 : basic_block bb = gimple_bb (use_stmt);
519 115 : struct occurrence *occ = (struct occurrence *) bb->aux;
520 :
521 115 : if (optimize_bb_for_speed_p (bb)
522 115 : && occ->recip_def && use_stmt != occ->recip_def_stmt)
523 : {
524 80 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 80 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 80 : SET_USE (use_p, occ->recip_def);
527 80 : fold_stmt_inplace (&gsi);
528 80 : update_stmt (use_stmt);
529 : }
530 115 : }
531 :
532 :
533 : /* Free OCC and return one more "struct occurrence" to be freed. */
534 :
535 : static struct occurrence *
536 585 : free_bb (struct occurrence *occ)
537 : {
538 585 : struct occurrence *child, *next;
539 :
540 : /* First get the two pointers hanging off OCC. */
541 585 : next = occ->next;
542 585 : child = occ->children;
543 585 : delete occ;
544 :
545 : /* Now ensure that we don't recurse unless it is necessary. */
546 585 : if (!child)
547 : return next;
548 : else
549 : {
550 24 : while (next)
551 3 : next = free_bb (next);
552 :
553 : return child;
554 : }
555 : }
556 :
557 : /* Transform sequences like
558 : t = sqrt (a)
559 : x = 1.0 / t;
560 : r1 = x * x;
561 : r2 = a * x;
562 : into:
563 : t = sqrt (a)
564 : r1 = 1.0 / a;
565 : r2 = t;
566 : x = r1 * r2;
567 : depending on the uses of x, r1, r2. This removes one multiplication and
568 : allows the sqrt and division operations to execute in parallel.
569 : DEF_GSI is the gsi of the initial division by sqrt that defines
570 : DEF (x in the example above). */
571 :
572 : static void
573 536 : optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
574 : {
575 536 : gimple *use_stmt;
576 536 : imm_use_iterator use_iter;
577 536 : gimple *stmt = gsi_stmt (*def_gsi);
578 536 : tree x = def;
579 536 : tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 536 : tree div_rhs1 = gimple_assign_rhs1 (stmt);
581 :
582 536 : if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 531 : || TREE_CODE (div_rhs1) != REAL_CST
584 706 : || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 442 : return;
586 :
587 94 : gcall *sqrt_stmt
588 566 : = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
589 :
590 42 : if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 : return;
592 :
593 42 : switch (gimple_call_combined_fn (sqrt_stmt))
594 : {
595 31 : CASE_CFN_SQRT:
596 31 : CASE_CFN_SQRT_FN:
597 31 : break;
598 :
599 : default:
600 : return;
601 : }
602 31 : tree a = gimple_call_arg (sqrt_stmt, 0);
603 :
604 : /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
605 :
606 : /* Statements that use x in x * x. */
607 43 : auto_vec<gimple *> sqr_stmts;
608 : /* Statements that use x in a * x. */
609 12 : auto_vec<gimple *> mult_stmts;
610 31 : bool has_other_use = false;
611 31 : bool mult_on_main_path = false;
612 :
613 89 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
614 : {
615 58 : if (is_gimple_debug (use_stmt))
616 1 : continue;
617 57 : if (is_square_of (use_stmt, x))
618 : {
619 12 : sqr_stmts.safe_push (use_stmt);
620 12 : if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 17 : mult_on_main_path = true;
622 : }
623 45 : else if (is_mult_by (use_stmt, x, a))
624 : {
625 14 : mult_stmts.safe_push (use_stmt);
626 14 : if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 17 : mult_on_main_path = true;
628 : }
629 : else
630 : has_other_use = true;
631 31 : }
632 :
633 : /* In the x * x and a * x cases we just rewire stmt operands or
634 : remove multiplications. In the has_other_use case we introduce
635 : a multiplication so make sure we don't introduce a multiplication
636 : on a path where there was none. */
637 31 : if (has_other_use && !mult_on_main_path)
638 19 : return;
639 :
640 12 : if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 : return;
642 :
643 : /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 : to be able to compose it from the sqr and mult cases. */
645 41 : if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 : return;
647 :
648 12 : if (dump_file)
649 : {
650 10 : fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 10 : print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 10 : fprintf (dump_file, "\n");
654 : }
655 :
656 12 : bool delete_div = !has_other_use;
657 12 : tree sqr_ssa_name = NULL_TREE;
658 12 : if (!sqr_stmts.is_empty ())
659 : {
660 : /* r1 = x * x. Transform the original
661 : x = 1.0 / t
662 : into
663 : tmp1 = 1.0 / a
664 : r1 = tmp1. */
665 :
666 10 : sqr_ssa_name
667 10 : = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
668 :
669 10 : if (dump_file)
670 : {
671 10 : fprintf (dump_file, "Replacing original division\n");
672 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 10 : fprintf (dump_file, "with new division\n");
674 : }
675 10 : stmt
676 10 : = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 : gimple_assign_rhs1 (stmt), a);
678 10 : gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 10 : gsi_remove (def_gsi, true);
680 10 : *def_gsi = gsi_for_stmt (stmt);
681 10 : fold_stmt_inplace (def_gsi);
682 10 : update_stmt (stmt);
683 :
684 10 : if (dump_file)
685 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
686 :
687 20 : delete_div = false;
688 : gimple *sqr_stmt;
689 : unsigned int i;
690 20 : FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
691 : {
692 10 : gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 10 : gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 10 : update_stmt (sqr_stmt);
695 : }
696 : }
697 12 : if (!mult_stmts.is_empty ())
698 : {
699 : /* r2 = a * x. Transform this into:
700 : r2 = t (The original sqrt (a)). */
701 : unsigned int i;
702 24 : gimple *mult_stmt = NULL;
703 24 : FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
704 : {
705 12 : gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
706 :
707 12 : if (dump_file)
708 : {
709 10 : fprintf (dump_file, "Replacing squaring multiplication\n");
710 10 : print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 10 : fprintf (dump_file, "with assignment\n");
712 : }
713 12 : gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 12 : fold_stmt_inplace (&gsi2);
715 12 : update_stmt (mult_stmt);
716 12 : if (dump_file)
717 10 : print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
718 : }
719 : }
720 :
721 12 : if (has_other_use)
722 : {
723 : /* Using the two temporaries tmp1, tmp2 from above
724 : the original x is now:
725 : x = tmp1 * tmp2. */
726 10 : gcc_assert (orig_sqrt_ssa_name);
727 10 : gcc_assert (sqr_ssa_name);
728 :
729 10 : gimple *new_stmt
730 10 : = gimple_build_assign (x, MULT_EXPR,
731 : orig_sqrt_ssa_name, sqr_ssa_name);
732 10 : gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 10 : update_stmt (stmt);
734 : }
735 2 : else if (delete_div)
736 : {
737 : /* Remove the original division. */
738 2 : gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 2 : gsi_remove (&gsi2, true);
740 2 : release_defs (stmt);
741 : }
742 : else
743 0 : release_ssa_name (x);
744 : }
745 :
746 : /* Look for floating-point divisions among DEF's uses, and try to
747 : replace them by multiplications with the reciprocal. Add
748 : as many statements computing the reciprocal as needed.
749 :
750 : DEF must be a GIMPLE register of a floating-point type. */
751 :
752 : static void
753 207095 : execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
754 : {
755 207095 : use_operand_p use_p, square_use_p;
756 207095 : imm_use_iterator use_iter, square_use_iter;
757 207095 : tree square_def;
758 207095 : struct occurrence *occ;
759 207095 : int count = 0;
760 207095 : int threshold;
761 207095 : int square_recip_count = 0;
762 207095 : int sqrt_recip_count = 0;
763 :
764 207095 : gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 207095 : threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
766 :
767 : /* If DEF is a square (x * x), count the number of divisions by x.
768 : If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 : the reciprocal of x instead of DEF. This improves cases like:
770 : def = x * x
771 : t0 = a / def
772 : t1 = b / def
773 : t2 = c / x
774 : Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 207095 : gimple *def_stmt = SSA_NAME_DEF_STMT (def);
776 :
777 207095 : if (is_gimple_assign (def_stmt)
778 161259 : && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 39580 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 246598 : && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
781 : {
782 665 : tree op0 = gimple_assign_rhs1 (def_stmt);
783 :
784 3381 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
785 : {
786 2051 : gimple *use_stmt = USE_STMT (use_p);
787 2051 : if (is_division_by (use_stmt, op0))
788 14 : sqrt_recip_count++;
789 665 : }
790 : }
791 :
792 755473 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
793 : {
794 341283 : gimple *use_stmt = USE_STMT (use_p);
795 341283 : if (is_division_by (use_stmt, def))
796 : {
797 608 : register_division_in (gimple_bb (use_stmt), 2);
798 608 : count++;
799 : }
800 :
801 341283 : if (is_square_of (use_stmt, def))
802 : {
803 1338 : square_def = gimple_assign_lhs (use_stmt);
804 4156 : FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
805 : {
806 1480 : gimple *square_use_stmt = USE_STMT (square_use_p);
807 1480 : if (is_division_by (square_use_stmt, square_def))
808 : {
809 : /* This is executed twice for each division by a square. */
810 66 : register_division_in (gimple_bb (square_use_stmt), 1);
811 66 : square_recip_count++;
812 : }
813 1338 : }
814 : }
815 207095 : }
816 :
817 : /* Square reciprocals were counted twice above. */
818 207095 : square_recip_count /= 2;
819 :
820 : /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 207095 : if (sqrt_recip_count > square_recip_count)
822 14 : goto out;
823 :
824 : /* Do the expensive part only if we can hope to optimize something. */
825 207081 : if (count + square_recip_count >= threshold && count >= 1)
826 : {
827 26 : gimple *use_stmt;
828 52 : for (occ = occ_head; occ; occ = occ->next)
829 : {
830 26 : compute_merit (occ);
831 26 : insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 : square_recip_count, threshold);
833 : }
834 :
835 185 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
836 : {
837 133 : if (is_division_by (use_stmt, def))
838 : {
839 345 : FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 115 : replace_reciprocal (use_p);
841 : }
842 23 : else if (square_recip_count > 0 && is_square_of (use_stmt, def))
843 : {
844 16 : FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
845 : {
846 : /* Find all uses of the square that are divisions and
847 : * replace them by multiplications with the inverse. */
848 8 : imm_use_iterator square_iterator;
849 8 : gimple *powmult_use_stmt = USE_STMT (use_p);
850 8 : tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
851 :
852 24 : FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 : square_iterator, powmult_def_name)
854 24 : FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
855 : {
856 8 : gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 8 : if (is_division_by (powmult_use_stmt, powmult_def_name))
858 4 : replace_reciprocal_squares (square_use_p);
859 8 : }
860 : }
861 : }
862 26 : }
863 : }
864 :
865 207055 : out:
866 207677 : for (occ = occ_head; occ; )
867 582 : occ = free_bb (occ);
868 :
869 207095 : occ_head = NULL;
870 207095 : }
871 :
872 : /* Return an internal function that implements the reciprocal of CALL,
873 : or IFN_LAST if there is no such function that the target supports. */
874 :
875 : internal_fn
876 113 : internal_fn_reciprocal (gcall *call)
877 : {
878 113 : internal_fn ifn;
879 :
880 113 : switch (gimple_call_combined_fn (call))
881 : {
882 97 : CASE_CFN_SQRT:
883 97 : CASE_CFN_SQRT_FN:
884 97 : ifn = IFN_RSQRT;
885 97 : break;
886 :
887 : default:
888 : return IFN_LAST;
889 : }
890 :
891 97 : tree_pair types = direct_internal_fn_types (ifn, call);
892 97 : if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 : return IFN_LAST;
894 :
895 : return ifn;
896 : }
897 :
898 : /* Go through all the floating-point SSA_NAMEs, and call
899 : execute_cse_reciprocals_1 on each of them. */
900 : namespace {
901 :
902 : const pass_data pass_data_cse_reciprocals =
903 : {
904 : GIMPLE_PASS, /* type */
905 : "recip", /* name */
906 : OPTGROUP_NONE, /* optinfo_flags */
907 : TV_TREE_RECIP, /* tv_id */
908 : PROP_ssa, /* properties_required */
909 : 0, /* properties_provided */
910 : 0, /* properties_destroyed */
911 : 0, /* todo_flags_start */
912 : TODO_update_ssa, /* todo_flags_finish */
913 : };
914 :
915 : class pass_cse_reciprocals : public gimple_opt_pass
916 : {
917 : public:
918 288767 : pass_cse_reciprocals (gcc::context *ctxt)
919 577534 : : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
920 : {}
921 :
922 : /* opt_pass methods: */
923 1044325 : bool gate (function *) final override
924 : {
925 1044325 : return optimize && flag_reciprocal_math;
926 : }
927 : unsigned int execute (function *) final override;
928 :
929 : }; // class pass_cse_reciprocals
930 :
931 : unsigned int
932 8731 : pass_cse_reciprocals::execute (function *fun)
933 : {
934 8731 : basic_block bb;
935 8731 : tree arg;
936 :
937 8731 : occ_pool = new object_allocator<occurrence> ("dominators for recip");
938 :
939 8731 : memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
940 8731 : calculate_dominance_info (CDI_DOMINATORS);
941 8731 : calculate_dominance_info (CDI_POST_DOMINATORS);
942 :
943 8731 : if (flag_checking)
944 93872 : FOR_EACH_BB_FN (bb, fun)
945 85141 : gcc_assert (!bb->aux);
946 :
947 21615 : for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
948 20442 : if (FLOAT_TYPE_P (TREE_TYPE (arg))
949 13971 : && is_gimple_reg (arg))
950 : {
951 6412 : tree name = ssa_default_def (fun, arg);
952 6412 : if (name)
953 5417 : execute_cse_reciprocals_1 (NULL, name);
954 : }
955 :
956 93872 : FOR_EACH_BB_FN (bb, fun)
957 : {
958 85141 : tree def;
959 :
960 192824 : for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
961 107683 : gsi_next (&gsi))
962 : {
963 107683 : gphi *phi = gsi.phi ();
964 107683 : def = PHI_RESULT (phi);
965 107683 : if (! virtual_operand_p (def)
966 107683 : && FLOAT_TYPE_P (TREE_TYPE (def)))
967 30407 : execute_cse_reciprocals_1 (NULL, def);
968 : }
969 :
970 1364392 : for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
971 1279251 : gsi_next (&gsi))
972 : {
973 1279251 : gimple *stmt = gsi_stmt (gsi);
974 :
975 2558502 : if (gimple_has_lhs (stmt)
976 801139 : && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
977 760253 : && FLOAT_TYPE_P (TREE_TYPE (def))
978 194017 : && TREE_CODE (def) == SSA_NAME)
979 : {
980 171271 : execute_cse_reciprocals_1 (&gsi, def);
981 171271 : stmt = gsi_stmt (gsi);
982 171271 : if (flag_unsafe_math_optimizations
983 171244 : && is_gimple_assign (stmt)
984 161234 : && gimple_assign_lhs (stmt) == def
985 161232 : && !stmt_can_throw_internal (cfun, stmt)
986 332459 : && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
987 536 : optimize_recip_sqrt (&gsi, def);
988 : }
989 : }
990 :
991 85141 : if (optimize_bb_for_size_p (bb))
992 5330 : continue;
993 :
994 : /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
995 1337279 : for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
996 1257468 : gsi_next (&gsi))
997 : {
998 1257468 : gimple *stmt = gsi_stmt (gsi);
999 :
1000 1257468 : if (is_gimple_assign (stmt)
1001 1257468 : && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1002 : {
1003 567 : tree arg1 = gimple_assign_rhs2 (stmt);
1004 567 : gimple *stmt1;
1005 :
1006 567 : if (TREE_CODE (arg1) != SSA_NAME)
1007 5 : continue;
1008 :
1009 562 : stmt1 = SSA_NAME_DEF_STMT (arg1);
1010 :
1011 562 : if (is_gimple_call (stmt1)
1012 562 : && gimple_call_lhs (stmt1))
1013 : {
1014 113 : bool fail;
1015 113 : imm_use_iterator ui;
1016 113 : use_operand_p use_p;
1017 113 : tree fndecl = NULL_TREE;
1018 :
1019 113 : gcall *call = as_a <gcall *> (stmt1);
1020 113 : internal_fn ifn = internal_fn_reciprocal (call);
1021 113 : if (ifn == IFN_LAST)
1022 : {
1023 60 : fndecl = gimple_call_fndecl (call);
1024 120 : if (!fndecl
1025 60 : || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1026 62 : continue;
1027 0 : fndecl = targetm.builtin_reciprocal (fndecl);
1028 0 : if (!fndecl)
1029 0 : continue;
1030 : }
1031 :
1032 : /* Check that all uses of the SSA name are divisions,
1033 : otherwise replacing the defining statement will do
1034 : the wrong thing. */
1035 53 : fail = false;
1036 159 : FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1037 : {
1038 55 : gimple *stmt2 = USE_STMT (use_p);
1039 55 : if (is_gimple_debug (stmt2))
1040 0 : continue;
1041 55 : if (!is_gimple_assign (stmt2)
1042 55 : || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1043 53 : || gimple_assign_rhs1 (stmt2) == arg1
1044 108 : || gimple_assign_rhs2 (stmt2) != arg1)
1045 : {
1046 : fail = true;
1047 : break;
1048 : }
1049 53 : }
1050 53 : if (fail)
1051 2 : continue;
1052 :
1053 51 : gimple_replace_ssa_lhs (call, arg1);
1054 51 : reset_flow_sensitive_info (arg1);
1055 51 : if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1056 : {
1057 30 : auto_vec<tree, 4> args;
1058 30 : for (unsigned int i = 0;
1059 60 : i < gimple_call_num_args (call); i++)
1060 30 : args.safe_push (gimple_call_arg (call, i));
1061 30 : gcall *stmt2;
1062 30 : if (ifn == IFN_LAST)
1063 0 : stmt2 = gimple_build_call_vec (fndecl, args);
1064 : else
1065 30 : stmt2 = gimple_build_call_internal_vec (ifn, args);
1066 30 : gimple_call_set_lhs (stmt2, arg1);
1067 30 : gimple_move_vops (stmt2, call);
1068 30 : gimple_call_set_nothrow (stmt2,
1069 30 : gimple_call_nothrow_p (call));
1070 30 : gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1071 30 : gsi_replace (&gsi2, stmt2, true);
1072 30 : }
1073 : else
1074 : {
1075 21 : if (ifn == IFN_LAST)
1076 0 : gimple_call_set_fndecl (call, fndecl);
1077 : else
1078 21 : gimple_call_set_internal_fn (call, ifn);
1079 21 : update_stmt (call);
1080 : }
1081 51 : reciprocal_stats.rfuncs_inserted++;
1082 :
1083 153 : FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1084 : {
1085 51 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1086 51 : gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1087 51 : fold_stmt_inplace (&gsi);
1088 51 : update_stmt (stmt);
1089 51 : }
1090 : }
1091 : }
1092 : }
1093 : }
1094 :
1095 8731 : statistics_counter_event (fun, "reciprocal divs inserted",
1096 : reciprocal_stats.rdivs_inserted);
1097 8731 : statistics_counter_event (fun, "reciprocal functions inserted",
1098 : reciprocal_stats.rfuncs_inserted);
1099 :
1100 8731 : free_dominance_info (CDI_DOMINATORS);
1101 8731 : free_dominance_info (CDI_POST_DOMINATORS);
1102 17462 : delete occ_pool;
1103 8731 : return 0;
1104 : }
1105 :
1106 : } // anon namespace
1107 :
1108 : gimple_opt_pass *
1109 288767 : make_pass_cse_reciprocals (gcc::context *ctxt)
1110 : {
1111 288767 : return new pass_cse_reciprocals (ctxt);
1112 : }
1113 :
1114 : /* If NAME is the result of a type conversion, look for other
1115 : equivalent dominating or dominated conversions, and replace all
1116 : uses with the earliest dominating name, removing the redundant
1117 : conversions. Return the prevailing name. */
1118 :
1119 : static tree
1120 1031 : execute_cse_conv_1 (tree name, bool *cfg_changed)
1121 : {
1122 1031 : if (SSA_NAME_IS_DEFAULT_DEF (name)
1123 1031 : || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1124 : return name;
1125 :
1126 939 : gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1127 :
1128 939 : if (!gimple_assign_cast_p (def_stmt))
1129 : return name;
1130 :
1131 136 : tree src = gimple_assign_rhs1 (def_stmt);
1132 :
1133 136 : if (TREE_CODE (src) != SSA_NAME)
1134 : return name;
1135 :
1136 136 : imm_use_iterator use_iter;
1137 136 : gimple *use_stmt;
1138 :
1139 : /* Find the earliest dominating def. */
1140 657 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1141 : {
1142 763 : if (use_stmt == def_stmt
1143 385 : || !gimple_assign_cast_p (use_stmt))
1144 763 : continue;
1145 :
1146 7 : tree lhs = gimple_assign_lhs (use_stmt);
1147 :
1148 7 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1149 14 : || (gimple_assign_rhs1 (use_stmt)
1150 7 : != gimple_assign_rhs1 (def_stmt))
1151 14 : || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1152 0 : continue;
1153 :
1154 7 : bool use_dominates;
1155 7 : if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1156 : {
1157 0 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1158 0 : while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1159 0 : gsi_next (&gsi);
1160 0 : use_dominates = !gsi_end_p (gsi);
1161 : }
1162 7 : else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1163 7 : gimple_bb (def_stmt)))
1164 : use_dominates = false;
1165 7 : else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1166 7 : gimple_bb (use_stmt)))
1167 : use_dominates = true;
1168 : else
1169 4 : continue;
1170 :
1171 0 : if (use_dominates)
1172 : {
1173 : std::swap (name, lhs);
1174 : std::swap (def_stmt, use_stmt);
1175 : }
1176 136 : }
1177 :
1178 : /* Now go through all uses of SRC again, replacing the equivalent
1179 : dominated conversions. We may replace defs that were not
1180 : dominated by the then-prevailing defs when we first visited
1181 : them. */
1182 657 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1183 : {
1184 763 : if (use_stmt == def_stmt
1185 385 : || !gimple_assign_cast_p (use_stmt))
1186 378 : continue;
1187 :
1188 7 : tree lhs = gimple_assign_lhs (use_stmt);
1189 :
1190 7 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1191 14 : || (gimple_assign_rhs1 (use_stmt)
1192 7 : != gimple_assign_rhs1 (def_stmt))
1193 14 : || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1194 0 : continue;
1195 :
1196 7 : basic_block use_bb = gimple_bb (use_stmt);
1197 7 : if (gimple_bb (def_stmt) == use_bb
1198 7 : || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1199 : {
1200 3 : sincos_stats.conv_removed++;
1201 :
1202 3 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1203 3 : replace_uses_by (lhs, name);
1204 3 : if (gsi_remove (&gsi, true)
1205 3 : && gimple_purge_dead_eh_edges (use_bb))
1206 3 : *cfg_changed = true;
1207 3 : release_defs (use_stmt);
1208 : }
1209 136 : }
1210 :
1211 136 : return name;
1212 : }
1213 :
1214 : /* Records an occurrence at statement USE_STMT in the vector of trees
1215 : STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1216 : is not yet initialized. Returns true if the occurrence was pushed on
1217 : the vector. Adjusts *TOP_BB to be the basic block dominating all
1218 : statements in the vector. */
1219 :
1220 : static bool
1221 1241 : maybe_record_sincos (vec<gimple *> *stmts,
1222 : basic_block *top_bb, gimple *use_stmt)
1223 : {
1224 1241 : basic_block use_bb = gimple_bb (use_stmt);
1225 1241 : if (*top_bb
1226 1241 : && (*top_bb == use_bb
1227 66 : || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1228 151 : stmts->safe_push (use_stmt);
1229 1090 : else if (!*top_bb
1230 1090 : || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1231 : {
1232 1070 : stmts->safe_push (use_stmt);
1233 1070 : *top_bb = use_bb;
1234 : }
1235 : else
1236 : return false;
1237 :
1238 : return true;
1239 : }
1240 :
1241 : /* Look for sin, cos and cexpi calls with the same argument NAME and
1242 : create a single call to cexpi CSEing the result in this case.
1243 : We first walk over all immediate uses of the argument collecting
1244 : statements that we can CSE in a vector and in a second pass replace
1245 : the statement rhs with a REALPART or IMAGPART expression on the
1246 : result of the cexpi call we insert before the use statement that
1247 : dominates all other candidates. */
1248 :
1249 : static bool
1250 1031 : execute_cse_sincos_1 (tree name)
1251 : {
1252 1031 : gimple_stmt_iterator gsi;
1253 1031 : imm_use_iterator use_iter;
1254 1031 : tree fndecl, res, type = NULL_TREE;
1255 1031 : gimple *def_stmt, *use_stmt, *stmt;
1256 1031 : int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1257 1031 : auto_vec<gimple *> stmts;
1258 1031 : basic_block top_bb = NULL;
1259 1031 : int i;
1260 1031 : bool cfg_changed = false;
1261 :
1262 1031 : name = execute_cse_conv_1 (name, &cfg_changed);
1263 :
1264 5000 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1265 : {
1266 2940 : if (gimple_code (use_stmt) != GIMPLE_CALL
1267 2940 : || !gimple_call_lhs (use_stmt))
1268 1670 : continue;
1269 :
1270 1270 : switch (gimple_call_combined_fn (use_stmt))
1271 : {
1272 451 : CASE_CFN_COS:
1273 451 : seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1274 451 : break;
1275 :
1276 784 : CASE_CFN_SIN:
1277 784 : seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1278 784 : break;
1279 :
1280 6 : CASE_CFN_CEXPI:
1281 6 : seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1282 6 : break;
1283 :
1284 29 : default:;
1285 29 : continue;
1286 : }
1287 :
1288 1241 : auto stmt_cfn = gimple_call_combined_fn (use_stmt);
1289 1241 : tree t = mathfn_built_in_type (stmt_cfn);
1290 1241 : if (!t)
1291 : {
1292 : /* It is possible to get IFN_{SIN,COS} calls, for which
1293 : mathfn_built_in_type will return NULL. Those are normally only
1294 : present for vector operations. We won't be able to CSE those
1295 : at the moment. */
1296 2 : gcc_checking_assert (internal_fn_p (stmt_cfn));
1297 : return false;
1298 : }
1299 :
1300 1239 : if (!type)
1301 : {
1302 1029 : type = t;
1303 1029 : t = TREE_TYPE (name);
1304 : }
1305 : /* This checks that NAME has the right type in the first round,
1306 : and, in subsequent rounds, that the built_in type is the same
1307 : type, or a compatible type. */
1308 1239 : if (type != t && !types_compatible_p (type, t))
1309 : return false;
1310 2 : }
1311 1029 : if (seen_cos + seen_sin + seen_cexpi <= 1)
1312 : return false;
1313 :
1314 : /* Simply insert cexpi at the beginning of top_bb but not earlier than
1315 : the name def statement. */
1316 190 : fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1317 190 : if (!fndecl)
1318 : return false;
1319 146 : stmt = gimple_build_call (fndecl, 1, name);
1320 146 : res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1321 146 : gimple_call_set_lhs (stmt, res);
1322 :
1323 146 : def_stmt = SSA_NAME_DEF_STMT (name);
1324 146 : if (!SSA_NAME_IS_DEFAULT_DEF (name)
1325 121 : && gimple_code (def_stmt) != GIMPLE_PHI
1326 259 : && gimple_bb (def_stmt) == top_bb)
1327 : {
1328 113 : gsi = gsi_for_stmt (def_stmt);
1329 113 : gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1330 : }
1331 : else
1332 : {
1333 33 : gsi = gsi_after_labels (top_bb);
1334 33 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1335 : }
1336 146 : sincos_stats.inserted++;
1337 :
1338 : /* And adjust the recorded old call sites. */
1339 438 : for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1340 : {
1341 292 : tree rhs = NULL;
1342 :
1343 292 : switch (gimple_call_combined_fn (use_stmt))
1344 : {
1345 146 : CASE_CFN_COS:
1346 146 : rhs = fold_build1 (REALPART_EXPR, type, res);
1347 146 : break;
1348 :
1349 146 : CASE_CFN_SIN:
1350 146 : rhs = fold_build1 (IMAGPART_EXPR, type, res);
1351 146 : break;
1352 :
1353 : CASE_CFN_CEXPI:
1354 : rhs = res;
1355 : break;
1356 :
1357 0 : default:;
1358 0 : gcc_unreachable ();
1359 : }
1360 :
1361 : /* Replace call with a copy. */
1362 292 : stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1363 :
1364 292 : gsi = gsi_for_stmt (use_stmt);
1365 292 : gsi_replace (&gsi, stmt, true);
1366 292 : if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1367 0 : cfg_changed = true;
1368 : }
1369 :
1370 146 : return cfg_changed;
1371 1031 : }
1372 :
1373 : /* To evaluate powi(x,n), the floating point value x raised to the
1374 : constant integer exponent n, we use a hybrid algorithm that
1375 : combines the "window method" with look-up tables. For an
1376 : introduction to exponentiation algorithms and "addition chains",
1377 : see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1378 : "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1379 : 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1380 : Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1381 :
1382 : /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1383 : multiplications to inline before calling the system library's pow
1384 : function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1385 : so this default never requires calling pow, powf or powl. */
1386 :
1387 : #ifndef POWI_MAX_MULTS
1388 : #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1389 : #endif
1390 :
1391 : /* The size of the "optimal power tree" lookup table. All
1392 : exponents less than this value are simply looked up in the
1393 : powi_table below. This threshold is also used to size the
1394 : cache of pseudo registers that hold intermediate results. */
1395 : #define POWI_TABLE_SIZE 256
1396 :
1397 : /* The size, in bits of the window, used in the "window method"
1398 : exponentiation algorithm. This is equivalent to a radix of
1399 : (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1400 : #define POWI_WINDOW_SIZE 3
1401 :
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
1444 :
1445 :
1446 : /* Return the number of multiplications required to calculate
1447 : powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1448 : subroutine of powi_cost. CACHE is an array indicating
1449 : which exponents have already been calculated. */
1450 :
1451 : static int
1452 1120 : powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1453 : {
1454 : /* If we've already calculated this exponent, then this evaluation
1455 : doesn't require any additional multiplications. */
1456 1861 : if (cache[n])
1457 1120 : return 0;
1458 :
1459 741 : cache[n] = true;
1460 741 : return powi_lookup_cost (n - powi_table[n], cache)
1461 741 : + powi_lookup_cost (powi_table[n], cache) + 1;
1462 : }
1463 :
1464 : /* Return the number of multiplications required to calculate
1465 : powi(x,n) for an arbitrary x, given the exponent N. This
1466 : function needs to be kept in sync with powi_as_mults below. */
1467 :
1468 : static int
1469 384 : powi_cost (HOST_WIDE_INT n)
1470 : {
1471 384 : bool cache[POWI_TABLE_SIZE];
1472 384 : unsigned HOST_WIDE_INT digit;
1473 384 : unsigned HOST_WIDE_INT val;
1474 384 : int result;
1475 :
1476 384 : if (n == 0)
1477 : return 0;
1478 :
1479 : /* Ignore the reciprocal when calculating the cost. */
1480 379 : val = absu_hwi (n);
1481 :
1482 : /* Initialize the exponent cache. */
1483 379 : memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1484 379 : cache[1] = true;
1485 :
1486 379 : result = 0;
1487 :
1488 379 : while (val >= POWI_TABLE_SIZE)
1489 : {
1490 0 : if (val & 1)
1491 : {
1492 0 : digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1493 0 : result += powi_lookup_cost (digit, cache)
1494 0 : + POWI_WINDOW_SIZE + 1;
1495 0 : val >>= POWI_WINDOW_SIZE;
1496 : }
1497 : else
1498 : {
1499 0 : val >>= 1;
1500 0 : result++;
1501 : }
1502 : }
1503 :
1504 379 : return result + powi_lookup_cost (val, cache);
1505 : }
1506 :
1507 : /* Recursive subroutine of powi_as_mults. This function takes the
1508 : array, CACHE, of already calculated exponents and an exponent N and
1509 : returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1510 :
1511 : static tree
1512 6113 : powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1513 : unsigned HOST_WIDE_INT n, tree *cache)
1514 : {
1515 6113 : tree op0, op1, ssa_target;
1516 6113 : unsigned HOST_WIDE_INT digit;
1517 6113 : gassign *mult_stmt;
1518 :
1519 6113 : if (n < POWI_TABLE_SIZE && cache[n])
1520 : return cache[n];
1521 :
1522 2166 : ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1523 :
1524 2166 : if (n < POWI_TABLE_SIZE)
1525 : {
1526 2163 : cache[n] = ssa_target;
1527 2163 : op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1528 2163 : op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1529 : }
1530 3 : else if (n & 1)
1531 : {
1532 1 : digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1533 1 : op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1534 1 : op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1535 : }
1536 : else
1537 : {
1538 2 : op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1539 2 : op1 = op0;
1540 : }
1541 :
1542 2166 : mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1543 2166 : gimple_set_location (mult_stmt, loc);
1544 2166 : gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1545 :
1546 2166 : return ssa_target;
1547 : }
1548 :
1549 : /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1550 : This function needs to be kept in sync with powi_cost above. */
1551 :
1552 : tree
1553 1783 : powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1554 : tree arg0, HOST_WIDE_INT n)
1555 : {
1556 1783 : tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1557 1783 : gassign *div_stmt;
1558 1783 : tree target;
1559 :
1560 1783 : if (n == 0)
1561 0 : return build_one_cst (type);
1562 :
1563 1783 : memset (cache, 0, sizeof (cache));
1564 1783 : cache[1] = arg0;
1565 :
1566 1783 : result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1567 1783 : if (n >= 0)
1568 : return result;
1569 :
1570 : /* If the original exponent was negative, reciprocate the result. */
1571 8 : target = make_temp_ssa_name (type, NULL, "powmult");
1572 8 : div_stmt = gimple_build_assign (target, RDIV_EXPR,
1573 : build_real (type, dconst1), result);
1574 8 : gimple_set_location (div_stmt, loc);
1575 8 : gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1576 :
1577 8 : return target;
1578 : }
1579 :
1580 : /* ARG0 and N are the two arguments to a powi builtin in GSI with
1581 : location info LOC. If the arguments are appropriate, create an
1582 : equivalent sequence of statements prior to GSI using an optimal
1583 : number of multiplications, and return an expession holding the
1584 : result. */
1585 :
1586 : static tree
1587 630 : gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1588 : tree arg0, HOST_WIDE_INT n)
1589 : {
1590 630 : if ((n >= -1 && n <= 2)
1591 630 : || (optimize_function_for_speed_p (cfun)
1592 351 : && powi_cost (n) <= POWI_MAX_MULTS))
1593 622 : return powi_as_mults (gsi, loc, arg0, n);
1594 :
1595 : return NULL_TREE;
1596 : }
1597 :
1598 : /* Build a gimple call statement that calls FN with argument ARG.
1599 : Set the lhs of the call statement to a fresh SSA name. Insert the
1600 : statement prior to GSI's current position, and return the fresh
1601 : SSA name. */
1602 :
1603 : static tree
1604 44 : build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1605 : tree fn, tree arg)
1606 : {
1607 44 : gcall *call_stmt;
1608 44 : tree ssa_target;
1609 :
1610 44 : call_stmt = gimple_build_call (fn, 1, arg);
1611 44 : ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1612 44 : gimple_set_lhs (call_stmt, ssa_target);
1613 44 : gimple_set_location (call_stmt, loc);
1614 44 : gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1615 :
1616 44 : return ssa_target;
1617 : }
1618 :
1619 : /* Build a gimple binary operation with the given CODE and arguments
1620 : ARG0, ARG1, assigning the result to a new SSA name for variable
1621 : TARGET. Insert the statement prior to GSI's current position, and
1622 : return the fresh SSA name.*/
1623 :
1624 : static tree
1625 912 : build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1626 : const char *name, enum tree_code code,
1627 : tree arg0, tree arg1)
1628 : {
1629 912 : tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1630 912 : gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1631 912 : gimple_set_location (stmt, loc);
1632 912 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1633 912 : return result;
1634 : }
1635 :
1636 : /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1637 : prior to GSI's current position, and return the fresh SSA name. */
1638 :
1639 : static tree
1640 16853 : build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1641 : tree type, tree val)
1642 : {
1643 0 : return gimple_convert (gsi, true, GSI_SAME_STMT, loc, type, val);
1644 : }
1645 :
/* Parameters of the synthesis of a power as a product of square root
   chains, as filled in by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* FACTORS[i] is true when the term 0.5**(i+1) takes part in the
     series.  The array is supplied by the user of the structure.  */
  bool *factors;
  /* One more than the index of the last term of the series; zero until
     an exact representation has been found.  */
  unsigned int deepest;
  /* Number of terms taken before the final one.  */
  unsigned int num_mults;
};
1652 :
1653 : /* Return true iff the real value C can be represented as a
1654 : sum of powers of 0.5 up to N. That is:
1655 : C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1656 : Record in INFO the various parameters of the synthesis algorithm such
1657 : as the factors a[i], the maximum 0.5 power and the number of
1658 : multiplications that will be required. */
1659 :
1660 : bool
1661 33 : representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1662 : struct pow_synth_sqrt_info *info)
1663 : {
1664 33 : REAL_VALUE_TYPE factor = dconsthalf;
1665 33 : REAL_VALUE_TYPE remainder = c;
1666 :
1667 33 : info->deepest = 0;
1668 33 : info->num_mults = 0;
1669 33 : memset (info->factors, 0, n * sizeof (bool));
1670 :
1671 97 : for (unsigned i = 0; i < n; i++)
1672 : {
1673 90 : REAL_VALUE_TYPE res;
1674 :
1675 : /* If something inexact happened bail out now. */
1676 90 : if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1677 26 : return false;
1678 :
1679 : /* We have hit zero. The number is representable as a sum
1680 : of powers of 0.5. */
1681 90 : if (real_equal (&res, &dconst0))
1682 : {
1683 26 : info->factors[i] = true;
1684 26 : info->deepest = i + 1;
1685 26 : return true;
1686 : }
1687 64 : else if (!REAL_VALUE_NEGATIVE (res))
1688 : {
1689 29 : remainder = res;
1690 29 : info->factors[i] = true;
1691 29 : info->num_mults++;
1692 : }
1693 : else
1694 35 : info->factors[i] = false;
1695 :
1696 64 : real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1697 : }
1698 : return false;
1699 : }
1700 :
1701 : /* Return the tree corresponding to FN being applied
1702 : to ARG N times at GSI and LOC.
1703 : Look up previous results from CACHE if need be.
1704 : cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1705 :
1706 : static tree
1707 63 : get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1708 : tree fn, location_t loc, tree *cache)
1709 : {
1710 63 : tree res = cache[n];
1711 63 : if (!res)
1712 : {
1713 40 : tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1714 40 : res = build_and_insert_call (gsi, loc, fn, prev);
1715 40 : cache[n] = res;
1716 : }
1717 :
1718 63 : return res;
1719 : }
1720 :
/* Print to STREAM function FNAME applied N times to ARG.  For example
   FNAME = "foo", ARG = "x", N = 2 prints "foo (foo (x))"; with N = 0
   only ARG is printed.  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
                 unsigned int n)
{
  /* Opening part of each of the N applications, outermost first.  */
  for (unsigned int depth = 0; depth < n; depth++)
    fprintf (stream, "%s (", fname);

  fprintf (stream, "%s", arg);

  /* One closing parenthesis per application.  */
  for (unsigned int depth = 0; depth < n; depth++)
    fprintf (stream, ")");
}
1738 :
1739 : /* Print to STREAM the fractional sequence of sqrt chains
1740 : applied to ARG, described by INFO. Used for the dump file. */
1741 :
1742 : static void
1743 7 : dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1744 : struct pow_synth_sqrt_info *info)
1745 : {
1746 29 : for (unsigned int i = 0; i < info->deepest; i++)
1747 : {
1748 22 : bool is_set = info->factors[i];
1749 22 : if (is_set)
1750 : {
1751 10 : print_nested_fn (stream, "sqrt", arg, i + 1);
1752 10 : if (i != info->deepest - 1)
1753 3 : fprintf (stream, " * ");
1754 : }
1755 : }
1756 7 : }
1757 :
1758 : /* Print to STREAM a representation of raising ARG to an integer
1759 : power N. Used for the dump file. */
1760 :
1761 : static void
1762 7 : dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1763 : {
1764 7 : if (n > 1)
1765 3 : fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1766 4 : else if (n == 1)
1767 3 : fprintf (stream, "%s", arg);
1768 7 : }
1769 :
1770 : /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1771 : square roots. Place at GSI and LOC. Limit the maximum depth
1772 : of the sqrt chains to MAX_DEPTH. Return the tree holding the
1773 : result of the expanded sequence or NULL_TREE if the expansion failed.
1774 :
1775 : This routine assumes that ARG1 is a real number with a fractional part
1776 : (the integer exponent case will have been handled earlier in
1777 : gimple_expand_builtin_pow).
1778 :
1779 : For ARG1 > 0.0:
1780 : * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1781 : FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1782 : FRAC_PART == ARG1 - WHOLE_PART:
1783 : Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1784 : POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1785 : if it can be expressed as such, that is if FRAC_PART satisfies:
1786 : FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1787 : where integer a[i] is either 0 or 1.
1788 :
1789 : Example:
1790 : POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1791 : --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1792 :
1793 : For ARG1 < 0.0 there are two approaches:
1794 : * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1795 : is calculated as above.
1796 :
1797 : Example:
1798 : POW (x, -5.625) == 1.0 / POW (x, 5.625)
1799 : --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1800 :
1801 : * (B) : WHOLE_PART := - ceil (abs (ARG1))
1802 : FRAC_PART := ARG1 - WHOLE_PART
1803 : and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1804 : Example:
1805 : POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1806 : --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1807 :
1808 : For ARG1 < 0.0 we choose between (A) and (B) depending on
1809 : how many multiplications we'd have to do.
1810 : So, for the example in (B): POW (x, -5.875), if we were to
1811 : follow algorithm (A) we would produce:
1812 : 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1813 : which contains more multiplications than approach (B).
1814 :
1815 : Hopefully, this approach will eliminate potentially expensive POW library
1816 : calls when unsafe floating point math is enabled and allow the compiler to
1817 : further optimise the multiplies, square roots and divides produced by this
1818 : function. */
1819 :
1820 : static tree
1821 25 : expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1822 : tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1823 : {
1824 25 : tree type = TREE_TYPE (arg0);
1825 25 : machine_mode mode = TYPE_MODE (type);
1826 25 : tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1827 25 : bool one_over = true;
1828 :
1829 25 : if (!sqrtfn)
1830 : return NULL_TREE;
1831 :
1832 25 : if (TREE_CODE (arg1) != REAL_CST)
1833 : return NULL_TREE;
1834 :
1835 25 : REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1836 :
1837 25 : gcc_assert (max_depth > 0);
1838 25 : tree *cache = XALLOCAVEC (tree, max_depth + 1);
1839 :
1840 25 : struct pow_synth_sqrt_info synth_info;
1841 25 : synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1842 25 : synth_info.deepest = 0;
1843 25 : synth_info.num_mults = 0;
1844 :
1845 25 : bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1846 25 : REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1847 :
1848 : /* The whole and fractional parts of exp. */
1849 25 : REAL_VALUE_TYPE whole_part;
1850 25 : REAL_VALUE_TYPE frac_part;
1851 :
1852 25 : real_floor (&whole_part, mode, &exp);
1853 25 : real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1854 :
1855 :
1856 25 : REAL_VALUE_TYPE ceil_whole = dconst0;
1857 25 : REAL_VALUE_TYPE ceil_fract = dconst0;
1858 :
1859 25 : if (neg_exp)
1860 : {
1861 10 : real_ceil (&ceil_whole, mode, &exp);
1862 10 : real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1863 : }
1864 :
1865 25 : if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1866 : return NULL_TREE;
1867 :
1868 : /* Check whether it's more profitable to not use 1.0 / ... */
1869 18 : if (neg_exp)
1870 : {
1871 8 : struct pow_synth_sqrt_info alt_synth_info;
1872 8 : alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1873 8 : alt_synth_info.deepest = 0;
1874 8 : alt_synth_info.num_mults = 0;
1875 :
1876 8 : if (representable_as_half_series_p (ceil_fract, max_depth,
1877 : &alt_synth_info)
1878 8 : && alt_synth_info.deepest <= synth_info.deepest
1879 16 : && alt_synth_info.num_mults < synth_info.num_mults)
1880 : {
1881 2 : whole_part = ceil_whole;
1882 2 : frac_part = ceil_fract;
1883 2 : synth_info.deepest = alt_synth_info.deepest;
1884 2 : synth_info.num_mults = alt_synth_info.num_mults;
1885 2 : memcpy (synth_info.factors, alt_synth_info.factors,
1886 : (max_depth + 1) * sizeof (bool));
1887 2 : one_over = false;
1888 : }
1889 : }
1890 :
1891 18 : HOST_WIDE_INT n = real_to_integer (&whole_part);
1892 18 : REAL_VALUE_TYPE cint;
1893 18 : real_from_integer (&cint, VOIDmode, n, SIGNED);
1894 :
1895 18 : if (!real_identical (&whole_part, &cint))
1896 : return NULL_TREE;
1897 :
1898 18 : if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1899 : return NULL_TREE;
1900 :
1901 18 : memset (cache, 0, (max_depth + 1) * sizeof (tree));
1902 :
1903 18 : tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1904 :
1905 : /* Calculate the integer part of the exponent. */
1906 18 : if (n > 1)
1907 : {
1908 6 : integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1909 6 : if (!integer_res)
1910 : return NULL_TREE;
1911 : }
1912 :
1913 18 : if (dump_file)
1914 : {
1915 7 : char string[64];
1916 :
1917 7 : real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1918 7 : fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1919 :
1920 7 : if (neg_exp)
1921 : {
1922 2 : if (one_over)
1923 : {
1924 1 : fprintf (dump_file, "1.0 / (");
1925 1 : dump_integer_part (dump_file, "x", n);
1926 1 : if (n > 0)
1927 1 : fprintf (dump_file, " * ");
1928 1 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1929 1 : fprintf (dump_file, ")");
1930 : }
1931 : else
1932 : {
1933 1 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934 1 : fprintf (dump_file, " / (");
1935 1 : dump_integer_part (dump_file, "x", n);
1936 1 : fprintf (dump_file, ")");
1937 : }
1938 : }
1939 : else
1940 : {
1941 5 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1942 5 : if (n > 0)
1943 4 : fprintf (dump_file, " * ");
1944 5 : dump_integer_part (dump_file, "x", n);
1945 : }
1946 :
1947 7 : fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1948 : }
1949 :
1950 :
1951 18 : tree fract_res = NULL_TREE;
1952 18 : cache[0] = arg0;
1953 :
1954 : /* Calculate the fractional part of the exponent. */
1955 58 : for (unsigned i = 0; i < synth_info.deepest; i++)
1956 : {
1957 40 : if (synth_info.factors[i])
1958 : {
1959 23 : tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1960 :
1961 23 : if (!fract_res)
1962 : fract_res = sqrt_chain;
1963 :
1964 : else
1965 5 : fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1966 : fract_res, sqrt_chain);
1967 : }
1968 : }
1969 :
1970 18 : tree res = NULL_TREE;
1971 :
1972 18 : if (neg_exp)
1973 : {
1974 8 : if (one_over)
1975 : {
1976 6 : if (n > 0)
1977 4 : res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1978 : fract_res, integer_res);
1979 : else
1980 : res = fract_res;
1981 :
1982 6 : res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1983 : build_real (type, dconst1), res);
1984 : }
1985 : else
1986 : {
1987 2 : res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1988 : fract_res, integer_res);
1989 : }
1990 : }
1991 : else
1992 10 : res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1993 : fract_res, integer_res);
1994 : return res;
1995 : }
1996 :
1997 : /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1998 : with location info LOC. If possible, create an equivalent and
1999 : less expensive sequence of statements prior to GSI, and return an
2000 : expression holding the result. */
2001 :
2002 : static tree
2003 601 : gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2004 : tree arg0, tree arg1)
2005 : {
2006 601 : REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2007 601 : REAL_VALUE_TYPE c2, dconst3;
2008 601 : HOST_WIDE_INT n;
2009 601 : tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2010 601 : machine_mode mode;
2011 601 : bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2012 601 : bool hw_sqrt_exists, c_is_int, c2_is_int;
2013 :
2014 601 : dconst1_4 = dconst1;
2015 601 : SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2016 :
2017 : /* If the exponent isn't a constant, there's nothing of interest
2018 : to be done. */
2019 601 : if (TREE_CODE (arg1) != REAL_CST)
2020 : return NULL_TREE;
2021 :
2022 : /* Don't perform the operation if flag_signaling_nans is on
2023 : and the operand is a signaling NaN. */
2024 363 : if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2025 363 : && ((TREE_CODE (arg0) == REAL_CST
2026 0 : && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2027 1 : || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2028 0 : return NULL_TREE;
2029 :
2030 363 : if (flag_errno_math)
2031 : return NULL_TREE;
2032 :
2033 : /* If the exponent is equivalent to an integer, expand to an optimal
2034 : multiplication sequence when profitable. */
2035 75 : c = TREE_REAL_CST (arg1);
2036 75 : n = real_to_integer (&c);
2037 75 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2038 75 : c_is_int = real_identical (&c, &cint);
2039 :
2040 75 : if (c_is_int
2041 75 : && ((n >= -1 && n <= 2)
2042 21 : || (flag_unsafe_math_optimizations
2043 11 : && speed_p
2044 11 : && powi_cost (n) <= POWI_MAX_MULTS)))
2045 30 : return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2046 :
2047 : /* Attempt various optimizations using sqrt and cbrt. */
2048 45 : type = TREE_TYPE (arg0);
2049 45 : mode = TYPE_MODE (type);
2050 45 : sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2051 :
2052 : /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2053 : unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2054 : sqrt(-0) = -0. */
2055 45 : if (sqrtfn
2056 45 : && real_equal (&c, &dconsthalf)
2057 52 : && !HONOR_SIGNED_ZEROS (mode))
2058 0 : return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2059 :
2060 45 : hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2061 :
2062 : /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2063 : optimizations since 1./3. is not exactly representable. If x
2064 : is negative and finite, the correct value of pow(x,1./3.) is
2065 : a NaN with the "invalid" exception raised, because the value
2066 : of 1./3. actually has an even denominator. The correct value
2067 : of cbrt(x) is a negative real value. */
2068 45 : cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2069 45 : dconst1_3 = real_value_truncate (mode, dconst_third ());
2070 :
2071 45 : if (flag_unsafe_math_optimizations
2072 25 : && cbrtfn
2073 25 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2074 70 : && real_equal (&c, &dconst1_3))
2075 0 : return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2076 :
2077 : /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2078 : if we don't have a hardware sqrt insn. */
2079 45 : dconst1_6 = dconst1_3;
2080 45 : SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2081 :
2082 45 : if (flag_unsafe_math_optimizations
2083 25 : && sqrtfn
2084 25 : && cbrtfn
2085 25 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2086 : && speed_p
2087 25 : && hw_sqrt_exists
2088 70 : && real_equal (&c, &dconst1_6))
2089 : {
2090 : /* sqrt(x) */
2091 0 : sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2092 :
2093 : /* cbrt(sqrt(x)) */
2094 0 : return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2095 : }
2096 :
2097 :
2098 : /* Attempt to expand the POW as a product of square root chains.
2099 : Expand the 0.25 case even when optimising for size. */
2100 45 : if (flag_unsafe_math_optimizations
2101 25 : && sqrtfn
2102 25 : && hw_sqrt_exists
2103 25 : && (speed_p || real_equal (&c, &dconst1_4))
2104 70 : && !HONOR_SIGNED_ZEROS (mode))
2105 : {
2106 50 : unsigned int max_depth = speed_p
2107 25 : ? param_max_pow_sqrt_depth
2108 : : 2;
2109 :
2110 25 : tree expand_with_sqrts
2111 25 : = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2112 :
2113 25 : if (expand_with_sqrts)
2114 : return expand_with_sqrts;
2115 : }
2116 :
2117 27 : real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2118 27 : n = real_to_integer (&c2);
2119 27 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2120 27 : c2_is_int = real_identical (&c2, &cint);
2121 :
2122 : /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2123 :
2124 : powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2125 : 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2126 :
2127 : Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2128 : different from pow(x, 1./3.) due to rounding and behavior with
2129 : negative x, we need to constrain this transformation to unsafe
2130 : math and positive x or finite math. */
2131 27 : real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2132 27 : real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2133 27 : real_round (&c2, mode, &c2);
2134 27 : n = real_to_integer (&c2);
2135 27 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2136 27 : real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2137 27 : real_convert (&c2, mode, &c2);
2138 :
2139 27 : if (flag_unsafe_math_optimizations
2140 7 : && cbrtfn
2141 7 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2142 7 : && real_identical (&c2, &c)
2143 4 : && !c2_is_int
2144 4 : && optimize_function_for_speed_p (cfun)
2145 31 : && powi_cost (n / 3) <= POWI_MAX_MULTS)
2146 : {
2147 4 : tree powi_x_ndiv3 = NULL_TREE;
2148 :
2149 : /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2150 : possible or profitable, give up. Skip the degenerate case when
2151 : abs(n) < 3, where the result is always 1. */
2152 4 : if (absu_hwi (n) >= 3)
2153 : {
2154 4 : powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2155 : abs_hwi (n / 3));
2156 4 : if (!powi_x_ndiv3)
2157 : return NULL_TREE;
2158 : }
2159 :
2160 : /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2161 : as that creates an unnecessary variable. Instead, just produce
2162 : either cbrt(x) or cbrt(x) * cbrt(x). */
2163 4 : cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2164 :
2165 4 : if (absu_hwi (n) % 3 == 1)
2166 : powi_cbrt_x = cbrt_x;
2167 : else
2168 2 : powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2169 : cbrt_x, cbrt_x);
2170 :
2171 : /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2172 4 : if (absu_hwi (n) < 3)
2173 : result = powi_cbrt_x;
2174 : else
2175 4 : result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2176 : powi_x_ndiv3, powi_cbrt_x);
2177 :
2178 : /* If n is negative, reciprocate the result. */
2179 4 : if (n < 0)
2180 1 : result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2181 : build_real (type, dconst1), result);
2182 :
2183 4 : return result;
2184 : }
2185 :
2186 : /* No optimizations succeeded. */
2187 : return NULL_TREE;
2188 : }
2189 :
2190 : /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2191 : on the SSA_NAME argument of each of them. */
2192 :
2193 : namespace {
2194 :
2195 : const pass_data pass_data_cse_sincos =
2196 : {
2197 : GIMPLE_PASS, /* type */
2198 : "sincos", /* name */
2199 : OPTGROUP_NONE, /* optinfo_flags */
2200 : TV_TREE_SINCOS, /* tv_id */
2201 : PROP_ssa, /* properties_required */
2202 : 0, /* properties_provided */
2203 : 0, /* properties_destroyed */
2204 : 0, /* todo_flags_start */
2205 : TODO_update_ssa, /* todo_flags_finish */
2206 : };
2207 :
2208 : class pass_cse_sincos : public gimple_opt_pass
2209 : {
2210 : public:
2211 288767 : pass_cse_sincos (gcc::context *ctxt)
2212 577534 : : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2213 : {}
2214 :
2215 : /* opt_pass methods: */
2216 1044325 : bool gate (function *) final override
2217 : {
2218 1044325 : return optimize;
2219 : }
2220 :
2221 : unsigned int execute (function *) final override;
2222 :
2223 : }; // class pass_cse_sincos
2224 :
2225 : unsigned int
2226 1044300 : pass_cse_sincos::execute (function *fun)
2227 : {
2228 1044300 : basic_block bb;
2229 1044300 : bool cfg_changed = false;
2230 :
2231 1044300 : calculate_dominance_info (CDI_DOMINATORS);
2232 1044300 : memset (&sincos_stats, 0, sizeof (sincos_stats));
2233 :
2234 10956312 : FOR_EACH_BB_FN (bb, fun)
2235 : {
2236 9912012 : gimple_stmt_iterator gsi;
2237 :
2238 94457125 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2239 : {
2240 84545113 : gimple *stmt = gsi_stmt (gsi);
2241 :
2242 84545113 : if (is_gimple_call (stmt)
2243 84545113 : && gimple_call_lhs (stmt))
2244 : {
2245 2027089 : tree arg;
2246 2027089 : switch (gimple_call_combined_fn (stmt))
2247 : {
2248 1031 : CASE_CFN_COS:
2249 1031 : CASE_CFN_SIN:
2250 1031 : CASE_CFN_CEXPI:
2251 1031 : arg = gimple_call_arg (stmt, 0);
2252 : /* Make sure we have either sincos or cexp. */
2253 1031 : if (!targetm.libc_has_function (function_c99_math_complex,
2254 1031 : TREE_TYPE (arg))
2255 1031 : && !targetm.libc_has_function (function_sincos,
2256 0 : TREE_TYPE (arg)))
2257 : break;
2258 :
2259 1031 : if (TREE_CODE (arg) == SSA_NAME)
2260 1031 : cfg_changed |= execute_cse_sincos_1 (arg);
2261 : break;
2262 : default:
2263 : break;
2264 : }
2265 : }
2266 : }
2267 : }
2268 :
2269 1044300 : statistics_counter_event (fun, "sincos statements inserted",
2270 : sincos_stats.inserted);
2271 1044300 : statistics_counter_event (fun, "conv statements removed",
2272 : sincos_stats.conv_removed);
2273 :
2274 1044300 : return cfg_changed ? TODO_cleanup_cfg : 0;
2275 : }
2276 :
2277 : } // anon namespace
2278 :
2279 : gimple_opt_pass *
2280 288767 : make_pass_cse_sincos (gcc::context *ctxt)
2281 : {
2282 288767 : return new pass_cse_sincos (ctxt);
2283 : }
2284 :
2285 : /* Expand powi(x,n) into an optimal number of multiplies, when n is a
2286 : constant. */
2287 : namespace {
2288 :
2289 : const pass_data pass_data_expand_pow =
2290 : {
2291 : GIMPLE_PASS, /* type */
2292 : "pow", /* name */
2293 : OPTGROUP_NONE, /* optinfo_flags */
2294 : TV_TREE_POW, /* tv_id */
2295 : PROP_ssa, /* properties_required */
2296 : PROP_gimple_opt_math, /* properties_provided */
2297 : 0, /* properties_destroyed */
2298 : 0, /* todo_flags_start */
2299 : TODO_update_ssa, /* todo_flags_finish */
2300 : };
2301 :
2302 : class pass_expand_pow : public gimple_opt_pass
2303 : {
2304 : public:
2305 288767 : pass_expand_pow (gcc::context *ctxt)
2306 577534 : : gimple_opt_pass (pass_data_expand_pow, ctxt)
2307 : {}
2308 :
2309 : /* opt_pass methods: */
2310 1044325 : bool gate (function *) final override
2311 : {
2312 1044325 : return optimize;
2313 : }
2314 :
2315 : unsigned int execute (function *) final override;
2316 :
2317 : }; // class pass_expand_pow
2318 :
2319 : unsigned int
2320 1044320 : pass_expand_pow::execute (function *fun)
2321 : {
2322 1044320 : basic_block bb;
2323 1044320 : bool cfg_changed = false;
2324 :
2325 1044320 : calculate_dominance_info (CDI_DOMINATORS);
2326 :
2327 10415174 : FOR_EACH_BB_FN (bb, fun)
2328 : {
2329 9370854 : gimple_stmt_iterator gsi;
2330 9370854 : bool cleanup_eh = false;
2331 :
2332 91766711 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2333 : {
2334 82395857 : gimple *stmt = gsi_stmt (gsi);
2335 :
2336 : /* Only the last stmt in a bb could throw, no need to call
2337 : gimple_purge_dead_eh_edges if we change something in the middle
2338 : of a basic block. */
2339 82395857 : cleanup_eh = false;
2340 :
2341 82395857 : if (is_gimple_call (stmt)
2342 82395857 : && gimple_call_lhs (stmt))
2343 : {
2344 2000897 : tree arg0, arg1, result;
2345 2000897 : HOST_WIDE_INT n;
2346 2000897 : location_t loc;
2347 :
2348 2000897 : switch (gimple_call_combined_fn (stmt))
2349 : {
2350 601 : CASE_CFN_POW:
2351 601 : arg0 = gimple_call_arg (stmt, 0);
2352 601 : arg1 = gimple_call_arg (stmt, 1);
2353 :
2354 601 : loc = gimple_location (stmt);
2355 601 : result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2356 :
2357 601 : if (result)
2358 : {
2359 52 : tree lhs = gimple_get_lhs (stmt);
2360 52 : gassign *new_stmt = gimple_build_assign (lhs, result);
2361 52 : gimple_set_location (new_stmt, loc);
2362 52 : unlink_stmt_vdef (stmt);
2363 52 : gsi_replace (&gsi, new_stmt, true);
2364 52 : cleanup_eh = true;
2365 104 : if (gimple_vdef (stmt))
2366 0 : release_ssa_name (gimple_vdef (stmt));
2367 : }
2368 : break;
2369 :
2370 812 : CASE_CFN_POWI:
2371 812 : arg0 = gimple_call_arg (stmt, 0);
2372 812 : arg1 = gimple_call_arg (stmt, 1);
2373 812 : loc = gimple_location (stmt);
2374 :
2375 812 : if (real_minus_onep (arg0))
2376 : {
2377 11 : tree t0, t1, cond, one, minus_one;
2378 11 : gassign *stmt;
2379 :
2380 11 : t0 = TREE_TYPE (arg0);
2381 11 : t1 = TREE_TYPE (arg1);
2382 11 : one = build_real (t0, dconst1);
2383 11 : minus_one = build_real (t0, dconstm1);
2384 :
2385 11 : cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2386 11 : stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2387 : arg1, build_int_cst (t1, 1));
2388 11 : gimple_set_location (stmt, loc);
2389 11 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2390 :
2391 11 : result = make_temp_ssa_name (t0, NULL, "powi");
2392 11 : stmt = gimple_build_assign (result, COND_EXPR, cond,
2393 : minus_one, one);
2394 11 : gimple_set_location (stmt, loc);
2395 11 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2396 : }
2397 : else
2398 : {
2399 801 : if (!tree_fits_shwi_p (arg1))
2400 : break;
2401 :
2402 590 : n = tree_to_shwi (arg1);
2403 590 : result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2404 : }
2405 :
2406 601 : if (result)
2407 : {
2408 593 : tree lhs = gimple_get_lhs (stmt);
2409 593 : gassign *new_stmt = gimple_build_assign (lhs, result);
2410 593 : gimple_set_location (new_stmt, loc);
2411 593 : unlink_stmt_vdef (stmt);
2412 593 : gsi_replace (&gsi, new_stmt, true);
2413 593 : cleanup_eh = true;
2414 82397043 : if (gimple_vdef (stmt))
2415 0 : release_ssa_name (gimple_vdef (stmt));
2416 : }
2417 : break;
2418 :
2419 211 : default:;
2420 : }
2421 : }
2422 : }
2423 9370854 : if (cleanup_eh)
2424 1 : cfg_changed |= gimple_purge_dead_eh_edges (bb);
2425 : }
2426 :
2427 1044320 : return cfg_changed ? TODO_cleanup_cfg : 0;
2428 : }
2429 :
2430 : } // anon namespace
2431 :
2432 : gimple_opt_pass *
2433 288767 : make_pass_expand_pow (gcc::context *ctxt)
2434 : {
2435 288767 : return new pass_expand_pow (ctxt);
2436 : }
2437 :
2438 : /* Return true if stmt is a type conversion operation that can be stripped
2439 : when used in a widening multiply operation. */
2440 : static bool
2441 467841 : widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2442 : {
2443 467841 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2444 :
2445 467841 : if (TREE_CODE (result_type) == INTEGER_TYPE)
2446 : {
2447 467841 : tree op_type;
2448 467841 : tree inner_op_type;
2449 :
2450 467841 : if (!CONVERT_EXPR_CODE_P (rhs_code))
2451 : return false;
2452 :
2453 188379 : op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2454 :
2455 : /* If the type of OP has the same precision as the result, then
2456 : we can strip this conversion. The multiply operation will be
2457 : selected to create the correct extension as a by-product. */
2458 188379 : if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2459 : return true;
2460 :
2461 : /* We can also strip a conversion if it preserves the signed-ness of
2462 : the operation and doesn't narrow the range. */
2463 1166 : inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2464 :
2465 : /* If the inner-most type is unsigned, then we can strip any
2466 : intermediate widening operation. If it's signed, then the
2467 : intermediate widening operation must also be signed. */
2468 1166 : if ((TYPE_UNSIGNED (inner_op_type)
2469 1163 : || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2470 2329 : && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2471 : return true;
2472 :
2473 1164 : return false;
2474 : }
2475 :
2476 0 : return rhs_code == FIXED_CONVERT_EXPR;
2477 : }
2478 :
2479 : /* Return true if RHS is a suitable operand for a widening multiplication,
2480 : assuming a target type of TYPE.
2481 : There are two cases:
2482 :
2483 : - RHS makes some value at least twice as wide. Store that value
2484 : in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2485 :
2486 : - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2487 : but leave *TYPE_OUT untouched. */
2488 :
2489 : static bool
2490 923721 : is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2491 : tree *new_rhs_out)
2492 : {
2493 923721 : gimple *stmt;
2494 923721 : tree type1, rhs1;
2495 :
2496 923721 : if (TREE_CODE (rhs) == SSA_NAME)
2497 : {
2498 : /* Use tree_nonzero_bits to see if this operand is zero_extended
2499 : for unsigned widening multiplications or non-negative for
2500 : signed widening multiplications. */
2501 765877 : if (TREE_CODE (type) == INTEGER_TYPE
2502 765877 : && (TYPE_PRECISION (type) & 1) == 0
2503 1531754 : && int_mode_for_size (TYPE_PRECISION (type) / 2, 1).exists ())
2504 : {
2505 759958 : unsigned int prec = TYPE_PRECISION (type);
2506 759958 : unsigned int hprec = prec / 2;
2507 759958 : wide_int bits = wide_int::from (tree_nonzero_bits (rhs), prec,
2508 1519916 : TYPE_SIGN (TREE_TYPE (rhs)));
2509 759958 : if (TYPE_UNSIGNED (type)
2510 1319090 : && wi::bit_and (bits, wi::mask (hprec, true, prec)) == 0)
2511 : {
2512 148772 : *type_out = build_nonstandard_integer_type (hprec, true);
2513 : /* X & MODE_MASK can be simplified to (T)X. */
2514 148772 : stmt = SSA_NAME_DEF_STMT (rhs);
2515 297544 : if (is_gimple_assign (stmt)
2516 129913 : && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
2517 16895 : && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
2518 181962 : && wide_int::from (wi::to_wide (gimple_assign_rhs2 (stmt)),
2519 16595 : prec, TYPE_SIGN (TREE_TYPE (rhs)))
2520 198557 : == wi::mask (hprec, false, prec))
2521 14819 : *new_rhs_out = gimple_assign_rhs1 (stmt);
2522 : else
2523 133953 : *new_rhs_out = rhs;
2524 148772 : return true;
2525 : }
2526 611186 : else if (!TYPE_UNSIGNED (type)
2527 812012 : && wi::bit_and (bits, wi::mask (hprec - 1, true, prec)) == 0)
2528 : {
2529 24584 : *type_out = build_nonstandard_integer_type (hprec, false);
2530 24584 : *new_rhs_out = rhs;
2531 24584 : return true;
2532 : }
2533 759958 : }
2534 :
2535 592521 : stmt = SSA_NAME_DEF_STMT (rhs);
2536 592521 : if (is_gimple_assign (stmt))
2537 : {
2538 :
2539 467841 : if (widening_mult_conversion_strippable_p (type, stmt))
2540 : {
2541 187215 : rhs1 = gimple_assign_rhs1 (stmt);
2542 :
2543 187215 : if (TREE_CODE (rhs1) == INTEGER_CST)
2544 : {
2545 0 : *new_rhs_out = rhs1;
2546 0 : *type_out = NULL;
2547 0 : return true;
2548 : }
2549 : }
2550 : else
2551 : rhs1 = rhs;
2552 : }
2553 : else
2554 : rhs1 = rhs;
2555 :
2556 592521 : type1 = TREE_TYPE (rhs1);
2557 :
2558 592521 : if (TREE_CODE (type1) != TREE_CODE (type)
2559 592521 : || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2560 : return false;
2561 :
2562 59756 : *new_rhs_out = rhs1;
2563 59756 : *type_out = type1;
2564 59756 : return true;
2565 : }
2566 :
2567 157844 : if (TREE_CODE (rhs) == INTEGER_CST)
2568 : {
2569 157844 : *new_rhs_out = rhs;
2570 157844 : *type_out = NULL;
2571 157844 : return true;
2572 : }
2573 :
2574 : return false;
2575 : }
2576 :
2577 : /* Return true if STMT performs a widening multiplication, assuming the
2578 : output type is TYPE. If so, store the unwidened types of the operands
2579 : in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2580 : *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2581 : and *TYPE2_OUT would give the operands of the multiplication. */
2582 :
2583 : static bool
2584 725387 : is_widening_mult_p (gimple *stmt,
2585 : tree *type1_out, tree *rhs1_out,
2586 : tree *type2_out, tree *rhs2_out)
2587 : {
2588 725387 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2589 :
2590 725387 : if (TREE_CODE (type) == INTEGER_TYPE)
2591 : {
2592 725387 : if (TYPE_OVERFLOW_TRAPS (type))
2593 : return false;
2594 : }
2595 0 : else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2596 : return false;
2597 :
2598 725359 : if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2599 : rhs1_out))
2600 : return false;
2601 :
2602 198362 : if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2603 : rhs2_out))
2604 : return false;
2605 :
2606 192594 : if (*type1_out == NULL)
2607 : {
2608 0 : if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2609 : return false;
2610 0 : *type1_out = *type2_out;
2611 : }
2612 :
2613 192594 : if (*type2_out == NULL)
2614 : {
2615 157844 : if (!int_fits_type_p (*rhs2_out, *type1_out))
2616 : return false;
2617 153222 : *type2_out = *type1_out;
2618 : }
2619 :
2620 : /* Ensure that the larger of the two operands comes first. */
2621 187972 : if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2622 : {
2623 87 : std::swap (*type1_out, *type2_out);
2624 87 : std::swap (*rhs1_out, *rhs2_out);
2625 : }
2626 :
2627 : return true;
2628 : }
2629 :
2630 : /* Check to see if the CALL statement is an invocation of copysign
2631 : with 1. being the first argument. */
2632 : static bool
2633 163820 : is_copysign_call_with_1 (gimple *call)
2634 : {
2635 168917 : gcall *c = dyn_cast <gcall *> (call);
2636 5150 : if (! c)
2637 : return false;
2638 :
2639 5150 : enum combined_fn code = gimple_call_combined_fn (c);
2640 :
2641 5150 : if (code == CFN_LAST)
2642 : return false;
2643 :
2644 4202 : if (builtin_fn_p (code))
2645 : {
2646 1189 : switch (as_builtin_fn (code))
2647 : {
2648 30 : CASE_FLT_FN (BUILT_IN_COPYSIGN):
2649 30 : CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2650 30 : return real_onep (gimple_call_arg (c, 0));
2651 : default:
2652 : return false;
2653 : }
2654 : }
2655 :
2656 3013 : if (internal_fn_p (code))
2657 : {
2658 3013 : switch (as_internal_fn (code))
2659 : {
2660 23 : case IFN_COPYSIGN:
2661 23 : return real_onep (gimple_call_arg (c, 0));
2662 : default:
2663 : return false;
2664 : }
2665 : }
2666 :
2667 : return false;
2668 : }
2669 :
2670 : /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2671 : This only happens when the xorsign optab is defined. If the
2672 : pattern is not a xorsign pattern or if expansion fails, FALSE is
2673 : returned; otherwise TRUE is returned. */
2674 : static bool
2675 712696 : convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2676 : {
2677 712696 : tree treeop0, treeop1, lhs, type;
2678 712696 : location_t loc = gimple_location (stmt);
2679 712696 : lhs = gimple_assign_lhs (stmt);
2680 712696 : treeop0 = gimple_assign_rhs1 (stmt);
2681 712696 : treeop1 = gimple_assign_rhs2 (stmt);
2682 712696 : type = TREE_TYPE (lhs);
2683 712696 : machine_mode mode = TYPE_MODE (type);
2684 :
2685 712696 : if (HONOR_SNANS (type))
2686 : return false;
2687 :
2688 712173 : if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2689 : {
2690 213814 : gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2691 213814 : if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2692 : {
2693 213788 : call0 = SSA_NAME_DEF_STMT (treeop1);
2694 213788 : if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2695 213771 : return false;
2696 :
2697 : treeop1 = treeop0;
2698 : }
2699 43 : if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2700 : return false;
2701 :
2702 43 : gcall *c = as_a<gcall*> (call0);
2703 43 : treeop0 = gimple_call_arg (c, 1);
2704 :
2705 43 : gcall *call_stmt
2706 43 : = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2707 43 : gimple_set_lhs (call_stmt, lhs);
2708 43 : gimple_set_location (call_stmt, loc);
2709 43 : gsi_replace (gsi, call_stmt, true);
2710 43 : return true;
2711 : }
2712 :
2713 : return false;
2714 : }
2715 :
2716 : /* Process a single gimple statement STMT, which has a MULT_EXPR as
2717 : its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2718 : value is true iff we converted the statement. */
2719 :
2720 : static bool
2721 722770 : convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2722 : {
2723 722770 : tree lhs, rhs1, rhs2, type, type1, type2;
2724 722770 : enum insn_code handler;
2725 722770 : scalar_int_mode to_mode, from_mode, actual_mode;
2726 722770 : optab op;
2727 722770 : int actual_precision;
2728 722770 : location_t loc = gimple_location (stmt);
2729 722770 : bool from_unsigned1, from_unsigned2;
2730 :
2731 722770 : lhs = gimple_assign_lhs (stmt);
2732 722770 : type = TREE_TYPE (lhs);
2733 722770 : if (TREE_CODE (type) != INTEGER_TYPE)
2734 : return false;
2735 :
2736 587676 : if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2737 : return false;
2738 :
2739 : /* If any one of rhs1 and rhs2 is subject to abnormal coalescing,
2740 : avoid the transform. */
2741 151966 : if ((TREE_CODE (rhs1) == SSA_NAME
2742 151966 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2743 303931 : || (TREE_CODE (rhs2) == SSA_NAME
2744 24212 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2745 : return false;
2746 :
2747 151965 : to_mode = SCALAR_INT_TYPE_MODE (type);
2748 151965 : from_mode = SCALAR_INT_TYPE_MODE (type1);
2749 151965 : if (to_mode == from_mode)
2750 : return false;
2751 :
2752 151961 : from_unsigned1 = TYPE_UNSIGNED (type1);
2753 151961 : from_unsigned2 = TYPE_UNSIGNED (type2);
2754 :
2755 151961 : if (from_unsigned1 && from_unsigned2)
2756 : op = umul_widen_optab;
2757 56058 : else if (!from_unsigned1 && !from_unsigned2)
2758 : op = smul_widen_optab;
2759 : else
2760 1911 : op = usmul_widen_optab;
2761 :
2762 151961 : handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2763 : &actual_mode);
2764 :
2765 151961 : if (handler == CODE_FOR_nothing)
2766 : {
2767 141887 : if (op != smul_widen_optab)
2768 : {
2769 : /* We can use a signed multiply with unsigned types as long as
2770 : there is a wider mode to use, or it is the smaller of the two
2771 : types that is unsigned. Note that type1 >= type2, always. */
2772 89251 : if ((TYPE_UNSIGNED (type1)
2773 87553 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2774 89251 : || (TYPE_UNSIGNED (type2)
2775 1698 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2776 : {
2777 89251 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2778 178502 : || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2779 89251 : return false;
2780 : }
2781 :
2782 0 : op = smul_widen_optab;
2783 0 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2784 : from_mode,
2785 : &actual_mode);
2786 :
2787 0 : if (handler == CODE_FOR_nothing)
2788 : return false;
2789 :
2790 : from_unsigned1 = from_unsigned2 = false;
2791 : }
2792 : else
2793 : {
2794 : /* Expand can synthesize smul_widen_optab if the target
2795 : supports umul_widen_optab. */
2796 52636 : op = umul_widen_optab;
2797 52636 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2798 : from_mode,
2799 : &actual_mode);
2800 52636 : if (handler == CODE_FOR_nothing)
2801 : return false;
2802 : }
2803 : }
2804 :
2805 : /* Ensure that the inputs to the handler are in the correct precison
2806 : for the opcode. This will be the full mode size. */
2807 10074 : actual_precision = GET_MODE_PRECISION (actual_mode);
2808 10074 : if (2 * actual_precision > TYPE_PRECISION (type))
2809 : return false;
2810 10074 : if (actual_precision != TYPE_PRECISION (type1)
2811 10074 : || from_unsigned1 != TYPE_UNSIGNED (type1))
2812 : {
2813 8 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2814 : {
2815 0 : if (TREE_CODE (rhs1) == INTEGER_CST)
2816 0 : rhs1 = fold_convert (type1, rhs1);
2817 : else
2818 0 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2819 : }
2820 8 : type1 = build_nonstandard_integer_type (actual_precision,
2821 : from_unsigned1);
2822 : }
2823 10074 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2824 : {
2825 9333 : if (TREE_CODE (rhs1) == INTEGER_CST)
2826 0 : rhs1 = fold_convert (type1, rhs1);
2827 : else
2828 9333 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2829 : }
2830 10074 : if (actual_precision != TYPE_PRECISION (type2)
2831 10074 : || from_unsigned2 != TYPE_UNSIGNED (type2))
2832 : {
2833 8 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2834 : {
2835 8 : if (TREE_CODE (rhs2) == INTEGER_CST)
2836 8 : rhs2 = fold_convert (type2, rhs2);
2837 : else
2838 0 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2839 : }
2840 8 : type2 = build_nonstandard_integer_type (actual_precision,
2841 : from_unsigned2);
2842 : }
2843 10074 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2844 : {
2845 9528 : if (TREE_CODE (rhs2) == INTEGER_CST)
2846 2024 : rhs2 = fold_convert (type2, rhs2);
2847 : else
2848 7504 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2849 : }
2850 :
2851 10074 : gimple_assign_set_rhs1 (stmt, rhs1);
2852 10074 : gimple_assign_set_rhs2 (stmt, rhs2);
2853 10074 : gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2854 10074 : update_stmt (stmt);
2855 10074 : widen_mul_stats.widen_mults_inserted++;
2856 10074 : return true;
2857 : }
2858 :
2859 : /* Process a single gimple statement STMT, which is found at the
2860 : iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2861 : rhs (given by CODE), and try to convert it into a
2862 : WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2863 : is true iff we converted the statement. */
2864 :
2865 : static bool
2866 2560768 : convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2867 : enum tree_code code)
2868 : {
2869 2560768 : gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2870 2560768 : gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2871 2560768 : tree type, type1, type2, optype;
2872 2560768 : tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2873 2560768 : enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2874 2560768 : optab this_optab;
2875 2560768 : enum tree_code wmult_code;
2876 2560768 : enum insn_code handler;
2877 2560768 : scalar_mode to_mode, from_mode, actual_mode;
2878 2560768 : location_t loc = gimple_location (stmt);
2879 2560768 : int actual_precision;
2880 2560768 : bool from_unsigned1, from_unsigned2;
2881 :
/* The result must be an integer or fixed-point type whose precision
   matches its mode.  */
2882 2560768 : lhs = gimple_assign_lhs (stmt);
2883 2560768 : type = TREE_TYPE (lhs);
2884 2560768 : if ((TREE_CODE (type) != INTEGER_TYPE
2885 398672 : && TREE_CODE (type) != FIXED_POINT_TYPE)
2886 2560768 : || !type_has_mode_precision_p (type))
2887 399750 : return false;
2888 :
2889 2161018 : if (code == MINUS_EXPR)
2890 : wmult_code = WIDEN_MULT_MINUS_EXPR;
2891 : else
2892 1923741 : wmult_code = WIDEN_MULT_PLUS_EXPR;
2893 :
2894 2161018 : rhs1 = gimple_assign_rhs1 (stmt);
2895 2161018 : rhs2 = gimple_assign_rhs2 (stmt);
2896 :
/* Record the defining statement and, when it is an assignment, its rhs
   code for each SSA operand; the codes stay ERROR_MARK otherwise.  */
2897 2161018 : if (TREE_CODE (rhs1) == SSA_NAME)
2898 : {
2899 2126078 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2900 2126078 : if (is_gimple_assign (rhs1_stmt))
2901 1256309 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2902 : }
2903 :
2904 2161018 : if (TREE_CODE (rhs2) == SSA_NAME)
2905 : {
2906 788229 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2907 788229 : if (is_gimple_assign (rhs2_stmt))
2908 609394 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2909 : }
2910 :
2911 : /* Allow for one conversion statement between the multiply
2912 : and addition/subtraction statement. If there are more than
2913 : one conversions then we assume they would invalidate this
2914 : transformation. If that's not the case then they should have
2915 : been folded before now. */
2916 2161018 : if (CONVERT_EXPR_CODE_P (rhs1_code))
2917 : {
2918 422390 : conv1_stmt = rhs1_stmt;
2919 422390 : rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2920 422390 : if (TREE_CODE (rhs1) == SSA_NAME)
2921 : {
2922 357070 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2923 357070 : if (is_gimple_assign (rhs1_stmt))
2924 205452 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2925 : }
2926 : else
2927 : return false;
2928 : }
2929 2095698 : if (CONVERT_EXPR_CODE_P (rhs2_code))
2930 : {
2931 198650 : conv2_stmt = rhs2_stmt;
2932 198650 : rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2933 198650 : if (TREE_CODE (rhs2) == SSA_NAME)
2934 : {
2935 187612 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2936 187612 : if (is_gimple_assign (rhs2_stmt))
2937 121366 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2938 : }
2939 : else
2940 : return false;
2941 : }
2942 :
2943 : /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2944 : is_widening_mult_p, but we still need the rhs returns.
2945 :
2946 : It might also appear that it would be sufficient to use the existing
2947 : operands of the widening multiply, but that would limit the choice of
2948 : multiply-and-accumulate instructions.
2949 :
2950 : If the widened-multiplication result has more than one uses, it is
2951 : probably wiser not to do the conversion. Also restrict this operation
2952 : to single basic block to avoid moving the multiply to a different block
2953 : with a higher execution frequency. */
/* Note that the first arm requires PLUS_EXPR, so for MINUS_EXPR only a
   multiplication feeding the second operand is considered.  */
2954 2084660 : if (code == PLUS_EXPR
2955 1852295 : && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2956 : {
2957 137012 : if (!has_single_use (rhs1)
2958 77500 : || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2959 205369 : || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2960 : &type2, &mult_rhs2))
2961 115581 : return false;
2962 : add_rhs = rhs2;
2963 : conv_stmt = conv1_stmt;
2964 : }
2965 1947648 : else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2966 : {
2967 125786 : if (!has_single_use (rhs2)
2968 76475 : || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2969 195140 : || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2970 : &type2, &mult_rhs2))
2971 111211 : return false;
2972 : add_rhs = rhs1;
2973 : conv_stmt = conv2_stmt;
2974 : }
2975 : else
2976 : return false;
2977 :
/* Nothing is widened if the result mode equals the mode of TYPE1.  */
2978 36006 : to_mode = SCALAR_TYPE_MODE (type);
2979 36006 : from_mode = SCALAR_TYPE_MODE (type1);
2980 36006 : if (to_mode == from_mode)
2981 : return false;
2982 :
2983 : /* For fixed point types, the mode classes could be different
2984 : so reject that case. */
2985 36003 : if (GET_MODE_CLASS (from_mode) != GET_MODE_CLASS (to_mode))
2986 : return false;
2987 :
2988 36003 : from_unsigned1 = TYPE_UNSIGNED (type1);
2989 36003 : from_unsigned2 = TYPE_UNSIGNED (type2);
2990 36003 : optype = type1;
2991 :
2992 : /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2993 36003 : if (from_unsigned1 != from_unsigned2)
2994 : {
2995 907 : if (!INTEGRAL_TYPE_P (type))
2996 : return false;
2997 : /* We can use a signed multiply with unsigned types as long as
2998 : there is a wider mode to use, or it is the smaller of the two
2999 : types that is unsigned. Note that type1 >= type2, always. */
3000 907 : if ((from_unsigned1
3001 56 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3002 907 : || (from_unsigned2
3003 851 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3004 : {
/* Note that a successful exists () call replaces FROM_MODE with the
   wider mode; that wider mode must still be narrower than TO_MODE.  */
3005 1778 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3006 1814 : || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3007 871 : return false;
3008 : }
3009 :
/* Treat both operands as signed and look the optab up with a signed
   integer type of FROM_MODE's precision.  */
3010 36 : from_unsigned1 = from_unsigned2 = false;
3011 36 : optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3012 : false);
3013 : }
3014 :
3015 : /* If there was a conversion between the multiply and addition
3016 : then we need to make sure it fits a multiply-and-accumulate.
3017 : There should be a single mode change which does not change the
3018 : value. */
3019 35132 : if (conv_stmt)
3020 : {
3021 : /* We use the original, unmodified data types for this. */
3022 737 : tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3023 737 : tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3024 737 : int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3025 737 : bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3026 :
3027 737 : if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3028 : {
3029 : /* Conversion is a truncate. */
3030 0 : if (TYPE_PRECISION (to_type) < data_size)
3031 : return false;
3032 : }
3033 737 : else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3034 : {
3035 : /* Conversion is an extend. Check it's the right sort. */
3036 382 : if (TYPE_UNSIGNED (from_type) != is_unsigned
3037 382 : && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3038 : return false;
3039 : }
3040 : /* else convert is a no-op for our purposes. */
3041 : }
3042 :
3043 : /* Verify that the machine can perform a widening multiply
3044 : accumulate in this mode/signedness combination, otherwise
3045 : this transformation is likely to pessimize code. */
3046 34813 : this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3047 34813 : handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3048 : from_mode, &actual_mode);
3049 :
3050 34813 : if (handler == CODE_FOR_nothing)
3051 : return false;
3052 :
3053 : /* Ensure that the inputs to the handler are in the correct precision
3054 : for the opcode. This will be the full mode size. */
3055 0 : actual_precision = GET_MODE_PRECISION (actual_mode);
/* If the handler's input precision or the chosen signedness differs from
   TYPE1, first bring MULT_RHS1 to TYPE1 (when it is not already compatible)
   and then switch TYPE1 to an integer type of ACTUAL_PRECISION; the check
   that follows converts MULT_RHS1 to that final type.  */
3056 0 : if (actual_precision != TYPE_PRECISION (type1)
3057 0 : || from_unsigned1 != TYPE_UNSIGNED (type1))
3058 : {
3059 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3060 : {
3061 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3062 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3063 : else
3064 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3065 : }
3066 0 : type1 = build_nonstandard_integer_type (actual_precision,
3067 : from_unsigned1);
3068 : }
3069 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3070 : {
3071 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3072 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3073 : else
3074 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3075 : }
/* Same two-step adjustment for the second multiplication operand.  */
3076 0 : if (actual_precision != TYPE_PRECISION (type2)
3077 0 : || from_unsigned2 != TYPE_UNSIGNED (type2))
3078 : {
3079 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3080 : {
3081 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3082 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3083 : else
3084 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3085 : }
3086 0 : type2 = build_nonstandard_integer_type (actual_precision,
3087 : from_unsigned2);
3088 : }
3089 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3090 : {
3091 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3092 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3093 : else
3094 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3095 : }
3096 :
/* The addend is cast to the result type when its type is not already
   compatible with it.  */
3097 0 : if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3098 0 : add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3099 :
/* Replace the rhs of the statement at GSI with the three-operand
   WMULT_CODE operation and count the transformation.  */
3100 0 : gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3101 : add_rhs);
3102 0 : update_stmt (gsi_stmt (*gsi));
3103 0 : widen_mul_stats.maccs_inserted++;
3104 0 : return true;
3105 : }
3106 :
3107 : /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3108 : OP2 and which we know is used in statements that can be, together with the
3109 : multiplication, converted to FMAs, perform the transformation. */
3110 :
3111 : static void
3112 17428 : convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3113 : {
3114 17428 : gimple *use_stmt;
3115 17428 : imm_use_iterator imm_iter;
3116 17428 : gcall *fma_stmt;
3117 :
/* Each non-debug use of MUL_RESULT is replaced by its own FMA call.  */
3118 52340 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3119 : {
3120 17484 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3121 17484 : tree addop, mulop1 = op1, result = mul_result;
3122 17484 : bool negate_p = false;
3123 17484 : gimple_seq seq = NULL;
3124 :
3125 17484 : if (is_gimple_debug (use_stmt))
3126 0 : continue;
3127 :
3128 : /* If the use is a type convert, look further into it if the operations
3129 : are the same under two's complement. */
3130 17484 : tree lhs_type;
3131 17484 : if (gimple_assign_cast_p (use_stmt)
3132 0 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3133 17484 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3134 : {
3135 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3136 0 : gimple *tmp_use;
3137 0 : use_operand_p tmp_use_p;
3138 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3139 : {
/* Drop the conversion statement and continue with its single user;
   RESULT now names the value that user consumes.  */
3140 0 : release_defs (use_stmt);
3141 0 : use_stmt = tmp_use;
3142 0 : result = cast_lhs;
3143 0 : gsi_remove (&gsi, true);
3144 0 : gsi = gsi_for_stmt (use_stmt);
3145 : }
3146 : }
3147 :
/* A negation of the product is removed here and folded into the
   multiplication operand below via NEGATE_P.
   NOTE(review): the return value of single_imm_use is not checked here;
   presumably the caller has already verified the negation has a single
   use -- confirm.  */
3148 17484 : if (is_gimple_assign (use_stmt)
3149 17484 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3150 : {
3151 700 : result = gimple_assign_lhs (use_stmt);
3152 700 : use_operand_p use_p;
3153 700 : gimple *neguse_stmt;
3154 700 : single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3155 700 : gsi_remove (&gsi, true);
3156 700 : release_defs (use_stmt);
3157 :
3158 700 : use_stmt = neguse_stmt;
3159 700 : gsi = gsi_for_stmt (use_stmt);
3160 700 : negate_p = true;
3161 : }
3162 :
/* The function comment states the uses are known to be convertible, so a
   statement that cannot be interpreted here is treated as unreachable.  */
3163 17484 : tree cond, else_value, ops[3], len, bias;
3164 17484 : tree_code code;
3165 17484 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3166 : ops, &else_value,
3167 : &len, &bias))
3168 0 : gcc_unreachable ();
/* The addend is whichever of the first two operands is not RESULT.  */
3169 17484 : addop = ops[0] == result ? ops[1] : ops[0];
3170 :
3171 17484 : if (code == MINUS_EXPR)
3172 : {
3173 5747 : if (ops[0] == result)
3174 : /* a * b - c -> a * b + (-c) */
3175 2907 : addop = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (addop), addop);
3176 : else
3177 : /* a - b * c -> (-b) * c + a */
3178 2840 : negate_p = !negate_p;
3179 : }
3180 :
3181 17484 : if (negate_p)
3182 3540 : mulop1 = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (mulop1), mulop1);
3183 :
/* Any negations built above are inserted before the use statement.  */
3184 17484 : if (seq)
3185 5742 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3186 :
3187 : /* Ensure all the operands are of the same type. Use the type of the
3188 : addend as that's the statement being replaced. */
3189 17484 : op2 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3190 17484 : UNKNOWN_LOCATION, TREE_TYPE (addop), op2);
3191 17484 : mulop1 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3192 17484 : UNKNOWN_LOCATION, TREE_TYPE (addop), mulop1);
3193 :
/* Pick the internal function: IFN_COND_LEN_FMA when LEN is set,
   IFN_COND_FMA when only COND is set, plain IFN_FMA otherwise.  */
3194 17484 : if (len)
3195 0 : fma_stmt
3196 0 : = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3197 : addop, else_value, len, bias);
3198 17484 : else if (cond)
3199 94 : fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3200 : op2, addop, else_value);
3201 : else
3202 17390 : fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
/* The call takes over the lhs of the statement it replaces and is marked
   nothrow exactly when that statement cannot throw internally.  */
3203 17484 : gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3204 17484 : gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3205 : use_stmt));
3206 17484 : gsi_replace (&gsi, fma_stmt, true);
3207 : /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3208 : regardless of where the negation occurs. */
3209 17484 : gimple *orig_stmt = gsi_stmt (gsi);
3210 17484 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3211 : {
3212 5790 : if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3213 0 : gcc_unreachable ();
3214 5790 : update_stmt (gsi_stmt (gsi));
3215 : }
3216 :
3217 17484 : if (dump_file && (dump_flags & TDF_DETAILS))
3218 : {
3219 3 : fprintf (dump_file, "Generated FMA ");
3220 3 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3221 3 : fprintf (dump_file, "\n");
3222 : }
3223 :
3224 : /* If the FMA result is negated in a single use, fold the negation
3225 : too. */
3226 17484 : orig_stmt = gsi_stmt (gsi);
3227 17484 : use_operand_p use_p;
3228 17484 : gimple *neg_stmt;
3229 17484 : if (is_gimple_call (orig_stmt)
3230 17484 : && gimple_call_internal_p (orig_stmt)
3231 17484 : && gimple_call_lhs (orig_stmt)
3232 17484 : && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3233 17484 : && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3234 12548 : && is_gimple_assign (neg_stmt)
3235 9923 : && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3236 18837 : && !stmt_could_throw_p (cfun, neg_stmt))
3237 : {
3238 1353 : gsi = gsi_for_stmt (neg_stmt);
3239 1353 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3240 : {
3241 1353 : if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3242 0 : gcc_unreachable ();
3243 1353 : update_stmt (gsi_stmt (gsi));
3244 1353 : if (dump_file && (dump_flags & TDF_DETAILS))
3245 : {
3246 0 : fprintf (dump_file, "Folded FMA negation ");
3247 0 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3248 0 : fprintf (dump_file, "\n");
3249 : }
3250 : }
3251 : }
3252 :
3253 17484 : widen_mul_stats.fmas_inserted++;
3254 17428 : }
3255 17428 : }
3256 :
3257 : /* Data necessary to perform the actual transformation from a multiplication
3258 : and an addition to an FMA after decision is taken it should be done and to
3259 : then delete the multiplication statement from the function IL. */
3260 :
3261 : struct fma_transformation_info
3262 : {
3263 : gimple *mul_stmt;
3264 : tree mul_result;
3265 : tree op1;
3266 : tree op2;
3267 : };
3268 :
3269 : /* Structure containing the current state of FMA deferring, i.e. whether we are
3270 : deferring, whether to continue deferring, and all data necessary to come
3271 : back and perform all deferred transformations. */
3272 :
3273 10157378 : class fma_deferring_state
3274 : {
3275 : public:
3276 : /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3277 : do any deferring. */
3278 :
3279 10157378 : fma_deferring_state (bool perform_deferring)
3280 10157378 : : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3281 10157378 : m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3282 :
3283 : /* List of FMA candidates for which we the transformation has been determined
3284 : possible but we at this point in BB analysis we do not consider them
3285 : beneficial. */
3286 : auto_vec<fma_transformation_info, 8> m_candidates;
3287 :
3288 : /* Set of results of multiplication that are part of an already deferred FMA
3289 : candidates. */
3290 : hash_set<tree> m_mul_result_set;
3291 :
3292 : /* The PHI that supposedly feeds back result of a FMA to another over loop
3293 : boundary. */
3294 : gphi *m_initial_phi;
3295 :
3296 : /* Result of the last produced FMA candidate or NULL if there has not been
3297 : one. */
3298 : tree m_last_result;
3299 :
3300 : /* If true, deferring might still be profitable. If false, transform all
3301 : candidates and no longer defer. */
3302 : bool m_deferring_p;
3303 : };
3304 :
3305 : /* Transform all deferred FMA candidates and mark STATE as no longer
3306 : deferring. */
3307 :
3308 : static void
3309 3636494 : cancel_fma_deferring (fma_deferring_state *state)
3310 : {
3311 3636494 : if (!state->m_deferring_p)
3312 : return;
3313 :
3314 2625242 : for (unsigned i = 0; i < state->m_candidates.length (); i++)
3315 : {
3316 906 : if (dump_file && (dump_flags & TDF_DETAILS))
3317 0 : fprintf (dump_file, "Generating deferred FMA\n");
3318 :
3319 906 : const fma_transformation_info &fti = state->m_candidates[i];
3320 906 : convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3321 :
3322 906 : gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3323 906 : gsi_remove (&gsi, true);
3324 906 : release_defs (fti.mul_stmt);
3325 : }
3326 2624336 : state->m_deferring_p = false;
3327 : }
3328 :
3329 : /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3330 : Otherwise return NULL. */
3331 :
3332 : static gphi *
3333 5184 : result_of_phi (tree op)
3334 : {
3335 0 : if (TREE_CODE (op) != SSA_NAME)
3336 : return NULL;
3337 :
3338 5059 : return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3339 : }
3340 :
3341 : /* After processing statements of a BB and recording STATE, return true if the
3342 : initial phi is fed by the last FMA candidate result ore one such result from
3343 : previously processed BBs marked in LAST_RESULT_SET. */
3344 :
3345 : static bool
3346 360 : last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3347 : hash_set<tree> *last_result_set)
3348 : {
3349 360 : ssa_op_iter iter;
3350 360 : use_operand_p use;
3351 888 : FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3352 : {
3353 624 : tree t = USE_FROM_PTR (use);
3354 624 : if (t == state->m_last_result
3355 624 : || last_result_set->contains (t))
3356 96 : return true;
3357 : }
3358 :
3359 : return false;
3360 : }
3361 :
3362 : /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3363 : with uses in additions and subtractions to form fused multiply-add
3364 : operations. Returns true if successful and MUL_STMT should be removed.
3365 : If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3366 : on MUL_COND, otherwise it is unconditional.
3367 :
3368 : If STATE indicates that we are deferring FMA transformation, that means
3369 : that we do not produce FMAs for basic blocks which look like:
3370 :
3371 : <bb 6>
3372 : # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3373 : _65 = _14 * _16;
3374 : accumulator_66 = _65 + accumulator_111;
3375 :
3376 : or its unrolled version, i.e. with several FMA candidates that feed result
3377 : of one into the addend of another. Instead, we add them to a list in STATE
3378 : and if we later discover an FMA candidate that is not part of such a chain,
3379 : we go back and perform all deferred past candidates. */
3380 :
3381 : static bool
3382 712782 : convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3383 : fma_deferring_state *state, tree mul_cond = NULL_TREE,
3384 : tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3385 : {
3386 712782 : tree mul_result = gimple_get_lhs (mul_stmt);
3387 : /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3388 : if the statement was left just for the side-effects. */
3389 712782 : if (!mul_result)
3390 : return false;
3391 712782 : tree type = TREE_TYPE (mul_result);
3392 712782 : gimple *use_stmt, *neguse_stmt;
3393 712782 : use_operand_p use_p;
3394 712782 : imm_use_iterator imm_iter;
3395 :
3396 615722 : if (FLOAT_TYPE_P (type)
3397 737141 : && flag_fp_contract_mode != FP_CONTRACT_FAST)
3398 : return false;
3399 :
3400 : /* We don't want to do bitfield reduction ops. */
3401 707642 : if (INTEGRAL_TYPE_P (type)
3402 707642 : && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3403 : return false;
3404 :
3405 : /* If the target doesn't support it, don't generate it. We assume that
3406 : if fma isn't available then fms, fnma or fnms are not either. */
3407 707453 : optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3408 707453 : if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3409 : return false;
3410 :
3411 : /* If the multiplication has zero uses, it is kept around probably because
3412 : of -fnon-call-exceptions. Don't optimize it away in that case,
3413 : it is DCE job. */
3414 23141 : if (has_zero_uses (mul_result))
3415 : return false;
3416 :
3417 23141 : bool check_defer
3418 23141 : = (state->m_deferring_p
3419 23141 : && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3420 23141 : param_avoid_fma_max_bits));
3421 23141 : bool defer = check_defer;
3422 23141 : bool seen_negate_p = false;
3423 :
3424 : /* There is no numerical difference between fused and unfused integer FMAs,
3425 : and the assumption below that FMA is as cheap as addition is unlikely
3426 : to be true, especially if the multiplication occurs multiple times on
3427 : the same chain. E.g., for something like:
3428 :
3429 : (((a * b) + c) >> 1) + (a * b)
3430 :
3431 : we do not want to duplicate the a * b into two additions, not least
3432 : because the result is not a natural FMA chain. */
3433 23141 : if (ANY_INTEGRAL_TYPE_P (type)
3434 23141 : && !has_single_use (mul_result))
3435 : return false;
3436 :
3437 23141 : if (!dbg_cnt (form_fma))
3438 : return false;
3439 :
3440 : /* Make sure that the multiplication statement becomes dead after
3441 : the transformation, thus that all uses are transformed to FMAs.
3442 : This means we assume that an FMA operation has the same cost
3443 : as an addition. */
3444 41409 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3445 : {
3446 23885 : tree result = mul_result;
3447 23885 : bool negate_p = false;
3448 :
3449 23885 : use_stmt = USE_STMT (use_p);
3450 :
3451 23885 : if (is_gimple_debug (use_stmt))
3452 214 : continue;
3453 :
3454 : /* If the use is a type convert, look further into it if the operations
3455 : are the same under two's complement. */
3456 23671 : tree lhs_type;
3457 23671 : if (gimple_assign_cast_p (use_stmt)
3458 295 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3459 23966 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3460 : {
3461 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3462 0 : gimple *tmp_use;
3463 0 : use_operand_p tmp_use_p;
3464 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3465 0 : use_stmt = tmp_use;
3466 0 : result = cast_lhs;
3467 : }
3468 :
3469 : /* For now restrict this operations to single basic blocks. In theory
3470 : we would want to support sinking the multiplication in
3471 : m = a*b;
3472 : if ()
3473 : ma = m + c;
3474 : else
3475 : d = m;
3476 : to form a fma in the then block and sink the multiplication to the
3477 : else block. */
3478 23671 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3479 5617 : return false;
3480 :
3481 : /* A negate on the multiplication leads to FNMA. */
3482 22820 : if (is_gimple_assign (use_stmt)
3483 22820 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3484 : {
3485 706 : ssa_op_iter iter;
3486 706 : use_operand_p usep;
3487 :
3488 : /* If (due to earlier missed optimizations) we have two
3489 : negates of the same value, treat them as equivalent
3490 : to a single negate with multiple uses. */
3491 706 : if (seen_negate_p)
3492 0 : return false;
3493 :
3494 706 : result = gimple_assign_lhs (use_stmt);
3495 :
3496 : /* Make sure the negate statement becomes dead with this
3497 : single transformation. */
3498 706 : if (!single_imm_use (gimple_assign_lhs (use_stmt),
3499 : &use_p, &neguse_stmt))
3500 : return false;
3501 :
3502 : /* Make sure the multiplication isn't also used on that stmt. */
3503 2836 : FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3504 1424 : if (USE_FROM_PTR (usep) == mul_result)
3505 : return false;
3506 :
3507 : /* Re-validate. */
3508 706 : use_stmt = neguse_stmt;
3509 706 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3510 : return false;
3511 :
3512 706 : negate_p = seen_negate_p = true;
3513 : }
3514 :
3515 22820 : tree cond, else_value, ops[3], len, bias;
3516 22820 : tree_code code;
3517 22820 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3518 : &else_value, &len, &bias))
3519 : return false;
3520 :
3521 : /* The multiplication result must be one of the addition operands. */
3522 20227 : if (ops[0] != result && ops[1] != result)
3523 : return false;
3524 :
3525 19716 : switch (code)
3526 : {
3527 5753 : case MINUS_EXPR:
3528 5753 : if (ops[1] == result)
3529 2840 : negate_p = !negate_p;
3530 : break;
3531 : case PLUS_EXPR:
3532 : break;
3533 : default:
3534 : /* FMA can only be formed from PLUS and MINUS. */
3535 : return false;
3536 : }
3537 :
3538 18076 : if (len)
3539 : {
3540 : /* For COND_LEN_* operations, we may have dummpy mask which is
3541 : the all true mask. Such TREE type may be mul_cond != cond
3542 : but we still consider they are equal. */
3543 0 : if (mul_cond && cond != mul_cond
3544 0 : && !(integer_truep (mul_cond) && integer_truep (cond)))
3545 0 : return false;
3546 :
3547 0 : if (else_value == result)
3548 : return false;
3549 :
3550 0 : if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3551 : opt_type))
3552 : return false;
3553 :
3554 0 : if (mul_len)
3555 : {
3556 0 : poly_int64 mul_value, value;
3557 0 : if (poly_int_tree_p (mul_len, &mul_value)
3558 0 : && poly_int_tree_p (len, &value)
3559 0 : && maybe_ne (mul_value, value))
3560 0 : return false;
3561 0 : else if (mul_len != len)
3562 : return false;
3563 :
3564 0 : if (wi::to_widest (mul_bias) != wi::to_widest (bias))
3565 : return false;
3566 : }
3567 : }
3568 : else
3569 : {
3570 18076 : if (mul_cond && cond != mul_cond)
3571 : return false;
3572 :
3573 18064 : if (cond)
3574 : {
3575 104 : if (cond == result || else_value == result)
3576 : return false;
3577 94 : if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3578 : opt_type))
3579 : return false;
3580 : }
3581 : }
3582 :
3583 : /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3584 : we'll visit later, we might be able to get a more profitable
3585 : match with fnma.
3586 : OTOH, if we don't, a negate / fma pair has likely lower latency
3587 : that a mult / subtract pair. */
3588 18054 : if (code == MINUS_EXPR
3589 5747 : && !negate_p
3590 2207 : && ops[0] == result
3591 2207 : && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3592 0 : && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3593 0 : && TREE_CODE (ops[1]) == SSA_NAME
3594 18054 : && has_single_use (ops[1]))
3595 : {
3596 0 : gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3597 0 : if (is_gimple_assign (stmt2)
3598 0 : && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3599 : return false;
3600 : }
3601 :
3602 : /* We can't handle a * b + a * b. */
3603 18054 : if (ops[0] == ops[1])
3604 : return false;
3605 : /* If deferring, make sure we are not looking at an instruction that
3606 : wouldn't have existed if we were not. */
3607 18054 : if (state->m_deferring_p
3608 18054 : && (state->m_mul_result_set.contains (ops[0])
3609 6344 : || state->m_mul_result_set.contains (ops[1])))
3610 0 : return false;
3611 :
3612 18054 : if (check_defer)
3613 : {
3614 6202 : tree use_lhs = gimple_get_lhs (use_stmt);
3615 6202 : if (state->m_last_result)
3616 : {
3617 1018 : if (ops[1] == state->m_last_result
3618 1018 : || ops[0] == state->m_last_result)
3619 : defer = true;
3620 : else
3621 6202 : defer = false;
3622 : }
3623 : else
3624 : {
3625 5184 : gcc_checking_assert (!state->m_initial_phi);
3626 5184 : gphi *phi;
3627 5184 : if (ops[0] == result)
3628 3269 : phi = result_of_phi (ops[1]);
3629 : else
3630 : {
3631 1915 : gcc_assert (ops[1] == result);
3632 1915 : phi = result_of_phi (ops[0]);
3633 : }
3634 :
3635 : if (phi)
3636 : {
3637 928 : state->m_initial_phi = phi;
3638 928 : defer = true;
3639 : }
3640 : else
3641 : defer = false;
3642 : }
3643 :
3644 6202 : state->m_last_result = use_lhs;
3645 6202 : check_defer = false;
3646 : }
3647 : else
3648 : defer = false;
3649 :
3650 : /* While it is possible to validate whether or not the exact form that
3651 : we've recognized is available in the backend, the assumption is that
3652 : if the deferring logic above did not trigger, the transformation is
3653 : never a loss. For instance, suppose the target only has the plain FMA
3654 : pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3655 : MUL+SUB for FMA+NEG, which is still two operations. Consider
3656 : -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3657 : form the two NEGs are independent and could be run in parallel. */
3658 5617 : }
3659 :
3660 17524 : if (defer)
3661 : {
3662 1002 : fma_transformation_info fti;
3663 1002 : fti.mul_stmt = mul_stmt;
3664 1002 : fti.mul_result = mul_result;
3665 1002 : fti.op1 = op1;
3666 1002 : fti.op2 = op2;
3667 1002 : state->m_candidates.safe_push (fti);
3668 1002 : state->m_mul_result_set.add (mul_result);
3669 :
3670 1002 : if (dump_file && (dump_flags & TDF_DETAILS))
3671 : {
3672 0 : fprintf (dump_file, "Deferred generating FMA for multiplication ");
3673 0 : print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3674 0 : fprintf (dump_file, "\n");
3675 : }
3676 :
3677 1002 : return false;
3678 : }
3679 : else
3680 : {
3681 16522 : if (state->m_deferring_p)
3682 4844 : cancel_fma_deferring (state);
3683 16522 : convert_mult_to_fma_1 (mul_result, op1, op2);
3684 16522 : return true;
3685 : }
3686 : }
3687 :
3688 :
3689 : /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3690 : a check for non-zero like:
3691 : _1 = x_4(D) * y_5(D);
3692 : *res_7(D) = _1;
3693 : if (x_4(D) != 0)
3694 : goto <bb 3>; [50.00%]
3695 : else
3696 : goto <bb 4>; [50.00%]
3697 :
3698 : <bb 3> [local count: 536870913]:
3699 : _2 = _1 / x_4(D);
3700 : _9 = _2 != y_5(D);
3701 : _10 = (int) _9;
3702 :
3703 : <bb 4> [local count: 1073741824]:
3704 : # iftmp.0_3 = PHI <_10(3), 0(2)>
3705 : then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3706 : optimize the x_4(D) != 0 condition to 1. */
3707 :
3708 : static void
3709 145 : maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3710 : gimple *div_stmt, bool *cfg_changed)
3711 : {
3712 145 : basic_block bb = gimple_bb (cond_stmt);
3713 290 : if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3714 51 : return;
3715 145 : edge pred_edge = single_pred_edge (bb);
3716 145 : basic_block pred_bb = pred_edge->src;
3717 145 : if (EDGE_COUNT (pred_bb->succs) != 2)
3718 : return;
3719 102 : edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3720 102 : edge other_succ_edge = NULL;
3721 102 : if (gimple_code (cond_stmt) == GIMPLE_COND)
3722 : {
3723 48 : if (EDGE_COUNT (bb->succs) != 2)
3724 : return;
3725 48 : other_succ_edge = EDGE_SUCC (bb, 0);
3726 48 : if (gimple_cond_code (cond_stmt) == NE_EXPR)
3727 : {
3728 24 : if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3729 24 : other_succ_edge = EDGE_SUCC (bb, 1);
3730 : }
3731 : else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3732 48 : other_succ_edge = EDGE_SUCC (bb, 0);
3733 48 : if (other_edge->dest != other_succ_edge->dest)
3734 : return;
3735 : }
3736 105 : else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3737 : return;
3738 202 : gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
3739 101 : if (zero_cond == NULL
3740 101 : || (gimple_cond_code (zero_cond)
3741 101 : != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3742 101 : || !integer_zerop (gimple_cond_rhs (zero_cond)))
3743 0 : return;
3744 101 : tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3745 : if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3746 : return;
3747 101 : if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3748 : {
3749 : /* Allow the divisor to be result of a same precision cast
3750 : from zero_cond_lhs. */
3751 53 : tree rhs2 = gimple_assign_rhs2 (div_stmt);
3752 53 : if (TREE_CODE (rhs2) != SSA_NAME)
3753 : return;
3754 53 : gimple *g = SSA_NAME_DEF_STMT (rhs2);
3755 53 : if (!gimple_assign_cast_p (g)
3756 53 : || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3757 53 : || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3758 106 : || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3759 53 : != TYPE_PRECISION (TREE_TYPE (rhs2))))
3760 : return;
3761 : }
3762 101 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
3763 101 : mul_stmts.quick_push (div_stmt);
3764 101 : if (is_gimple_debug (gsi_stmt (gsi)))
3765 0 : gsi_next_nondebug (&gsi);
3766 : unsigned cast_count = 0;
3767 635 : while (gsi_stmt (gsi) != cond_stmt)
3768 : {
3769 : /* If original mul_stmt has a single use, allow it in the same bb,
3770 : we are looking then just at __builtin_mul_overflow_p.
3771 : Though, in that case the original mul_stmt will be replaced
3772 : by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3773 : gimple *mul_stmt;
3774 : unsigned int i;
3775 2274 : bool ok = false;
3776 2274 : FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3777 : {
3778 2127 : if (gsi_stmt (gsi) == mul_stmt)
3779 : {
3780 : ok = true;
3781 : break;
3782 : }
3783 : }
3784 534 : if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3785 : ok = true;
3786 387 : if (!ok)
3787 51 : return;
3788 534 : gsi_next_nondebug (&gsi);
3789 : }
3790 101 : if (gimple_code (cond_stmt) == GIMPLE_COND)
3791 : {
3792 47 : basic_block succ_bb = other_edge->dest;
3793 75 : for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3794 28 : gsi_next (&gpi))
3795 : {
3796 35 : gphi *phi = gpi.phi ();
3797 35 : tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3798 35 : tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3799 35 : if (!operand_equal_p (v1, v2, 0))
3800 7 : return;
3801 : }
3802 : }
3803 : else
3804 : {
3805 54 : tree lhs = gimple_assign_lhs (cond_stmt);
3806 54 : if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3807 : return;
3808 54 : gsi_next_nondebug (&gsi);
3809 54 : if (!gsi_end_p (gsi))
3810 : {
3811 54 : if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3812 : return;
3813 54 : gimple *cast_stmt = gsi_stmt (gsi);
3814 54 : if (!gimple_assign_cast_p (cast_stmt))
3815 : return;
3816 54 : tree new_lhs = gimple_assign_lhs (cast_stmt);
3817 54 : gsi_next_nondebug (&gsi);
3818 54 : if (!gsi_end_p (gsi)
3819 54 : || !new_lhs
3820 54 : || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3821 108 : || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3822 : return;
3823 : lhs = new_lhs;
3824 : }
3825 54 : edge succ_edge = single_succ_edge (bb);
3826 54 : basic_block succ_bb = succ_edge->dest;
3827 54 : gsi = gsi_start_phis (succ_bb);
3828 54 : if (gsi_end_p (gsi))
3829 : return;
3830 54 : gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3831 54 : gsi_next (&gsi);
3832 54 : if (!gsi_end_p (gsi))
3833 : return;
3834 54 : if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3835 : return;
3836 54 : tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3837 54 : if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3838 : {
3839 0 : tree cond = gimple_assign_rhs1 (cond_stmt);
3840 0 : if (TREE_CODE (cond) == NE_EXPR)
3841 : {
3842 0 : if (!operand_equal_p (other_val,
3843 0 : gimple_assign_rhs3 (cond_stmt), 0))
3844 : return;
3845 : }
3846 0 : else if (!operand_equal_p (other_val,
3847 0 : gimple_assign_rhs2 (cond_stmt), 0))
3848 : return;
3849 : }
3850 54 : else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3851 : {
3852 25 : if (!integer_zerop (other_val))
3853 : return;
3854 : }
3855 29 : else if (!integer_onep (other_val))
3856 : return;
3857 : }
3858 94 : if (pred_edge->flags & EDGE_TRUE_VALUE)
3859 41 : gimple_cond_make_true (zero_cond);
3860 : else
3861 53 : gimple_cond_make_false (zero_cond);
3862 94 : update_stmt (zero_cond);
3863 94 : reset_flow_sensitive_info_in_bb (bb);
3864 94 : *cfg_changed = true;
3865 : }
3866 :
3867 : /* Helper function for arith_overflow_check_p. Return true
3868 : if VAL1 is equal to VAL2 cast to corresponding integral type
3869 : with other signedness or vice versa. */
3870 :
3871 : static bool
3872 382 : arith_cast_equal_p (tree val1, tree val2)
3873 : {
3874 382 : if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3875 65 : return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3876 317 : else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3877 : return false;
3878 280 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3879 280 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3880 : return true;
3881 168 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3882 168 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3883 120 : return true;
3884 : return false;
3885 : }
3886 :
/* Helper function of match_arith_overflow.  Return 1
   if USE_STMT is unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
   and 0 otherwise.

   STMT is the arithmetic assignment being examined; its rhs code selects
   which comparison shapes are recognized.  If CAST_STMT is non-NULL, its
   lhs is used in place of the lhs of STMT.  USE_STMT is a use of that lhs;
   for MULT_EXPR it is updated on success to the statement that compares
   the quotient.  If MAXVAL is non-NULL, the forms r > MAXVAL, r <= MAXVAL
   and r >> precision-of-MAXVAL's-type are recognized instead of the
   comparisons against an operand.  If OTHER is non-NULL and STMT is a
   BIT_NOT_EXPR, *OTHER receives the operand the result is compared
   against.  */

static int
arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
                        tree maxval, tree *other)
{
  enum tree_code ccode = ERROR_MARK;
  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree multop = NULL_TREE, divlhs = NULL_TREE;
  gimple *cur_use_stmt = use_stmt;

  if (code == MULT_EXPR)
    {
      /* For a multiplication, USE_STMT has to be lhs / op where op is one
         of the multiplication operands; MULTOP is then the other operand
         and the comparison is looked for on the single use of the
         quotient.  */
      if (!is_gimple_assign (use_stmt))
        return 0;
      if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
        return 0;
      if (gimple_assign_rhs1 (use_stmt) != lhs)
        return 0;
      if (cast_stmt)
        {
          if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
            multop = rhs2;
          else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
            multop = rhs1;
          else
            return 0;
        }
      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
        multop = rhs2;
      else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
        multop = rhs1;
      else
        return 0;
      if (stmt_ends_bb_p (use_stmt))
        return 0;
      divlhs = gimple_assign_lhs (use_stmt);
      if (!divlhs)
        return 0;
      use_operand_p use;
      if (!single_imm_use (divlhs, &use, &cur_use_stmt))
        return 0;
      if (cast_stmt && gimple_assign_cast_p (cur_use_stmt))
        {
          /* The quotient is cast to an unsigned type of the same precision
             before being compared: look through that cast and from here on
             treat the case like the one without CAST_STMT.  */
          tree cast_lhs = gimple_assign_lhs (cur_use_stmt);
          if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
              && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
              && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
                  == TYPE_PRECISION (TREE_TYPE (divlhs)))
              && single_imm_use (cast_lhs, &use, &cur_use_stmt))
            {
              cast_stmt = NULL;
              divlhs = cast_lhs;
            }
          else
            return 0;
        }
    }
  /* Extract the operation code and both operands from CUR_USE_STMT, which
     may be a GIMPLE_COND, a binary assignment or a COND_EXPR assignment
     with an embedded comparison.  */
  if (gimple_code (cur_use_stmt) == GIMPLE_COND)
    {
      ccode = gimple_cond_code (cur_use_stmt);
      crhs1 = gimple_cond_lhs (cur_use_stmt);
      crhs2 = gimple_cond_rhs (cur_use_stmt);
    }
  else if (is_gimple_assign (cur_use_stmt))
    {
      if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
        {
          ccode = gimple_assign_rhs_code (cur_use_stmt);
          crhs1 = gimple_assign_rhs1 (cur_use_stmt);
          crhs2 = gimple_assign_rhs2 (cur_use_stmt);
        }
      else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
        {
          tree cond = gimple_assign_rhs1 (cur_use_stmt);
          if (COMPARISON_CLASS_P (cond))
            {
              ccode = TREE_CODE (cond);
              crhs1 = TREE_OPERAND (cond, 0);
              crhs2 = TREE_OPERAND (cond, 1);
            }
          else
            return 0;
        }
      else
        return 0;
    }
  else
    return 0;

  /* r = a + b; r >> prec where prec is the precision of MAXVAL's type.
     The shifted value must have a single use, which is either a
     comparison of it against zero or a conversion to an integral type no
     wider than MAXVAL's type.  */
  if (maxval
      && ccode == RSHIFT_EXPR
      && crhs1 == lhs
      && TREE_CODE (crhs2) == INTEGER_CST
      && wi::to_widest (crhs2) == TYPE_PRECISION (TREE_TYPE (maxval)))
    {
      tree shiftlhs = gimple_assign_lhs (use_stmt);
      if (!shiftlhs)
        return 0;
      use_operand_p use;
      if (!single_imm_use (shiftlhs, &use, &cur_use_stmt))
        return 0;
      if (gimple_code (cur_use_stmt) == GIMPLE_COND)
        {
          ccode = gimple_cond_code (cur_use_stmt);
          crhs1 = gimple_cond_lhs (cur_use_stmt);
          crhs2 = gimple_cond_rhs (cur_use_stmt);
        }
      else if (is_gimple_assign (cur_use_stmt))
        {
          if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
            {
              ccode = gimple_assign_rhs_code (cur_use_stmt);
              crhs1 = gimple_assign_rhs1 (cur_use_stmt);
              crhs2 = gimple_assign_rhs2 (cur_use_stmt);
            }
          else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
            {
              tree cond = gimple_assign_rhs1 (cur_use_stmt);
              if (COMPARISON_CLASS_P (cond))
                {
                  ccode = TREE_CODE (cond);
                  crhs1 = TREE_OPERAND (cond, 0);
                  crhs2 = TREE_OPERAND (cond, 1);
                }
              else
                return 0;
            }
          else
            {
              /* The shifted value is only converted.  */
              enum tree_code sc = gimple_assign_rhs_code (cur_use_stmt);
              tree castlhs = gimple_assign_lhs (cur_use_stmt);
              if (!CONVERT_EXPR_CODE_P (sc)
                  || !castlhs
                  || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs))
                  || (TYPE_PRECISION (TREE_TYPE (castlhs))
                      > TYPE_PRECISION (TREE_TYPE (maxval))))
                return 0;
              return 1;
            }
        }
      else
        return 0;
      /* Note that 1 is returned for both the == 0 and the != 0 form.  */
      if ((ccode != EQ_EXPR && ccode != NE_EXPR)
          || crhs1 != shiftlhs
          || !integer_zerop (crhs2))
        return 0;
      return 1;
    }

  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
    return 0;

  switch (ccode)
    {
    case GT_EXPR:
    case LE_EXPR:
      if (maxval)
        {
          /* r = a + b; r > maxval or r <= maxval  */
          if (crhs1 == lhs
              && TREE_CODE (crhs2) == INTEGER_CST
              && tree_int_cst_equal (crhs2, maxval))
            return ccode == GT_EXPR ? 1 : -1;
          break;
        }
      /* r = a - b; r > a or r <= a
         r = a + b; a > r or a <= r or b > r or b <= r.  */
      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
          || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
              && crhs2 == lhs))
        return ccode == GT_EXPR ? 1 : -1;
      /* r = ~a; b > r or b <= r.  */
      if (code == BIT_NOT_EXPR && crhs2 == lhs)
        {
          if (other)
            *other = crhs1;
          return ccode == GT_EXPR ? 1 : -1;
        }
      break;
    case LT_EXPR:
    case GE_EXPR:
      if (maxval)
        break;
      /* r = a - b; a < r or a >= r
         r = a + b; r < a or r >= a or r < b or r >= b.  */
      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
          || (code == PLUS_EXPR && crhs1 == lhs
              && (crhs2 == rhs1 || crhs2 == rhs2)))
        return ccode == LT_EXPR ? 1 : -1;
      /* r = ~a; r < b or r >= b.  */
      if (code == BIT_NOT_EXPR && crhs1 == lhs)
        {
          if (other)
            *other = crhs2;
          return ccode == LT_EXPR ? 1 : -1;
        }
      break;
    case EQ_EXPR:
    case NE_EXPR:
      /* r = a * b; _1 = r / a; _1 == b
         r = a * b; _1 = r / b; _1 == a
         r = a * b; _1 = r / a; _1 != b
         r = a * b; _1 = r / b; _1 != a.  */
      if (code == MULT_EXPR)
        {
          if (cast_stmt)
            {
              if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
                  || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
                {
                  /* Report the comparison statement back to the caller.  */
                  use_stmt = cur_use_stmt;
                  return ccode == NE_EXPR ? 1 : -1;
                }
            }
          else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
                   || (crhs2 == divlhs && crhs1 == multop))
            {
              /* Report the comparison statement back to the caller.  */
              use_stmt = cur_use_stmt;
              return ccode == NE_EXPR ? 1 : -1;
            }
        }
      break;
    default:
      break;
    }
  return 0;
}
4122 :
4123 : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4124 : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4125 : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4126 : extern bool gimple_unsigned_integer_sat_mul (tree, tree*, tree (*)(tree));
4127 :
4128 : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4129 : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4130 : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
4131 :
4132 : static void
4133 159 : build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator *gsi,
4134 : internal_fn fn, tree lhs,
4135 : tree op_0, tree op_1)
4136 : {
4137 159 : if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4138 : {
4139 157 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4140 157 : gimple_call_set_lhs (call, lhs);
4141 157 : gsi_replace (gsi, call, /* update_eh_info */ true);
4142 : }
4143 159 : }
4144 :
4145 : static bool
4146 51 : build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator *gsi,
4147 : internal_fn fn, tree lhs,
4148 : tree op_0, tree op_1)
4149 : {
4150 51 : if (!direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4151 : return false;
4152 :
4153 43 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4154 43 : gimple_call_set_lhs (call, lhs);
4155 43 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4156 :
4157 43 : return true;
4158 : }
4159 :
4160 : /*
4161 : * Try to match saturation unsigned add with assign.
4162 : * _7 = _4 + _6;
4163 : * _8 = _4 > _7;
4164 : * _9 = (long unsigned int) _8;
4165 : * _10 = -_9;
4166 : * _12 = _7 | _10;
4167 : * =>
4168 : * _12 = .SAT_ADD (_4, _6);
4169 : *
4170 : * Try to match IMM=-1 saturation signed add with assign.
4171 : * <bb 2> [local count: 1073741824]:
4172 : * x.0_1 = (unsigned char) x_5(D);
4173 : * _3 = -x.0_1;
4174 : * _10 = (signed char) _3;
4175 : * _8 = x_5(D) & _10;
4176 : * if (_8 < 0)
4177 : * goto <bb 4>; [1.40%]
4178 : * else
4179 : * goto <bb 3>; [98.60%]
4180 : * <bb 3> [local count: 434070867]:
4181 : * _2 = x.0_1 + 255;
4182 : * <bb 4> [local count: 1073741824]:
4183 : * # _9 = PHI <_2(3), 128(2)>
4184 : * _4 = (int8_t) _9;
4185 : * =>
4186 : * _4 = .SAT_ADD (x_5, -1); */
4187 :
4188 : static void
4189 4832368 : match_saturation_add_with_assign (gimple_stmt_iterator *gsi, gassign *stmt)
4190 : {
4191 4832368 : tree ops[2];
4192 4832368 : tree lhs = gimple_assign_lhs (stmt);
4193 :
4194 4832368 : if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4195 4832368 : || gimple_signed_integer_sat_add (lhs, ops, NULL))
4196 34 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_ADD, lhs,
4197 : ops[0], ops[1]);
4198 4832368 : }
4199 :
4200 : /*
4201 : * Try to match saturation add with PHI.
4202 : * For unsigned integer:
4203 : * <bb 2> :
4204 : * _1 = x_3(D) + y_4(D);
4205 : * if (_1 >= x_3(D))
4206 : * goto <bb 3>; [INV]
4207 : * else
4208 : * goto <bb 4>; [INV]
4209 : *
4210 : * <bb 3> :
4211 : *
4212 : * <bb 4> :
4213 : * # _2 = PHI <255(2), _1(3)>
4214 : * =>
4215 : * <bb 4> [local count: 1073741824]:
4216 : * _2 = .SAT_ADD (x_4(D), y_5(D));
4217 : *
4218 : * For signed integer:
4219 : * x.0_1 = (long unsigned int) x_7(D);
4220 : * y.1_2 = (long unsigned int) y_8(D);
4221 : * _3 = x.0_1 + y.1_2;
4222 : * sum_9 = (int64_t) _3;
4223 : * _4 = x_7(D) ^ y_8(D);
4224 : * _5 = x_7(D) ^ sum_9;
4225 : * _15 = ~_4;
4226 : * _16 = _5 & _15;
4227 : * if (_16 < 0)
4228 : * goto <bb 3>; [41.00%]
4229 : * else
4230 : * goto <bb 4>; [59.00%]
4231 : * _11 = x_7(D) < 0;
4232 : * _12 = (long int) _11;
4233 : * _13 = -_12;
4234 : * _14 = _13 ^ 9223372036854775807;
4235 : * # _6 = PHI <_14(3), sum_9(2)>
4236 : * =>
4237 : * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
4238 :
4239 : static bool
4240 4136237 : match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
4241 : {
4242 4136237 : if (gimple_phi_num_args (phi) != 2)
4243 : return false;
4244 :
4245 3299038 : tree ops[2];
4246 3299038 : tree phi_result = gimple_phi_result (phi);
4247 :
4248 3299038 : if (!gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
4249 3299038 : && !gimple_signed_integer_sat_add (phi_result, ops, NULL))
4250 : return false;
4251 :
4252 21 : if (!TYPE_UNSIGNED (TREE_TYPE (ops[0])) && TREE_CODE (ops[1]) == INTEGER_CST)
4253 0 : ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4254 :
4255 21 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_ADD,
4256 : phi_result, ops[0],
4257 21 : ops[1]);
4258 : }
4259 :
4260 : /*
4261 : * Try to match saturation unsigned sub.
4262 : * _1 = _4 >= _5;
4263 : * _3 = _4 - _5;
4264 : * _6 = _1 ? _3 : 0;
4265 : * =>
4266 : * _6 = .SAT_SUB (_4, _5); */
4267 :
4268 : static void
4269 3304207 : match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
4270 : {
4271 3304207 : tree ops[2];
4272 3304207 : tree lhs = gimple_assign_lhs (stmt);
4273 :
4274 3304207 : if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
4275 125 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_SUB, lhs,
4276 : ops[0], ops[1]);
4277 3304207 : }
4278 :
4279 : /*
4280 : * Try to match saturation unsigned mul.
4281 : * _1 = (unsigned int) a_6(D);
4282 : * _2 = (unsigned int) b_7(D);
4283 : * x_8 = _1 * _2;
4284 : * overflow_9 = x_8 > 255;
4285 : * _3 = (unsigned char) overflow_9;
4286 : * _4 = -_3;
4287 : * _5 = (unsigned char) x_8;
4288 : * _10 = _4 | _5;
4289 : * =>
4290 : * _10 = .SAT_SUB (a_6, b_7); */
4291 :
4292 : static void
4293 2567114 : match_unsigned_saturation_mul (gimple_stmt_iterator *gsi, gassign *stmt)
4294 : {
4295 2567114 : tree ops[2];
4296 2567114 : tree lhs = gimple_assign_lhs (stmt);
4297 :
4298 2567114 : if (gimple_unsigned_integer_sat_mul (lhs, ops, NULL))
4299 0 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_MUL, lhs,
4300 : ops[0], ops[1]);
4301 2567114 : }
4302 :
4303 : /* Try to match saturation unsigned mul, aka:
4304 : _6 = .MUL_OVERFLOW (a_4(D), b_5(D));
4305 : _2 = IMAGPART_EXPR <_6>;
4306 : if (_2 != 0)
4307 : goto <bb 4>; [35.00%]
4308 : else
4309 : goto <bb 3>; [65.00%]
4310 :
4311 : <bb 3> [local count: 697932184]:
4312 : _1 = REALPART_EXPR <_6>;
4313 :
4314 : <bb 4> [local count: 1073741824]:
4315 : # _3 = PHI <18446744073709551615(2), _1(3)>
4316 : =>
4317 : _3 = .SAT_MUL (a_4(D), b_5(D)); */
4318 :
4319 : static bool
4320 4136194 : match_saturation_mul (gimple_stmt_iterator *gsi, gphi *phi)
4321 : {
4322 4136194 : if (gimple_phi_num_args (phi) != 2)
4323 : return false;
4324 :
4325 3298995 : tree ops[2];
4326 3298995 : tree phi_result = gimple_phi_result (phi);
4327 :
4328 3298995 : if (!gimple_unsigned_integer_sat_mul (phi_result, ops, NULL))
4329 : return false;
4330 :
4331 0 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_MUL,
4332 : phi_result, ops[0],
4333 0 : ops[1]);
4334 : }
4335 :
4336 : /*
4337 : * Try to match saturation unsigned sub.
4338 : * <bb 2> [local count: 1073741824]:
4339 : * if (x_2(D) > y_3(D))
4340 : * goto <bb 3>; [50.00%]
4341 : * else
4342 : * goto <bb 4>; [50.00%]
4343 : *
4344 : * <bb 3> [local count: 536870912]:
4345 : * _4 = x_2(D) - y_3(D);
4346 : *
4347 : * <bb 4> [local count: 1073741824]:
4348 : * # _1 = PHI <0(2), _4(3)>
4349 : * =>
4350 : * <bb 4> [local count: 1073741824]:
4351 : * _1 = .SAT_SUB (x_2(D), y_3(D)); */
4352 : static bool
4353 4136220 : match_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
4354 : {
4355 4136220 : if (gimple_phi_num_args (phi) != 2)
4356 : return false;
4357 :
4358 3299021 : tree ops[2];
4359 3299021 : tree phi_result = gimple_phi_result (phi);
4360 :
4361 3299021 : if (!gimple_unsigned_integer_sat_sub (phi_result, ops, NULL)
4362 3299021 : && !gimple_signed_integer_sat_sub (phi_result, ops, NULL))
4363 : return false;
4364 :
4365 30 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_SUB,
4366 : phi_result, ops[0],
4367 30 : ops[1]);
4368 : }
4369 :
4370 : /*
4371 : * Try to match saturation unsigned sub.
4372 : * uint16_t x_4(D);
4373 : * uint8_t _6;
4374 : * overflow_5 = x_4(D) > 255;
4375 : * _1 = (unsigned char) x_4(D);
4376 : * _2 = (unsigned char) overflow_5;
4377 : * _3 = -_2;
4378 : * _6 = _1 | _3;
4379 : * =>
4380 : * _6 = .SAT_TRUNC (x_4(D));
4381 : * */
4382 : static void
4383 2567114 : match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
4384 : {
4385 2567114 : tree ops[1];
4386 2567114 : tree lhs = gimple_assign_lhs (stmt);
4387 2567114 : tree type = TREE_TYPE (lhs);
4388 :
4389 2567114 : if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4390 2567214 : && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4391 100 : tree_pair (type, TREE_TYPE (ops[0])),
4392 : OPTIMIZE_FOR_BOTH))
4393 : {
4394 73 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4395 73 : gimple_call_set_lhs (call, lhs);
4396 73 : gsi_replace (gsi, call, /* update_eh_info */ true);
4397 : }
4398 2567114 : }
4399 :
4400 : /*
4401 : * Try to match saturation truncate.
4402 : * Aka:
4403 : * x.0_1 = (unsigned long) x_4(D);
4404 : * _2 = x.0_1 + 2147483648;
4405 : * if (_2 > 4294967295)
4406 : * goto <bb 4>; [50.00%]
4407 : * else
4408 : * goto <bb 3>; [50.00%]
4409 : * ;; succ: 4
4410 : * ;; 3
4411 : *
4412 : * ;; basic block 3, loop depth 0
4413 : * ;; pred: 2
4414 : * trunc_5 = (int32_t) x_4(D);
4415 : * goto <bb 5>; [100.00%]
4416 : * ;; succ: 5
4417 : *
4418 : * ;; basic block 4, loop depth 0
4419 : * ;; pred: 2
4420 : * _7 = x_4(D) < 0;
4421 : * _8 = (int) _7;
4422 : * _9 = -_8;
4423 : * _10 = _9 ^ 2147483647;
4424 : * ;; succ: 5
4425 : *
4426 : * ;; basic block 5, loop depth 0
4427 : * ;; pred: 3
4428 : * ;; 4
4429 : * # _3 = PHI <trunc_5(3), _10(4)>
4430 : * =>
4431 : * _6 = .SAT_TRUNC (x_4(D));
4432 : */
4433 :
4434 : static bool
4435 4136194 : match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi)
4436 : {
4437 4136194 : if (gimple_phi_num_args (phi) != 2)
4438 : return false;
4439 :
4440 3298995 : tree ops[1];
4441 3298995 : tree phi_result = gimple_phi_result (phi);
4442 3298995 : tree type = TREE_TYPE (phi_result);
4443 :
4444 3298995 : if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL)
4445 3298995 : && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL))
4446 : return false;
4447 :
4448 0 : if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4449 0 : tree_pair (type, TREE_TYPE (ops[0])),
4450 : OPTIMIZE_FOR_BOTH))
4451 : return false;
4452 :
4453 0 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4454 0 : gimple_call_set_lhs (call, phi_result);
4455 0 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4456 :
4457 0 : return true;
4458 : }
4459 :
4460 : /* Recognize for unsigned x
4461 : x = y - z;
4462 : if (x > y)
4463 : where there are other uses of x and replace it with
4464 : _7 = .SUB_OVERFLOW (y, z);
4465 : x = REALPART_EXPR <_7>;
4466 : _8 = IMAGPART_EXPR <_7>;
4467 : if (_8)
4468 : and similarly for addition.
4469 :
4470 : Also recognize:
4471 : yc = (type) y;
4472 : zc = (type) z;
4473 : x = yc + zc;
4474 : if (x > max)
4475 : where y and z have unsigned types with maximum max
4476 : and there are other uses of x and all of those cast x
4477 : back to that unsigned type and again replace it with
4478 : _7 = .ADD_OVERFLOW (y, z);
4479 : _9 = REALPART_EXPR <_7>;
4480 : _8 = IMAGPART_EXPR <_7>;
4481 : if (_8)
4482 : and replace (utype) x with _9.
4483 : Or with x >> popcount (max) instead of x > max.
4484 :
4485 : Also recognize:
4486 : x = ~z;
4487 : if (y > x)
4488 : and replace it with
4489 : _7 = .ADD_OVERFLOW (y, z);
4490 : _8 = IMAGPART_EXPR <_7>;
4491 : if (_8)
4492 :
4493 : And also recognize:
4494 : z = x * y;
4495 : if (x != 0)
4496 : goto <bb 3>; [50.00%]
4497 : else
4498 : goto <bb 4>; [50.00%]
4499 :
4500 : <bb 3> [local count: 536870913]:
4501 : _2 = z / x;
4502 : _9 = _2 != y;
4503 : _10 = (int) _9;
4504 :
4505 : <bb 4> [local count: 1073741824]:
4506 : # iftmp.0_3 = PHI <_10(3), 0(2)>
4507 : and replace it with
4508 : _7 = .MUL_OVERFLOW (x, y);
4509 : z = REALPART_EXPR <_7>;
4510 : _8 = IMAGPART_EXPR <_7>;
4511 : _9 = _8 != 0;
4512 : iftmp.0_3 = (int) _9; */
4513 :
4514 : static bool
4515 3303487 : match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4516 : enum tree_code code, bool *cfg_changed)
4517 : {
4518 3303487 : tree lhs = gimple_assign_lhs (stmt);
4519 3303487 : tree type = TREE_TYPE (lhs);
4520 3303487 : use_operand_p use_p;
4521 3303487 : imm_use_iterator iter;
4522 3303487 : bool use_seen = false;
4523 3303487 : bool ovf_use_seen = false;
4524 3303487 : gimple *use_stmt;
4525 3303487 : gimple *add_stmt = NULL;
4526 3303487 : bool add_first = false;
4527 3303487 : gimple *cond_stmt = NULL;
4528 3303487 : gimple *cast_stmt = NULL;
4529 3303487 : tree cast_lhs = NULL_TREE;
4530 :
4531 3303487 : gcc_checking_assert (code == PLUS_EXPR
4532 : || code == MINUS_EXPR
4533 : || code == MULT_EXPR
4534 : || code == BIT_NOT_EXPR);
4535 3303487 : if (!INTEGRAL_TYPE_P (type)
4536 2783697 : || !TYPE_UNSIGNED (type)
4537 1945066 : || has_zero_uses (lhs)
4538 3303487 : || (code != PLUS_EXPR
4539 1944727 : && code != MULT_EXPR
4540 170758 : && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4541 146720 : TYPE_MODE (type)) == CODE_FOR_nothing))
4542 1360719 : return false;
4543 :
4544 1942768 : tree rhs1 = gimple_assign_rhs1 (stmt);
4545 1942768 : tree rhs2 = gimple_assign_rhs2 (stmt);
4546 7344665 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4547 : {
4548 3465494 : use_stmt = USE_STMT (use_p);
4549 3465494 : if (is_gimple_debug (use_stmt))
4550 610351 : continue;
4551 :
4552 2855143 : tree other = NULL_TREE;
4553 2855143 : if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4554 : {
4555 6851 : if (code == BIT_NOT_EXPR)
4556 : {
4557 187 : gcc_assert (other);
4558 187 : if (TREE_CODE (other) != SSA_NAME)
4559 0 : return false;
4560 187 : if (rhs2 == NULL)
4561 187 : rhs2 = other;
4562 : else
4563 : return false;
4564 187 : cond_stmt = use_stmt;
4565 : }
4566 : ovf_use_seen = true;
4567 : }
4568 : else
4569 : {
4570 2848292 : use_seen = true;
4571 2848292 : if (code == MULT_EXPR
4572 2848292 : && cast_stmt == NULL
4573 2848292 : && gimple_assign_cast_p (use_stmt))
4574 : {
4575 32908 : cast_lhs = gimple_assign_lhs (use_stmt);
4576 65816 : if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4577 32374 : && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4578 61947 : && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4579 29039 : == TYPE_PRECISION (TREE_TYPE (lhs))))
4580 : cast_stmt = use_stmt;
4581 : else
4582 : cast_lhs = NULL_TREE;
4583 : }
4584 : }
4585 2855143 : if (ovf_use_seen && use_seen)
4586 : break;
4587 0 : }
4588 :
4589 1942768 : if (!ovf_use_seen
4590 1942768 : && code == MULT_EXPR
4591 449809 : && cast_stmt)
4592 : {
4593 28651 : if (TREE_CODE (rhs1) != SSA_NAME
4594 28651 : || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4595 : return false;
4596 94165 : FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4597 : {
4598 36863 : use_stmt = USE_STMT (use_p);
4599 36863 : if (is_gimple_debug (use_stmt))
4600 1386 : continue;
4601 :
4602 35477 : if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4603 : NULL_TREE, NULL))
4604 36863 : ovf_use_seen = true;
4605 28651 : }
4606 28651 : }
4607 : else
4608 : {
4609 : cast_stmt = NULL;
4610 : cast_lhs = NULL_TREE;
4611 : }
4612 :
4613 1942768 : tree maxval = NULL_TREE;
4614 1942768 : if (!ovf_use_seen
4615 13704 : || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4616 6483 : || (code == PLUS_EXPR
4617 6213 : && optab_handler (uaddv4_optab,
4618 6213 : TYPE_MODE (type)) == CODE_FOR_nothing)
4619 1956177 : || (code == MULT_EXPR
4620 221 : && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4621 148 : TYPE_MODE (type)) == CODE_FOR_nothing
4622 3 : && (use_seen
4623 3 : || cast_stmt
4624 0 : || !can_mult_highpart_p (TYPE_MODE (type), true))))
4625 : {
4626 1936140 : if (code != PLUS_EXPR)
4627 : return false;
4628 1341912 : if (TREE_CODE (rhs1) != SSA_NAME
4629 1341912 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4630 : return false;
4631 325850 : rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4632 325850 : tree type1 = TREE_TYPE (rhs1);
4633 325850 : if (!INTEGRAL_TYPE_P (type1)
4634 175492 : || !TYPE_UNSIGNED (type1)
4635 33069 : || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4636 340045 : || (TYPE_PRECISION (type1)
4637 28390 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4638 316119 : return false;
4639 9731 : if (TREE_CODE (rhs2) == INTEGER_CST)
4640 : {
4641 3977 : if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4642 3977 : TYPE_PRECISION (type1),
4643 7954 : UNSIGNED), 0))
4644 : return false;
4645 1455 : rhs2 = fold_convert (type1, rhs2);
4646 : }
4647 : else
4648 : {
4649 5754 : if (TREE_CODE (rhs2) != SSA_NAME
4650 5754 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4651 : return false;
4652 2369 : rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4653 2369 : tree type2 = TREE_TYPE (rhs2);
4654 2369 : if (!INTEGRAL_TYPE_P (type2)
4655 1160 : || !TYPE_UNSIGNED (type2)
4656 383 : || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4657 2724 : || (TYPE_PRECISION (type2)
4658 710 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4659 2027 : return false;
4660 : }
4661 1797 : if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4662 : type = type1;
4663 : else
4664 5 : type = TREE_TYPE (rhs2);
4665 :
4666 1797 : if (TREE_CODE (type) != INTEGER_TYPE
4667 3594 : || optab_handler (uaddv4_optab,
4668 1797 : TYPE_MODE (type)) == CODE_FOR_nothing)
4669 0 : return false;
4670 :
4671 1797 : maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4672 : UNSIGNED));
4673 1797 : ovf_use_seen = false;
4674 1797 : use_seen = false;
4675 1797 : basic_block use_bb = NULL;
4676 3767 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4677 : {
4678 1910 : use_stmt = USE_STMT (use_p);
4679 1910 : if (is_gimple_debug (use_stmt))
4680 109 : continue;
4681 :
4682 1801 : if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4683 : {
4684 12 : ovf_use_seen = true;
4685 12 : use_bb = gimple_bb (use_stmt);
4686 : }
4687 : else
4688 : {
4689 1789 : if (!gimple_assign_cast_p (use_stmt)
4690 1789 : || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4691 : return false;
4692 113 : tree use_lhs = gimple_assign_lhs (use_stmt);
4693 226 : if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4694 226 : || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4695 113 : > TYPE_PRECISION (type)))
4696 : return false;
4697 : use_seen = true;
4698 : }
4699 1737 : }
4700 60 : if (!ovf_use_seen)
4701 : return false;
4702 12 : if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4703 : {
4704 2 : if (!use_seen)
4705 : return false;
4706 2 : tree new_rhs1 = make_ssa_name (type);
4707 2 : gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4708 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4709 2 : rhs1 = new_rhs1;
4710 : }
4711 10 : else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4712 : {
4713 2 : if (!use_seen)
4714 : return false;
4715 2 : tree new_rhs2 = make_ssa_name (type);
4716 2 : gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4717 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4718 2 : rhs2 = new_rhs2;
4719 : }
4720 8 : else if (!use_seen)
4721 : {
4722 : /* If there are no uses of the wider addition, check if
4723 : forwprop has not created a narrower addition.
4724 : Require it to be in the same bb as the overflow check. */
4725 18 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4726 : {
4727 10 : use_stmt = USE_STMT (use_p);
4728 10 : if (is_gimple_debug (use_stmt))
4729 0 : continue;
4730 :
4731 10 : if (use_stmt == stmt)
4732 0 : continue;
4733 :
4734 10 : if (!is_gimple_assign (use_stmt)
4735 10 : || gimple_bb (use_stmt) != use_bb
4736 20 : || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4737 2 : continue;
4738 :
4739 8 : if (gimple_assign_rhs1 (use_stmt) == rhs1)
4740 : {
4741 8 : if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4742 : rhs2, 0))
4743 0 : continue;
4744 : }
4745 0 : else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4746 : {
4747 0 : if (gimple_assign_rhs1 (use_stmt) != rhs2)
4748 0 : continue;
4749 : }
4750 : else
4751 0 : continue;
4752 :
4753 8 : add_stmt = use_stmt;
4754 8 : break;
4755 8 : }
4756 8 : if (add_stmt == NULL)
4757 : return false;
4758 :
4759 : /* If stmt and add_stmt are in the same bb, we need to find out
4760 : which one is earlier. If they are in different bbs, we've
4761 : checked add_stmt is in the same bb as one of the uses of the
4762 : stmt lhs, so stmt needs to dominate add_stmt too. */
4763 8 : if (gimple_bb (stmt) == gimple_bb (add_stmt))
4764 : {
4765 8 : gimple_stmt_iterator gsif = *gsi;
4766 8 : gimple_stmt_iterator gsib = *gsi;
4767 8 : int i;
4768 : /* Search both forward and backward from stmt and have a small
4769 : upper bound. */
4770 20 : for (i = 0; i < 128; i++)
4771 : {
4772 20 : if (!gsi_end_p (gsib))
4773 : {
4774 18 : gsi_prev_nondebug (&gsib);
4775 18 : if (gsi_stmt (gsib) == add_stmt)
4776 : {
4777 : add_first = true;
4778 : break;
4779 : }
4780 : }
4781 2 : else if (gsi_end_p (gsif))
4782 : break;
4783 18 : if (!gsi_end_p (gsif))
4784 : {
4785 18 : gsi_next_nondebug (&gsif);
4786 18 : if (gsi_stmt (gsif) == add_stmt)
4787 : break;
4788 : }
4789 : }
4790 8 : if (i == 128)
4791 0 : return false;
4792 8 : if (add_first)
4793 2 : *gsi = gsi_for_stmt (add_stmt);
4794 : }
4795 : }
4796 : }
4797 :
4798 6640 : if (code == BIT_NOT_EXPR)
4799 170 : *gsi = gsi_for_stmt (cond_stmt);
4800 :
4801 6640 : auto_vec<gimple *, 8> mul_stmts;
4802 6640 : if (code == MULT_EXPR && cast_stmt)
4803 : {
4804 75 : type = TREE_TYPE (cast_lhs);
4805 75 : gimple *g = SSA_NAME_DEF_STMT (rhs1);
4806 75 : if (gimple_assign_cast_p (g)
4807 38 : && useless_type_conversion_p (type,
4808 38 : TREE_TYPE (gimple_assign_rhs1 (g)))
4809 113 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4810 : rhs1 = gimple_assign_rhs1 (g);
4811 : else
4812 : {
4813 37 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4814 37 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4815 37 : rhs1 = gimple_assign_lhs (g);
4816 37 : mul_stmts.quick_push (g);
4817 : }
4818 75 : if (TREE_CODE (rhs2) == INTEGER_CST)
4819 32 : rhs2 = fold_convert (type, rhs2);
4820 : else
4821 : {
4822 43 : g = SSA_NAME_DEF_STMT (rhs2);
4823 43 : if (gimple_assign_cast_p (g)
4824 22 : && useless_type_conversion_p (type,
4825 22 : TREE_TYPE (gimple_assign_rhs1 (g)))
4826 65 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4827 : rhs2 = gimple_assign_rhs1 (g);
4828 : else
4829 : {
4830 21 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4831 21 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4832 21 : rhs2 = gimple_assign_lhs (g);
4833 21 : mul_stmts.quick_push (g);
4834 : }
4835 : }
4836 : }
4837 6640 : tree ctype = build_complex_type (type);
4838 13135 : gcall *g = gimple_build_call_internal (code == MULT_EXPR
4839 : ? IFN_MUL_OVERFLOW
4840 : : code != MINUS_EXPR
4841 6495 : ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4842 : 2, rhs1, rhs2);
4843 6640 : tree ctmp = make_ssa_name (ctype);
4844 6640 : gimple_call_set_lhs (g, ctmp);
4845 6640 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4846 6640 : tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4847 6640 : gassign *g2;
4848 6640 : if (code != BIT_NOT_EXPR)
4849 : {
4850 6470 : g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4851 : build1 (REALPART_EXPR, type, ctmp));
4852 6470 : if (maxval || cast_stmt)
4853 : {
4854 87 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4855 87 : if (add_first)
4856 2 : *gsi = gsi_for_stmt (stmt);
4857 : }
4858 : else
4859 6383 : gsi_replace (gsi, g2, true);
4860 6470 : if (code == MULT_EXPR)
4861 : {
4862 145 : mul_stmts.quick_push (g);
4863 145 : mul_stmts.quick_push (g2);
4864 145 : if (cast_stmt)
4865 : {
4866 75 : g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4867 75 : gsi_replace (gsi, g2, true);
4868 75 : mul_stmts.quick_push (g2);
4869 : }
4870 : }
4871 : }
4872 6640 : tree ovf = make_ssa_name (type);
4873 6640 : g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4874 : build1 (IMAGPART_EXPR, type, ctmp));
4875 6640 : if (code != BIT_NOT_EXPR)
4876 6470 : gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4877 : else
4878 170 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4879 6640 : if (code == MULT_EXPR)
4880 145 : mul_stmts.quick_push (g2);
4881 :
4882 35861 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4883 : {
4884 22656 : if (is_gimple_debug (use_stmt))
4885 5551 : continue;
4886 :
4887 17105 : gimple *orig_use_stmt = use_stmt;
4888 17105 : int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4889 : maxval, NULL);
4890 17105 : if (ovf_use == 0)
4891 : {
4892 10431 : gcc_assert (code != BIT_NOT_EXPR);
4893 10431 : if (maxval)
4894 : {
4895 4 : tree use_lhs = gimple_assign_lhs (use_stmt);
4896 4 : gimple_assign_set_rhs1 (use_stmt, new_lhs);
4897 4 : if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4898 4 : TREE_TYPE (new_lhs)))
4899 4 : gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4900 4 : update_stmt (use_stmt);
4901 : }
4902 10431 : continue;
4903 10431 : }
4904 6674 : if (gimple_code (use_stmt) == GIMPLE_COND)
4905 : {
4906 4435 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
4907 4435 : gimple_cond_set_lhs (cond_stmt, ovf);
4908 4435 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4909 4585 : gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4910 : }
4911 : else
4912 : {
4913 2239 : gcc_checking_assert (is_gimple_assign (use_stmt));
4914 2239 : if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4915 : {
4916 2239 : if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
4917 : {
4918 6 : g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
4919 : ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4920 : ovf, build_int_cst (type, 0));
4921 6 : gimple_stmt_iterator gsiu = gsi_for_stmt (use_stmt);
4922 6 : gsi_insert_before (&gsiu, g2, GSI_SAME_STMT);
4923 6 : gimple_assign_set_rhs_with_ops (&gsiu, NOP_EXPR,
4924 : gimple_assign_lhs (g2));
4925 6 : update_stmt (use_stmt);
4926 6 : use_operand_p use;
4927 6 : single_imm_use (gimple_assign_lhs (use_stmt), &use,
4928 : &use_stmt);
4929 6 : if (gimple_code (use_stmt) == GIMPLE_COND)
4930 : {
4931 0 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
4932 0 : gimple_cond_set_lhs (cond_stmt, ovf);
4933 0 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4934 : }
4935 : else
4936 : {
4937 6 : gcc_checking_assert (is_gimple_assign (use_stmt));
4938 6 : if (gimple_assign_rhs_class (use_stmt)
4939 : == GIMPLE_BINARY_RHS)
4940 : {
4941 0 : gimple_assign_set_rhs1 (use_stmt, ovf);
4942 0 : gimple_assign_set_rhs2 (use_stmt,
4943 : build_int_cst (type, 0));
4944 : }
4945 6 : else if (gimple_assign_cast_p (use_stmt))
4946 6 : gimple_assign_set_rhs1 (use_stmt, ovf);
4947 : else
4948 : {
4949 0 : tree_code sc = gimple_assign_rhs_code (use_stmt);
4950 0 : gcc_checking_assert (sc == COND_EXPR);
4951 0 : tree cond = gimple_assign_rhs1 (use_stmt);
4952 0 : cond = build2 (TREE_CODE (cond),
4953 : boolean_type_node, ovf,
4954 : build_int_cst (type, 0));
4955 0 : gimple_assign_set_rhs1 (use_stmt, cond);
4956 : }
4957 : }
4958 6 : update_stmt (use_stmt);
4959 6 : gsi_remove (&gsiu, true);
4960 6 : gsiu = gsi_for_stmt (g2);
4961 6 : gsi_remove (&gsiu, true);
4962 6 : continue;
4963 6 : }
4964 : else
4965 : {
4966 2233 : gimple_assign_set_rhs1 (use_stmt, ovf);
4967 2233 : gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4968 2382 : gimple_assign_set_rhs_code (use_stmt,
4969 : ovf_use == 1
4970 : ? NE_EXPR : EQ_EXPR);
4971 : }
4972 : }
4973 : else
4974 : {
4975 0 : gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4976 : == COND_EXPR);
4977 0 : tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4978 : boolean_type_node, ovf,
4979 : build_int_cst (type, 0));
4980 0 : gimple_assign_set_rhs1 (use_stmt, cond);
4981 : }
4982 : }
4983 6668 : update_stmt (use_stmt);
4984 6668 : if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4985 : {
4986 145 : gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4987 145 : maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4988 : cfg_changed);
4989 145 : use_operand_p use;
4990 145 : gimple *cast_stmt;
4991 145 : if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
4992 : &cast_stmt)
4993 145 : && gimple_assign_cast_p (cast_stmt))
4994 : {
4995 2 : gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4996 2 : gsi_remove (&gsi3, true);
4997 2 : release_ssa_name (gimple_assign_lhs (cast_stmt));
4998 : }
4999 145 : gsi_remove (&gsi2, true);
5000 145 : release_ssa_name (gimple_assign_lhs (orig_use_stmt));
5001 : }
5002 6640 : }
5003 6640 : if (maxval)
5004 : {
5005 12 : gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
5006 12 : gsi_remove (&gsi2, true);
5007 12 : if (add_stmt)
5008 : {
5009 8 : gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
5010 : new_lhs);
5011 8 : gsi2 = gsi_for_stmt (add_stmt);
5012 8 : gsi_replace (&gsi2, g, true);
5013 : }
5014 : }
5015 6628 : else if (code == BIT_NOT_EXPR)
5016 : {
5017 170 : *gsi = gsi_for_stmt (stmt);
5018 170 : gsi_remove (gsi, true);
5019 170 : release_ssa_name (lhs);
5020 170 : return true;
5021 : }
5022 : return false;
5023 6640 : }
5024 :
5025 : /* Helper of match_uaddc_usubc. Look through an integral cast
5026 : which should preserve [0, 1] range value (unless source has
5027 : 1-bit signed type) and the cast has single use. */
5028 :
5029 : static gimple *
5030 2051936 : uaddc_cast (gimple *g)
5031 : {
5032 2051936 : if (!gimple_assign_cast_p (g))
5033 : return g;
5034 493707 : tree op = gimple_assign_rhs1 (g);
5035 493707 : if (TREE_CODE (op) == SSA_NAME
5036 418360 : && INTEGRAL_TYPE_P (TREE_TYPE (op))
5037 290508 : && (TYPE_PRECISION (TREE_TYPE (op)) > 1
5038 5121 : || TYPE_UNSIGNED (TREE_TYPE (op)))
5039 784215 : && has_single_use (gimple_assign_lhs (g)))
5040 176953 : return SSA_NAME_DEF_STMT (op);
5041 : return g;
5042 : }
5043 :
5044 : /* Helper of match_uaddc_usubc. Look through a NE_EXPR
5045 : comparison with 0 which also preserves [0, 1] value range. */
5046 :
5047 : static gimple *
5048 2052095 : uaddc_ne0 (gimple *g)
5049 : {
5050 2052095 : if (is_gimple_assign (g)
5051 1262051 : && gimple_assign_rhs_code (g) == NE_EXPR
5052 53687 : && integer_zerop (gimple_assign_rhs2 (g))
5053 5366 : && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
5054 2057461 : && has_single_use (gimple_assign_lhs (g)))
5055 5108 : return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
5056 : return g;
5057 : }
5058 :
5059 : /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
5060 : operand. */
5061 :
5062 : static bool
5063 2052932 : uaddc_is_cplxpart (gimple *g, tree_code part)
5064 : {
5065 2052932 : return (is_gimple_assign (g)
5066 1261496 : && gimple_assign_rhs_code (g) == part
5067 2055259 : && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
5068 : }
5069 :
5070 : /* Try to match e.g.
5071 : _29 = .ADD_OVERFLOW (_3, _4);
5072 : _30 = REALPART_EXPR <_29>;
5073 : _31 = IMAGPART_EXPR <_29>;
5074 : _32 = .ADD_OVERFLOW (_30, _38);
5075 : _33 = REALPART_EXPR <_32>;
5076 : _34 = IMAGPART_EXPR <_32>;
5077 : _35 = _31 + _34;
5078 : as
5079 : _36 = .UADDC (_3, _4, _38);
5080 : _33 = REALPART_EXPR <_36>;
5081 : _35 = IMAGPART_EXPR <_36>;
5082 : or
5083 : _22 = .SUB_OVERFLOW (_6, _5);
5084 : _23 = REALPART_EXPR <_22>;
5085 : _24 = IMAGPART_EXPR <_22>;
5086 : _25 = .SUB_OVERFLOW (_23, _37);
5087 : _26 = REALPART_EXPR <_25>;
5088 : _27 = IMAGPART_EXPR <_25>;
5089 : _28 = _24 | _27;
5090 : as
5091 : _29 = .USUBC (_6, _5, _37);
5092 : _26 = REALPART_EXPR <_29>;
5093 : _28 = IMAGPART_EXPR <_29>;
5094 : provided _38 or _37 above have [0, 1] range
5095 : and _3, _4 and _30 or _6, _5 and _23 are unsigned
5096 : integral types with the same precision. Whether + or | or ^ is
5097 : used on the IMAGPART_EXPR results doesn't matter, with one of
5098 : added or subtracted operands in [0, 1] range at most one
5099 : .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
5100 :
5101 : static bool
5102 2767791 : match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
5103 : {
5104 2767791 : tree rhs[4];
5105 2767791 : rhs[0] = gimple_assign_rhs1 (stmt);
5106 2767791 : rhs[1] = gimple_assign_rhs2 (stmt);
5107 2767791 : rhs[2] = NULL_TREE;
5108 2767791 : rhs[3] = NULL_TREE;
5109 2767791 : tree type = TREE_TYPE (rhs[0]);
5110 2767791 : if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
5111 : return false;
5112 :
5113 1629456 : auto_vec<gimple *, 2> temp_stmts;
5114 1629456 : if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
5115 : {
5116 : /* If overflow flag is ignored on the MSB limb, we can end up with
5117 : the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
5118 : or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
5119 : thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
5120 : the limb below the MSB, but also create another .UADDC/.USUBC call
5121 : for the last limb.
5122 :
5123 : First look through assignments with the same rhs code as CODE,
5124 : with the exception that subtraction of a constant is canonicalized
5125 : into addition of its negation. rhs[0] will be minuend for
5126 : subtractions and one of addends for addition, all other assigned
5127 : rhs[i] operands will be subtrahends or other addends. */
5128 1510364 : while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
5129 : {
5130 1482755 : gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
5131 1482755 : if (has_single_use (rhs[0])
5132 499538 : && is_gimple_assign (g)
5133 1923330 : && (gimple_assign_rhs_code (g) == code
5134 407966 : || (code == MINUS_EXPR
5135 52904 : && gimple_assign_rhs_code (g) == PLUS_EXPR
5136 16786 : && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
5137 : {
5138 45762 : tree r2 = gimple_assign_rhs2 (g);
5139 45762 : if (gimple_assign_rhs_code (g) != code)
5140 : {
5141 13153 : r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
5142 13153 : if (!r2)
5143 : break;
5144 : }
5145 45762 : rhs[0] = gimple_assign_rhs1 (g);
5146 45762 : tree &r = rhs[2] ? rhs[3] : rhs[2];
5147 45762 : r = r2;
5148 45762 : temp_stmts.quick_push (g);
5149 : }
5150 : else
5151 : break;
5152 : }
5153 4393806 : for (int i = 1; i <= 2; ++i)
5154 2970676 : while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
5155 : {
5156 525375 : gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
5157 525375 : if (has_single_use (rhs[i])
5158 264947 : && is_gimple_assign (g)
5159 773437 : && gimple_assign_rhs_code (g) == PLUS_EXPR)
5160 : {
5161 41472 : rhs[i] = gimple_assign_rhs1 (g);
5162 41472 : if (rhs[2])
5163 8166 : rhs[3] = gimple_assign_rhs2 (g);
5164 : else
5165 33306 : rhs[2] = gimple_assign_rhs2 (g);
5166 41472 : temp_stmts.quick_push (g);
5167 : }
5168 : else
5169 : break;
5170 : }
5171 : /* If there are just 3 addends or one minuend and two subtrahends,
5172 : check for UADDC or USUBC being pattern recognized earlier.
5173 : Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
5174 : got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
5175 : etc. */
5176 1464602 : if (rhs[2] && !rhs[3])
5177 : {
5178 300767 : for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
5179 174786 : if (TREE_CODE (rhs[i]) == SSA_NAME)
5180 : {
5181 136488 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5182 136488 : im = uaddc_ne0 (im);
5183 136488 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5184 : {
5185 : /* We found one of the 3 addends or 2 subtrahends to be
5186 : __imag__ of something, verify it is .UADDC/.USUBC. */
5187 217 : tree rhs1 = gimple_assign_rhs1 (im);
5188 217 : gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
5189 217 : tree ovf_lhs = NULL_TREE;
5190 217 : tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
5191 237 : if (gimple_call_internal_p (ovf, code == PLUS_EXPR
5192 : ? IFN_ADD_OVERFLOW
5193 : : IFN_SUB_OVERFLOW))
5194 : {
5195 : /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
5196 : This is for the case of 2 chained .UADDC/.USUBC,
5197 : where the first one uses 0 carry-in and the second
5198 : one ignores the carry-out.
5199 : So, something like:
5200 : _16 = .ADD_OVERFLOW (_1, _2);
5201 : _17 = REALPART_EXPR <_16>;
5202 : _18 = IMAGPART_EXPR <_16>;
5203 : _15 = _3 + _4;
5204 : _12 = _15 + _18;
5205 : where the first 3 statements come from the lower
5206 : limb addition and the last 2 from the higher limb
5207 : which ignores carry-out. */
5208 199 : ovf_lhs = gimple_call_lhs (ovf);
5209 199 : tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
5210 199 : ovf_arg1 = gimple_call_arg (ovf, 0);
5211 199 : ovf_arg2 = gimple_call_arg (ovf, 1);
5212 : /* In that case we need to punt if the types don't
5213 : mismatch. */
5214 199 : if (!types_compatible_p (type, ovf_lhs_type)
5215 199 : || !types_compatible_p (type, TREE_TYPE (ovf_arg1))
5216 395 : || !types_compatible_p (type,
5217 196 : TREE_TYPE (ovf_arg2)))
5218 : ovf_lhs = NULL_TREE;
5219 : else
5220 : {
5221 485 : for (int i = (code == PLUS_EXPR ? 1 : 0);
5222 485 : i >= 0; --i)
5223 : {
5224 343 : tree r = gimple_call_arg (ovf, i);
5225 343 : if (TREE_CODE (r) != SSA_NAME)
5226 0 : continue;
5227 343 : if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
5228 : REALPART_EXPR))
5229 : {
5230 : /* Punt if one of the args which isn't
5231 : subtracted isn't __real__; that could
5232 : then prevent better match later.
5233 : Consider:
5234 : _3 = .ADD_OVERFLOW (_1, _2);
5235 : _4 = REALPART_EXPR <_3>;
5236 : _5 = IMAGPART_EXPR <_3>;
5237 : _7 = .ADD_OVERFLOW (_4, _6);
5238 : _8 = REALPART_EXPR <_7>;
5239 : _9 = IMAGPART_EXPR <_7>;
5240 : _12 = _10 + _11;
5241 : _13 = _12 + _9;
5242 : _14 = _13 + _5;
5243 : We want to match this when called on
5244 : the last stmt as a pair of .UADDC calls,
5245 : but without this check we could turn
5246 : that prematurely on _13 = _12 + _9;
5247 : stmt into .UADDC with 0 carry-in just
5248 : on the second .ADD_OVERFLOW call and
5249 : another replacing the _12 and _13
5250 : additions. */
5251 : ovf_lhs = NULL_TREE;
5252 : break;
5253 : }
5254 : }
5255 : }
5256 192 : if (ovf_lhs)
5257 : {
5258 142 : use_operand_p use_p;
5259 142 : imm_use_iterator iter;
5260 142 : tree re_lhs = NULL_TREE;
5261 568 : FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
5262 : {
5263 284 : gimple *use_stmt = USE_STMT (use_p);
5264 284 : if (is_gimple_debug (use_stmt))
5265 0 : continue;
5266 284 : if (use_stmt == im)
5267 142 : continue;
5268 142 : if (!uaddc_is_cplxpart (use_stmt,
5269 : REALPART_EXPR))
5270 : {
5271 : ovf_lhs = NULL_TREE;
5272 : break;
5273 : }
5274 142 : re_lhs = gimple_assign_lhs (use_stmt);
5275 142 : }
5276 142 : if (ovf_lhs && re_lhs)
5277 : {
5278 520 : FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
5279 : {
5280 290 : gimple *use_stmt = USE_STMT (use_p);
5281 290 : if (is_gimple_debug (use_stmt))
5282 109 : continue;
5283 181 : internal_fn ifn
5284 181 : = gimple_call_internal_fn (ovf);
5285 : /* Punt if the __real__ of lhs is used
5286 : in the same .*_OVERFLOW call.
5287 : Consider:
5288 : _3 = .ADD_OVERFLOW (_1, _2);
5289 : _4 = REALPART_EXPR <_3>;
5290 : _5 = IMAGPART_EXPR <_3>;
5291 : _7 = .ADD_OVERFLOW (_4, _6);
5292 : _8 = REALPART_EXPR <_7>;
5293 : _9 = IMAGPART_EXPR <_7>;
5294 : _12 = _10 + _11;
5295 : _13 = _12 + _5;
5296 : _14 = _13 + _9;
5297 : We want to match this when called on
5298 : the last stmt as a pair of .UADDC calls,
5299 : but without this check we could turn
5300 : that prematurely on _13 = _12 + _5;
5301 : stmt into .UADDC with 0 carry-in just
5302 : on the first .ADD_OVERFLOW call and
5303 : another replacing the _12 and _13
5304 : additions. */
5305 181 : if (gimple_call_internal_p (use_stmt, ifn))
5306 : {
5307 : ovf_lhs = NULL_TREE;
5308 : break;
5309 : }
5310 142 : }
5311 : }
5312 : }
5313 : }
5314 142 : if ((ovf_lhs
5315 138 : || gimple_call_internal_p (ovf,
5316 : code == PLUS_EXPR
5317 : ? IFN_UADDC : IFN_USUBC))
5318 250 : && (optab_handler (code == PLUS_EXPR
5319 : ? uaddc5_optab : usubc5_optab,
5320 94 : TYPE_MODE (type))
5321 : != CODE_FOR_nothing))
5322 : {
5323 : /* And in that case build another .UADDC/.USUBC
5324 : call for the most significand limb addition.
5325 : Overflow bit is ignored here. */
5326 63 : if (i != 2)
5327 63 : std::swap (rhs[i], rhs[2]);
5328 63 : gimple *g
5329 77 : = gimple_build_call_internal (code == PLUS_EXPR
5330 : ? IFN_UADDC
5331 : : IFN_USUBC,
5332 : 3, rhs[0], rhs[1],
5333 : rhs[2]);
5334 63 : tree nlhs = make_ssa_name (build_complex_type (type));
5335 63 : gimple_call_set_lhs (g, nlhs);
5336 63 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5337 63 : tree ilhs = gimple_assign_lhs (stmt);
5338 63 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5339 : build1 (REALPART_EXPR,
5340 63 : TREE_TYPE (ilhs),
5341 : nlhs));
5342 63 : gsi_replace (gsi, g, true);
5343 : /* And if it is initialized from result of __imag__
5344 : of .{ADD,SUB}_OVERFLOW call, replace that
5345 : call with .U{ADD,SUB}C call with the same arguments,
5346 : just 0 added as third argument. This isn't strictly
5347 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5348 : produce the same result, but may result in better
5349 : generated code on some targets where the backend can
5350 : better prepare in how the result will be used. */
5351 63 : if (ovf_lhs)
5352 : {
5353 57 : tree zero = build_zero_cst (type);
5354 57 : g = gimple_build_call_internal (code == PLUS_EXPR
5355 : ? IFN_UADDC
5356 : : IFN_USUBC,
5357 : 3, ovf_arg1,
5358 : ovf_arg2, zero);
5359 57 : gimple_call_set_lhs (g, ovf_lhs);
5360 57 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
5361 57 : gsi_replace (&gsi2, g, true);
5362 : }
5363 63 : return true;
5364 : }
5365 : }
5366 : }
5367 : return false;
5368 : }
5369 1401580 : if (code == MINUS_EXPR && !rhs[2])
5370 : return false;
5371 264 : if (code == MINUS_EXPR)
5372 : /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
5373 : So, for MINUS_EXPR swap the single added rhs operand (others are
5374 : subtracted) to rhs[3]. */
5375 264 : std::swap (rhs[0], rhs[3]);
5376 : }
5377 : /* Walk from both operands of STMT (for +/- even sometimes from
5378 : all the 4 addends or 3 subtrahends), see through casts and != 0
5379 : statements which would preserve [0, 1] range of values and
5380 : check which is initialized from __imag__. */
5381 7290171 : gimple *im1 = NULL, *im2 = NULL;
5382 14579288 : for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
5383 5832245 : if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
5384 : {
5385 1915356 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5386 1915356 : im = uaddc_ne0 (im);
5387 1915356 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5388 : {
5389 1588 : if (im1 == NULL)
5390 : {
5391 1200 : im1 = im;
5392 1200 : if (i != 0)
5393 323 : std::swap (rhs[0], rhs[i]);
5394 : }
5395 : else
5396 : {
5397 388 : im2 = im;
5398 388 : if (i != 1)
5399 22 : std::swap (rhs[1], rhs[i]);
5400 : break;
5401 : }
5402 : }
5403 : }
5404 : /* If we don't find at least two, punt. */
5405 1458314 : if (!im2)
5406 : return false;
5407 : /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
5408 : either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
5409 : uaddc5/usubc5 named pattern for the corresponding mode. */
5410 388 : gimple *ovf1
5411 388 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
5412 388 : gimple *ovf2
5413 388 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
5414 388 : internal_fn ifn;
5415 388 : if (!is_gimple_call (ovf1)
5416 388 : || !gimple_call_internal_p (ovf1)
5417 388 : || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
5418 60 : && ifn != IFN_SUB_OVERFLOW)
5419 365 : || !gimple_call_internal_p (ovf2, ifn)
5420 394 : || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
5421 361 : TYPE_MODE (type)) == CODE_FOR_nothing
5422 94 : || (rhs[2]
5423 17 : && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
5424 15 : TYPE_MODE (type)) == CODE_FOR_nothing)
5425 94 : || !types_compatible_p (type,
5426 94 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf1))))
5427 481 : || !types_compatible_p (type,
5428 93 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf2)))))
5429 295 : return false;
5430 93 : tree arg1, arg2, arg3 = NULL_TREE;
5431 93 : gimple *re1 = NULL, *re2 = NULL;
5432 : /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
5433 : should be initialized from __real__ of the other of the two calls.
5434 : Though, for .SUB_OVERFLOW, it has to be the first argument, not the
5435 : second one. */
5436 340 : for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
5437 349 : for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
5438 : {
5439 288 : tree arg = gimple_call_arg (ovf, i);
5440 288 : if (TREE_CODE (arg) != SSA_NAME)
5441 2 : continue;
5442 286 : re1 = SSA_NAME_DEF_STMT (arg);
5443 286 : if (uaddc_is_cplxpart (re1, REALPART_EXPR)
5444 379 : && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
5445 93 : == (ovf == ovf1 ? ovf2 : ovf1)))
5446 : {
5447 93 : if (ovf == ovf1)
5448 : {
5449 : /* Make sure ovf2 is the .*_OVERFLOW call with argument
5450 : initialized from __real__ of ovf1. */
5451 20 : std::swap (rhs[0], rhs[1]);
5452 20 : std::swap (im1, im2);
5453 20 : std::swap (ovf1, ovf2);
5454 : }
5455 93 : arg3 = gimple_call_arg (ovf, 1 - i);
5456 93 : i = -1;
5457 93 : break;
5458 : }
5459 : }
5460 93 : if (!arg3)
5461 : return false;
5462 93 : arg1 = gimple_call_arg (ovf1, 0);
5463 93 : arg2 = gimple_call_arg (ovf1, 1);
5464 93 : if (!types_compatible_p (type, TREE_TYPE (arg1)))
5465 : return false;
5466 93 : int kind[2] = { 0, 0 };
5467 93 : tree arg_im[2] = { NULL_TREE, NULL_TREE };
5468 : /* At least one of arg2 and arg3 should have type compatible
5469 : with arg1/rhs[0], and the other one should have value in [0, 1]
5470 : range. If both are in [0, 1] range and type compatible with
5471 : arg1/rhs[0], try harder to find after looking through casts,
5472 : != 0 comparisons which one is initialized to __imag__ of
5473 : .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
5474 279 : for (int i = 0; i < 2; ++i)
5475 : {
5476 186 : tree arg = i == 0 ? arg2 : arg3;
5477 186 : if (types_compatible_p (type, TREE_TYPE (arg)))
5478 161 : kind[i] = 1;
5479 372 : if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
5480 372 : || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
5481 25 : && !TYPE_UNSIGNED (TREE_TYPE (arg))))
5482 0 : continue;
5483 186 : if (tree_zero_one_valued_p (arg))
5484 51 : kind[i] |= 2;
5485 186 : if (TREE_CODE (arg) == SSA_NAME)
5486 : {
5487 184 : gimple *g = SSA_NAME_DEF_STMT (arg);
5488 184 : if (gimple_assign_cast_p (g))
5489 : {
5490 30 : tree op = gimple_assign_rhs1 (g);
5491 30 : if (TREE_CODE (op) == SSA_NAME
5492 30 : && INTEGRAL_TYPE_P (TREE_TYPE (op)))
5493 30 : g = SSA_NAME_DEF_STMT (op);
5494 : }
5495 184 : g = uaddc_ne0 (g);
5496 184 : if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
5497 124 : continue;
5498 60 : arg_im[i] = gimple_assign_lhs (g);
5499 60 : g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
5500 60 : if (!is_gimple_call (g) || !gimple_call_internal_p (g))
5501 0 : continue;
5502 60 : switch (gimple_call_internal_fn (g))
5503 : {
5504 60 : case IFN_ADD_OVERFLOW:
5505 60 : case IFN_SUB_OVERFLOW:
5506 60 : case IFN_UADDC:
5507 60 : case IFN_USUBC:
5508 60 : break;
5509 0 : default:
5510 0 : continue;
5511 : }
5512 60 : kind[i] |= 4;
5513 : }
5514 : }
5515 : /* Make arg2 the one with compatible type and arg3 the one
5516 : with [0, 1] range. If both is true for both operands,
5517 : prefer as arg3 result of __imag__ of some ifn. */
5518 93 : if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
5519 : {
5520 1 : std::swap (arg2, arg3);
5521 1 : std::swap (kind[0], kind[1]);
5522 1 : std::swap (arg_im[0], arg_im[1]);
5523 : }
5524 93 : if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
5525 : return false;
5526 69 : if (!has_single_use (gimple_assign_lhs (im1))
5527 67 : || !has_single_use (gimple_assign_lhs (im2))
5528 67 : || !has_single_use (gimple_assign_lhs (re1))
5529 136 : || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
5530 : return false;
5531 : /* Check that ovf2's result is used in __real__ and set re2
5532 : to that statement. */
5533 67 : use_operand_p use_p;
5534 67 : imm_use_iterator iter;
5535 67 : tree lhs = gimple_call_lhs (ovf2);
5536 267 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5537 : {
5538 133 : gimple *use_stmt = USE_STMT (use_p);
5539 133 : if (is_gimple_debug (use_stmt))
5540 0 : continue;
5541 133 : if (use_stmt == im2)
5542 67 : continue;
5543 66 : if (re2)
5544 : return false;
5545 66 : if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
5546 : return false;
5547 : re2 = use_stmt;
5548 0 : }
5549 : /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5550 67 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
5551 67 : gimple *g;
5552 67 : if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
5553 : arg3 = arg_im[1];
5554 67 : if ((kind[1] & 1) == 0)
5555 : {
5556 25 : if (TREE_CODE (arg3) == INTEGER_CST)
5557 0 : arg3 = fold_convert (type, arg3);
5558 : else
5559 : {
5560 25 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
5561 25 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5562 25 : arg3 = gimple_assign_lhs (g);
5563 : }
5564 : }
5565 89 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5566 : ? IFN_UADDC : IFN_USUBC,
5567 : 3, arg1, arg2, arg3);
5568 67 : tree nlhs = make_ssa_name (TREE_TYPE (lhs));
5569 67 : gimple_call_set_lhs (g, nlhs);
5570 67 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5571 : /* In the case where stmt is | or ^ of two overflow flags
5572 : or addition of those, replace stmt with __imag__ of the above
5573 : added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5574 : arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5575 67 : tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
5576 67 : g = gimple_build_assign (ilhs, IMAGPART_EXPR,
5577 67 : build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
5578 67 : if (rhs[2])
5579 : {
5580 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5581 : /* Remove some further statements which can't be kept in the IL because
5582 : they can use SSA_NAMEs whose setter is going to be removed too. */
5583 75 : for (gimple *g2 : temp_stmts)
5584 : {
5585 30 : gsi2 = gsi_for_stmt (g2);
5586 30 : gsi_remove (&gsi2, true);
5587 30 : release_defs (g2);
5588 : }
5589 : }
5590 : else
5591 52 : gsi_replace (gsi, g, true);
5592 : /* Remove some statements which can't be kept in the IL because they
5593 : use SSA_NAME whose setter is going to be removed too. */
5594 67 : tree rhs1 = rhs[1];
5595 103 : for (int i = 0; i < 2; i++)
5596 85 : if (rhs1 == gimple_assign_lhs (im2))
5597 : break;
5598 : else
5599 : {
5600 36 : g = SSA_NAME_DEF_STMT (rhs1);
5601 36 : rhs1 = gimple_assign_rhs1 (g);
5602 36 : gsi2 = gsi_for_stmt (g);
5603 36 : gsi_remove (&gsi2, true);
5604 36 : release_defs (g);
5605 : }
5606 67 : gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
5607 67 : gsi2 = gsi_for_stmt (im2);
5608 67 : gsi_remove (&gsi2, true);
5609 67 : release_defs (im2);
5610 : /* Replace the re2 statement with __real__ of the newly added
5611 : .UADDC/.USUBC call. */
5612 67 : if (re2)
5613 : {
5614 66 : gsi2 = gsi_for_stmt (re2);
5615 66 : tree rlhs = gimple_assign_lhs (re2);
5616 66 : g = gimple_build_assign (rlhs, REALPART_EXPR,
5617 66 : build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
5618 66 : gsi_replace (&gsi2, g, true);
5619 : }
5620 67 : if (rhs[2])
5621 : {
5622 : /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5623 : arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5624 : replace stmt with __real__ of another .UADDC/.USUBC call which
5625 : handles the most significant limb. Overflow flag from this is
5626 : ignored. */
5627 17 : g = gimple_build_call_internal (code == PLUS_EXPR
5628 : ? IFN_UADDC : IFN_USUBC,
5629 : 3, rhs[3], rhs[2], ilhs);
5630 15 : nlhs = make_ssa_name (TREE_TYPE (lhs));
5631 15 : gimple_call_set_lhs (g, nlhs);
5632 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5633 15 : ilhs = gimple_assign_lhs (stmt);
5634 15 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5635 15 : build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
5636 15 : gsi_replace (gsi, g, true);
5637 : }
5638 67 : if (TREE_CODE (arg3) == SSA_NAME)
5639 : {
5640 : /* When pattern recognizing the second least significant limb
5641 : above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5642 : check if the [0, 1] range argument (i.e. carry in) isn't the
5643 : result of another .{ADD,SUB}_OVERFLOW call (one handling the
5644 : least significant limb). Again look through casts and != 0. */
5645 67 : gimple *im3 = SSA_NAME_DEF_STMT (arg3);
5646 92 : for (int i = 0; i < 2; ++i)
5647 : {
5648 92 : gimple *im4 = uaddc_cast (im3);
5649 92 : if (im4 == im3)
5650 : break;
5651 : else
5652 25 : im3 = im4;
5653 : }
5654 67 : im3 = uaddc_ne0 (im3);
5655 67 : if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
5656 : {
5657 60 : gimple *ovf3
5658 60 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
5659 60 : if (gimple_call_internal_p (ovf3, ifn))
5660 : {
5661 25 : lhs = gimple_call_lhs (ovf3);
5662 25 : arg1 = gimple_call_arg (ovf3, 0);
5663 25 : arg2 = gimple_call_arg (ovf3, 1);
5664 25 : if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
5665 25 : && types_compatible_p (type, TREE_TYPE (arg1))
5666 50 : && types_compatible_p (type, TREE_TYPE (arg2)))
5667 : {
5668 : /* And if it is initialized from result of __imag__
5669 : of .{ADD,SUB}_OVERFLOW call, replace that
5670 : call with .U{ADD,SUB}C call with the same arguments,
5671 : just 0 added as third argument. This isn't strictly
5672 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5673 : produce the same result, but may result in better
5674 : generated code on some targets where the backend can
5675 : better prepare in how the result will be used. */
5676 25 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5677 : ? IFN_UADDC : IFN_USUBC,
5678 : 3, arg1, arg2,
5679 : build_zero_cst (type));
5680 25 : gimple_call_set_lhs (g, lhs);
5681 25 : gsi2 = gsi_for_stmt (ovf3);
5682 25 : gsi_replace (&gsi2, g, true);
5683 : }
5684 : }
5685 : }
5686 : }
5687 : return true;
5688 1629456 : }
5689 :
5690 : /* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 in STMT (a GIMPLE_COND or an assignment) with
5691 : (x ^ (x - 1)) > x - 1 or (x ^ (x - 1)) <= x - 1 if .POPCOUNT
5692 : isn't a direct optab; if x is known to be nonzero, use (x & (x - 1)) == 0 or != 0 instead. Also handle `<=`/`>` to be
5693 : `(x & (x - 1)) ==/!= 0`. If .POPCOUNT is a direct optab, only add a second call argument describing the comparison. New statements are inserted before GSI. */
5694 :
5695 : static void
5696 4398292 : match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
5697 : {
5698 4398292 : tree clhs, crhs;
5699 4398292 : enum tree_code code;
5700 4398292 : bool was_le = false;
5701 4398292 : if (gimple_code (stmt) == GIMPLE_COND)
5702 : {
5703 4109584 : clhs = gimple_cond_lhs (stmt);
5704 4109584 : crhs = gimple_cond_rhs (stmt);
5705 4109584 : code = gimple_cond_code (stmt);
5706 : }
5707 : else
5708 : {
5709 288708 : clhs = gimple_assign_rhs1 (stmt);
5710 288708 : crhs = gimple_assign_rhs2 (stmt);
5711 288708 : code = gimple_assign_rhs_code (stmt);
5712 : }
5713 4398292 : if (code != LE_EXPR && code != GT_EXPR
5714 4398292 : && code != EQ_EXPR && code != NE_EXPR)
5715 4398286 : return;
5716 2049024 : if (code == LE_EXPR || code == GT_EXPR)
5717 4138117 : was_le = true; /* Ordered comparison, i.e. the `<=`/`>` form. */
5718 4138117 : if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
5719 3982430 : return;
5720 155687 : gimple *call = SSA_NAME_DEF_STMT (clhs);
5721 155687 : combined_fn cfn = gimple_call_combined_fn (call);
5722 155687 : switch (cfn)
5723 : {
5724 15 : CASE_CFN_POPCOUNT:
5725 15 : break;
5726 : default:
5727 : return;
5728 : }
5729 15 : if (!has_single_use (clhs)) /* The call is replaced or removed below. */
5730 : return;
5731 14 : tree arg = gimple_call_arg (call, 0);
5732 14 : tree type = TREE_TYPE (arg);
5733 14 : if (!INTEGRAL_TYPE_P (type))
5734 : return;
5735 14 : bool nonzero_arg = tree_expr_nonzero_p (arg);
5736 14 : if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
5737 : {
5738 : /* Tell expand_POPCOUNT the popcount result is only used in a comparison
5739 : with one, so that it can decide based on rtx costs. The extra argument is -1 for `<=`/`>`, 0 for `==`/`!=` with nonzero ARG and 1 otherwise. */
5740 16 : gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
5741 : was_le ? integer_minus_one_node
5742 8 : : (nonzero_arg ? integer_zero_node
5743 : : integer_one_node));
5744 8 : gimple_call_set_lhs (g, gimple_call_lhs (call));
5745 8 : gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
5746 8 : gsi_replace (&gsi2, g, true);
5747 8 : return;
5748 : }
5749 6 : tree argm1 = make_ssa_name (type);
5750 6 : gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
5751 : build_int_cst (type, -1)); /* argm1 = arg - 1. */
5752 6 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5753 6 : g = gimple_build_assign (make_ssa_name (type),
5754 6 : (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR,
5755 : arg, argm1);
5756 6 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5757 6 : tree_code cmpcode;
5758 6 : if (was_le)
5759 : {
5760 0 : argm1 = build_zero_cst (type); /* Compare arg & (arg - 1) against zero. */
5761 0 : cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR;
5762 : }
5763 6 : else if (nonzero_arg)
5764 : {
5765 2 : argm1 = build_zero_cst (type); /* Compare arg & (arg - 1) against zero. */
5766 2 : cmpcode = code;
5767 : }
5768 : else
5769 4 : cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR; /* Compare arg ^ (arg - 1) against arg - 1. */
5770 6 : if (gcond *cond = dyn_cast <gcond *> (stmt))
5771 : {
5772 2 : gimple_cond_set_lhs (cond, gimple_assign_lhs (g));
5773 2 : gimple_cond_set_rhs (cond, argm1);
5774 2 : gimple_cond_set_code (cond, cmpcode);
5775 : }
5776 : else
5777 : {
5778 4 : gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (g));
5779 4 : gimple_assign_set_rhs2 (stmt, argm1);
5780 4 : gimple_assign_set_rhs_code (stmt, cmpcode);
5781 : }
5782 6 : update_stmt (stmt);
5783 6 : gimple_stmt_iterator gsi2 = gsi_for_stmt (call); /* The .POPCOUNT call is no longer used; remove it. */
5784 6 : gsi_remove (&gsi2, true);
5785 6 : release_defs (call);
5786 : }
5787 :
5788 : /* Return true if target has support for divmod in MODE: either DIVMOD_OPTAB has an insn for MODE, or it has a libfunc for MODE that is usable (see below). DIV_OPTAB is the plain division optab of the same signedness as chosen by the caller. */
5789 :
5790 : static bool
5791 29146 : target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
5792 : {
5793 : /* If target supports hardware divmod insn, use it for divmod. */
5794 29146 : if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
5795 : return true;
5796 :
5797 : /* Check if libfunc for divmod is available. */
5798 2568 : rtx libfunc = optab_libfunc (divmod_optab, mode);
5799 2568 : if (libfunc != NULL_RTX)
5800 : {
5801 : /* If optab_handler exists for div_optab, perhaps in a wider mode,
5802 : we don't want to use the libfunc even if it exists for given mode. */
5803 : machine_mode div_mode;
5804 10707 : FOR_EACH_MODE_FROM (div_mode, mode)
5805 8139 : if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5806 : return false;
5807 :
5808 2568 : return targetm.expand_divmod_libfunc != NULL; /* Usable only if the target provides the expand_divmod_libfunc hook. */
5809 : }
5810 :
5811 : return false;
5812 : }
5813 :
5814 : /* Check if STMT is a candidate for the divmod transform. Return false for a constant dividend, for the constant divisors rejected below, for types whose overflow traps, and when target_supports_divmod_p fails for the mode of STMT's lhs type. */
5815 :
5816 : static bool
5817 47891 : divmod_candidate_p (gassign *stmt)
5818 : {
5819 47891 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5820 47891 : machine_mode mode = TYPE_MODE (type);
5821 47891 : optab divmod_optab, div_optab;
5822 :
5823 47891 : if (TYPE_UNSIGNED (type))
5824 : {
5825 : divmod_optab = udivmod_optab;
5826 : div_optab = udiv_optab;
5827 : }
5828 : else
5829 : {
5830 20231 : divmod_optab = sdivmod_optab;
5831 20231 : div_optab = sdiv_optab;
5832 : }
5833 :
5834 47891 : tree op1 = gimple_assign_rhs1 (stmt);
5835 47891 : tree op2 = gimple_assign_rhs2 (stmt);
5836 :
5837 : /* Disable the transform if the dividend is a constant, and for the constant
5838 : divisors rejected below, since division-by-constant may have specialized expansion. */
5839 47891 : if (CONSTANT_CLASS_P (op1))
5840 : return false;
5841 :
5842 44129 : if (CONSTANT_CLASS_P (op2))
5843 : {
5844 17212 : if (integer_pow2p (op2))
5845 : return false;
5846 :
5847 15120 : if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5848 16215 : && element_precision (type) <= BITS_PER_WORD)
5849 : return false;
5850 :
5851 : /* If the divisor is not power of 2 and the precision wider than
5852 : HWI, expand_divmod punts on that, so in that case it is better
5853 : to use divmod optab or libfunc. Similarly if choose_multiplier
5854 : might need pre/post shifts of BITS_PER_WORD or more. */
5855 : }
5856 :
5857 : /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5858 : expand using the [su]divv optabs. */
5859 29146 : if (TYPE_OVERFLOW_TRAPS (type))
5860 : return false;
5861 :
5862 29146 : if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5863 : return false;
5864 :
5865 : return true;
5866 : }
5867 :
5868 : /* This function looks for:
5869 : t1 = a TRUNC_DIV_EXPR b;
5870 : t2 = a TRUNC_MOD_EXPR b;
5871 : and transforms it to the following sequence:
5872 : complex_tmp = DIVMOD (a, b);
5873 : t1 = REALPART_EXPR(complex_tmp);
5874 : t2 = IMAGPART_EXPR(complex_tmp);
5875 : For conditions enabling the transform see divmod_candidate_p().
5876 :
5877 : The pass has three parts:
5878 : 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5879 : other trunc_div_expr and trunc_mod_expr stmts.
5880 : 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5881 : to stmts vector.
5882 : 3) Insert DIVMOD call just before top_stmt and update entries in
5883 : stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
5884 : IMAGPART_EXPR for mod). Return true iff the transform was done. Statements that can throw internally are never converted. */
5885 :
5886 : static bool
5887 47910 : convert_to_divmod (gassign *stmt)
5888 : {
5889 47910 : if (stmt_can_throw_internal (cfun, stmt)
5890 47910 : || !divmod_candidate_p (stmt))
5891 18764 : return false;
5892 :
5893 29146 : tree op1 = gimple_assign_rhs1 (stmt);
5894 29146 : tree op2 = gimple_assign_rhs2 (stmt);
5895 :
5896 29146 : imm_use_iterator use_iter;
5897 29146 : gimple *use_stmt;
5898 29146 : auto_vec<gimple *> stmts;
5899 :
5900 29146 : gimple *top_stmt = stmt;
5901 29146 : basic_block top_bb = gimple_bb (stmt);
5902 :
5903 : /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
5904 : at least stmt and possibly other trunc_div/trunc_mod stmts
5905 : having same operands as stmt. */
5906 :
5907 125462 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
5908 : {
5909 96316 : if (is_gimple_assign (use_stmt)
5910 58850 : && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5911 47080 : || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5912 48531 : && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
5913 144730 : && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
5914 : {
5915 40809 : if (stmt_can_throw_internal (cfun, use_stmt))
5916 0 : continue;
5917 :
5918 40809 : basic_block bb = gimple_bb (use_stmt);
5919 :
5920 40809 : if (bb == top_bb)
5921 : {
5922 40090 : if (gimple_uid (use_stmt) < gimple_uid (top_stmt)) /* Within one block the stmt with the smaller uid becomes top_stmt. */
5923 5154 : top_stmt = use_stmt;
5924 : }
5925 719 : else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb)) /* bb dominates the current top_bb. */
5926 : {
5927 195 : top_bb = bb;
5928 195 : top_stmt = use_stmt;
5929 : }
5930 : }
5931 29146 : }
5932 :
5933 29146 : tree top_op1 = gimple_assign_rhs1 (top_stmt);
5934 29146 : tree top_op2 = gimple_assign_rhs2 (top_stmt);
5935 :
5936 29146 : stmts.safe_push (top_stmt);
5937 29146 : bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
5938 :
5939 : /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
5940 : to stmts vector. The 2nd loop will always add stmt to stmts vector, since
5941 : gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
5942 : 2nd loop ends up adding at least single trunc_mod_expr stmt. */
5943 :
5944 125462 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
5945 : {
5946 96316 : if (is_gimple_assign (use_stmt)
5947 58850 : && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5948 47080 : || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5949 48531 : && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
5950 144730 : && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
5951 : {
5952 70039 : if (use_stmt == top_stmt
5953 11663 : || stmt_can_throw_internal (cfun, use_stmt)
5954 52472 : || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
5955 29230 : continue;
5956 :
5957 11579 : stmts.safe_push (use_stmt);
5958 11579 : if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
5959 96316 : div_seen = true;
5960 : }
5961 29146 : }
5962 :
5963 29146 : if (!div_seen) /* No TRUNC_DIV_EXPR among the collected stmts. */
5964 : return false;
5965 :
5966 : /* Part 3: Create libcall to internal fn DIVMOD:
5967 : divmod_tmp = DIVMOD (op1, op2). */
5968 :
5969 11552 : gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
5970 11552 : tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
5971 : call_stmt, "divmod_tmp");
5972 11552 : gimple_call_set_lhs (call_stmt, res);
5973 : /* We rejected throwing statements above. */
5974 11552 : gimple_call_set_nothrow (call_stmt, true);
5975 :
5976 : /* Insert the call before top_stmt. */
5977 11552 : gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
5978 11552 : gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
5979 :
5980 11552 : widen_mul_stats.divmod_calls_inserted++;
5981 :
5982 : /* Update all statements in stmts vector:
5983 : lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5984 : lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
5985 :
5986 63827 : for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
5987 : {
5988 23129 : tree new_rhs;
5989 :
5990 23129 : switch (gimple_assign_rhs_code (use_stmt))
5991 : {
5992 11562 : case TRUNC_DIV_EXPR:
5993 11562 : new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
5994 11562 : break;
5995 :
5996 11567 : case TRUNC_MOD_EXPR:
5997 11567 : new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
5998 11567 : break;
5999 :
6000 0 : default:
6001 0 : gcc_unreachable ();
6002 : }
6003 :
6004 23129 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
6005 23129 : gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
6006 23129 : update_stmt (use_stmt);
6007 : }
6008 :
6009 : return true;
6010 29146 : }
6011 :
6012 : /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
6013 : its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
6014 : value is true iff we converted the statement. The shifted value must be a single-use WIDEN_MULT_EXPR result from STMT's block (possibly through one single-use NOP_EXPR), the shift count a constant in [prec, 2 * prec) where prec is the multiplication operands' precision, and the target must have a highpart multiply insn for the operand mode. New statements are inserted through GSI, which is also used to replace STMT. */
6015 :
6016 : static bool
6017 169361 : convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
6018 : {
6019 169361 : tree lhs = gimple_assign_lhs (stmt);
6020 169361 : tree stype = TREE_TYPE (lhs);
6021 169361 : tree sarg0 = gimple_assign_rhs1 (stmt);
6022 169361 : tree sarg1 = gimple_assign_rhs2 (stmt);
6023 :
6024 169361 : if (TREE_CODE (stype) != INTEGER_TYPE
6025 162503 : || TREE_CODE (sarg1) != INTEGER_CST
6026 146690 : || TREE_CODE (sarg0) != SSA_NAME
6027 146689 : || !tree_fits_uhwi_p (sarg1)
6028 316050 : || !has_single_use (sarg0))
6029 : return false;
6030 :
6031 43792 : gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
6032 40758 : if (!def)
6033 : return false;
6034 :
6035 40758 : enum tree_code mcode = gimple_assign_rhs_code (def);
6036 40758 : if (mcode == NOP_EXPR) /* Look through one single-use conversion. */
6037 : {
6038 8963 : tree tmp = gimple_assign_rhs1 (def);
6039 8963 : if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
6040 : return false;
6041 171430 : def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
6042 2918 : if (!def)
6043 : return false;
6044 2918 : mcode = gimple_assign_rhs_code (def);
6045 : }
6046 :
6047 34713 : if (mcode != WIDEN_MULT_EXPR
6048 34713 : || gimple_bb (def) != gimple_bb (stmt))
6049 : return false;
6050 850 : tree mtype = TREE_TYPE (gimple_assign_lhs (def));
6051 850 : if (TREE_CODE (mtype) != INTEGER_TYPE
6052 850 : || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
6053 : return false;
6054 :
6055 850 : tree mop1 = gimple_assign_rhs1 (def);
6056 850 : tree mop2 = gimple_assign_rhs2 (def);
6057 850 : tree optype = TREE_TYPE (mop1);
6058 850 : bool unsignedp = TYPE_UNSIGNED (optype);
6059 850 : unsigned int prec = TYPE_PRECISION (optype);
6060 :
6061 850 : if (unsignedp != TYPE_UNSIGNED (mtype)
6062 850 : || TYPE_PRECISION (mtype) != 2 * prec)
6063 : return false;
6064 :
6065 850 : unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
6066 850 : if (bits < prec || bits >= 2 * prec) /* The shift must select bits from the upper half of the product only. */
6067 : return false;
6068 :
6069 : /* For the time being, require operands to have the same sign. */
6070 849 : if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
6071 : return false;
6072 :
6073 849 : machine_mode mode = TYPE_MODE (optype);
6074 849 : optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
6075 849 : if (optab_handler (tab, mode) == CODE_FOR_nothing)
6076 : return false;
6077 :
6078 849 : location_t loc = gimple_location (stmt);
6079 849 : tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
6080 : MULT_HIGHPART_EXPR, mop1, mop2);
6081 849 : tree highpart2 = highpart1;
6082 849 : tree ntype = optype;
6083 :
6084 849 : if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype)) /* Switch to the signedness of the shift's type first. */
6085 : {
6086 16 : ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
6087 7 : : signed_type_for (optype);
6088 16 : highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
6089 : }
6090 849 : if (bits > prec) /* Apply the part of the shift beyond prec to the high part. */
6091 29 : highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
6092 : RSHIFT_EXPR, highpart2,
6093 29 : build_int_cst (ntype, bits - prec));
6094 :
6095 849 : gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
6096 849 : gsi_replace (gsi, new_stmt, true);
6097 :
6098 849 : widen_mul_stats.highpart_mults_inserted++;
6099 849 : return true;
6100 : }
6101 :
6102 : /* If target has spaceship<MODE>3 expander, pattern recognize
6103 : <bb 2> [local count: 1073741824]:
6104 : if (a_2(D) == b_3(D))
6105 : goto <bb 6>; [34.00%]
6106 : else
6107 : goto <bb 3>; [66.00%]
6108 :
6109 : <bb 3> [local count: 708669601]:
6110 : if (a_2(D) < b_3(D))
6111 : goto <bb 6>; [1.04%]
6112 : else
6113 : goto <bb 4>; [98.96%]
6114 :
6115 : <bb 4> [local count: 701299439]:
6116 : if (a_2(D) > b_3(D))
6117 : goto <bb 5>; [48.89%]
6118 : else
6119 : goto <bb 6>; [51.11%]
6120 :
6121 : <bb 5> [local count: 342865295]:
6122 :
6123 : <bb 6> [local count: 1073741824]:
6124 : and turn it into:
6125 : <bb 2> [local count: 1073741824]:
6126 : _1 = .SPACESHIP (a_2(D), b_3(D), 0);
6127 : if (_1 == 0)
6128 : goto <bb 6>; [34.00%]
6129 : else
6130 : goto <bb 3>; [66.00%]
6131 :
6132 : <bb 3> [local count: 708669601]:
6133 : if (_1 == -1)
6134 : goto <bb 6>; [1.04%]
6135 : else
6136 : goto <bb 4>; [98.96%]
6137 :
6138 : <bb 4> [local count: 701299439]:
6139 : if (_1 == 1)
6140 : goto <bb 5>; [48.89%]
6141 : else
6142 : goto <bb 6>; [51.11%]
6143 :
6144 : <bb 5> [local count: 342865295]:
6145 :
6146 : <bb 6> [local count: 1073741824]:
6147 : so that the backend can emit optimal comparison and
6148 : conditional jump sequence. If the
6149 : <bb 6> [local count: 1073741824]:
6150 : above has a single PHI like:
6151 : # _27 = PHI<0(2), -1(3), -128(4), 1(5)>
6152 : then replace it with effectively
6153 : _1 = .SPACESHIP (a_2(D), b_3(D), -128);
6154 : _27 = _1; */
6155 :
6156 : static void
6157 4109584 : optimize_spaceship (gcond *stmt)
6158 : {
6159 4109584 : enum tree_code code = gimple_cond_code (stmt);
6160 4109584 : if (code != EQ_EXPR && code != NE_EXPR)
6161 4109392 : return;
6162 3329748 : tree arg1 = gimple_cond_lhs (stmt);
6163 3329748 : tree arg2 = gimple_cond_rhs (stmt);
6164 3329748 : if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
6165 3219463 : && !INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
6166 2573653 : || optab_handler (spaceship_optab,
6167 2573653 : TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
6168 5862747 : || operand_equal_p (arg1, arg2, 0))
6169 798066 : return;
6170 :
6171 2531682 : basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
6172 2531682 : edge em1 = NULL, e1 = NULL, e2 = NULL;
6173 2531682 : bb1 = EDGE_SUCC (bb0, 1)->dest;
6174 2531682 : if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
6175 1531794 : bb1 = EDGE_SUCC (bb0, 0)->dest;
6176 :
6177 7544939 : gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
6178 1099514 : if (g == NULL
6179 1099514 : || !single_pred_p (bb1)
6180 686249 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6181 576638 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6182 467027 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6183 962 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6184 586552 : || !cond_only_block_p (bb1))
6185 2522463 : return;
6186 :
6187 9219 : enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6188 9219 : ? LT_EXPR : GT_EXPR);
6189 9219 : switch (gimple_cond_code (g))
6190 : {
6191 : case LT_EXPR:
6192 : case LE_EXPR:
6193 : break;
6194 7762 : case GT_EXPR:
6195 7762 : case GE_EXPR:
6196 7762 : ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
6197 : break;
6198 : default:
6199 : return;
6200 : }
6201 :
6202 27558 : for (int i = 0; i < 2; ++i)
6203 : {
6204 : /* With NaNs, </<=/>/>= are false, so we need to look for the
6205 : third comparison on the false edge from whatever non-equality
6206 : comparison the second comparison is. */
6207 18426 : if (HONOR_NANS (TREE_TYPE (arg1))
6208 18426 : && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
6209 199 : continue;
6210 :
6211 18227 : bb2 = EDGE_SUCC (bb1, i)->dest;
6212 54338 : g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
6213 12336 : if (g == NULL
6214 12336 : || !single_pred_p (bb2)
6215 17430 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6216 9808 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6217 2186 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6218 19 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6219 85 : || !cond_only_block_p (bb2)
6220 9893 : || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
6221 18142 : continue;
6222 :
6223 85 : enum tree_code ccode2
6224 85 : = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
6225 85 : switch (gimple_cond_code (g))
6226 : {
6227 : case LT_EXPR:
6228 : case LE_EXPR:
6229 : break;
6230 55 : case GT_EXPR:
6231 55 : case GE_EXPR:
6232 55 : ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
6233 : break;
6234 1 : default:
6235 1 : continue;
6236 : }
6237 84 : if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
6238 0 : continue;
6239 :
6240 168 : if ((ccode == LT_EXPR)
6241 84 : ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
6242 : {
6243 55 : em1 = EDGE_SUCC (bb1, 1 - i);
6244 55 : e1 = EDGE_SUCC (bb2, 0);
6245 55 : e2 = EDGE_SUCC (bb2, 1);
6246 55 : if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
6247 0 : std::swap (e1, e2);
6248 : }
6249 : else
6250 : {
6251 29 : e1 = EDGE_SUCC (bb1, 1 - i);
6252 29 : em1 = EDGE_SUCC (bb2, 0);
6253 29 : e2 = EDGE_SUCC (bb2, 1);
6254 29 : if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
6255 : std::swap (em1, e2);
6256 : }
6257 : break;
6258 : }
6259 :
6260 9187 : if (em1 == NULL)
6261 : {
6262 18264 : if ((ccode == LT_EXPR)
6263 9132 : ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
6264 : {
6265 3136 : em1 = EDGE_SUCC (bb1, 1);
6266 3136 : e1 = EDGE_SUCC (bb1, 0);
6267 3136 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6268 : }
6269 : else
6270 : {
6271 5996 : em1 = EDGE_SUCC (bb1, 0);
6272 5996 : e1 = EDGE_SUCC (bb1, 1);
6273 5996 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6274 : }
6275 : }
6276 :
6277 : /* Check if there is a single bb into which all failed conditions
6278 : jump to (perhaps through an empty block) and if it results in
6279 : a single integral PHI which just sets it to -1, 0, 1, X
6280 : (or -1, 0, 1 when NaNs can't happen). In that case use 1 rather
6281 : than 0 as last .SPACESHIP argument to tell backends it might
6282 : consider different code generation and just cast the result
6283 : of .SPACESHIP to the PHI result. X above is some value
6284 : other than -1, 0, 1, for libstdc++ -128, for libc++ -127. */
6285 9216 : tree arg3 = integer_zero_node;
6286 9216 : edge e = EDGE_SUCC (bb0, 0);
6287 9216 : if (e->dest == bb1)
6288 6526 : e = EDGE_SUCC (bb0, 1);
6289 9216 : basic_block bbp = e->dest;
6290 9216 : gphi *phi = NULL;
6291 9216 : for (gphi_iterator psi = gsi_start_phis (bbp);
6292 11135 : !gsi_end_p (psi); gsi_next (&psi))
6293 : {
6294 3491 : gphi *gp = psi.phi ();
6295 3491 : tree res = gimple_phi_result (gp);
6296 :
6297 3491 : if (phi != NULL
6298 3149 : || virtual_operand_p (res)
6299 2215 : || !INTEGRAL_TYPE_P (TREE_TYPE (res))
6300 5561 : || TYPE_PRECISION (TREE_TYPE (res)) < 2)
6301 : {
6302 : phi = NULL;
6303 : break;
6304 : }
6305 1919 : phi = gp;
6306 : }
6307 9216 : if (phi
6308 1577 : && integer_zerop (gimple_phi_arg_def_from_edge (phi, e))
6309 9741 : && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3))
6310 : {
6311 106 : HOST_WIDE_INT argval
6312 106 : = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? -128 : -1;
6313 628 : for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i)
6314 : {
6315 227 : edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2;
6316 227 : if (e3->dest != bbp)
6317 : {
6318 105 : if (!empty_block_p (e3->dest)
6319 96 : || !single_succ_p (e3->dest)
6320 201 : || single_succ (e3->dest) != bbp)
6321 : {
6322 : phi = NULL;
6323 : break;
6324 : }
6325 : e3 = single_succ_edge (e3->dest);
6326 : }
6327 218 : tree a = gimple_phi_arg_def_from_edge (phi, e3);
6328 218 : if (TREE_CODE (a) != INTEGER_CST
6329 218 : || (i == 0 && !integer_onep (a))
6330 430 : || (i == 1 && !integer_all_onesp (a)))
6331 : {
6332 : phi = NULL;
6333 : break;
6334 : }
6335 212 : if (i == 2)
6336 : {
6337 30 : tree minv = TYPE_MIN_VALUE (signed_char_type_node);
6338 30 : tree maxv = TYPE_MAX_VALUE (signed_char_type_node);
6339 30 : widest_int w = widest_int::from (wi::to_wide (a), SIGNED);
6340 41 : if ((w >= -1 && w <= 1)
6341 26 : || w < wi::to_widest (minv)
6342 56 : || w > wi::to_widest (maxv))
6343 : {
6344 4 : phi = NULL;
6345 4 : break;
6346 : }
6347 26 : argval = w.to_shwi ();
6348 26 : }
6349 : }
6350 106 : if (phi)
6351 87 : arg3 = build_int_cst (integer_type_node,
6352 103 : TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval);
6353 : }
6354 :
6355 : /* For integral <=> comparisons only use .SPACESHIP if it is turned
6356 : into an integer (-1, 0, 1). */
6357 9216 : if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) && arg3 == integer_zero_node)
6358 : return;
6359 :
6360 279 : gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 3, arg1, arg2, arg3);
6361 279 : tree lhs = make_ssa_name (integer_type_node);
6362 279 : gimple_call_set_lhs (gc, lhs);
6363 279 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6364 279 : gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
6365 :
6366 471 : wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node));
6367 471 : wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node));
6368 279 : if (HONOR_NANS (TREE_TYPE (arg1)))
6369 : {
6370 199 : if (arg3 == integer_zero_node)
6371 173 : wmin = wi::shwi (-128, TYPE_PRECISION (integer_type_node));
6372 26 : else if (tree_int_cst_sgn (arg3) < 0)
6373 19 : wmin = wi::to_wide (arg3);
6374 : else
6375 7 : wmax = wi::to_wide (arg3);
6376 : }
6377 471 : int_range<1> vr (TREE_TYPE (lhs), wmin, wmax);
6378 279 : set_range_info (lhs, vr);
6379 :
6380 279 : if (arg3 != integer_zero_node)
6381 : {
6382 87 : tree type = TREE_TYPE (gimple_phi_result (phi));
6383 87 : if (!useless_type_conversion_p (type, integer_type_node))
6384 : {
6385 63 : tree tem = make_ssa_name (type);
6386 63 : gimple *gcv = gimple_build_assign (tem, NOP_EXPR, lhs);
6387 63 : gsi_insert_before (&gsi, gcv, GSI_SAME_STMT);
6388 63 : lhs = tem;
6389 : }
6390 87 : SET_PHI_ARG_DEF_ON_EDGE (phi, e, lhs);
6391 87 : gimple_cond_set_lhs (stmt, boolean_false_node);
6392 87 : gimple_cond_set_rhs (stmt, boolean_false_node);
6393 161 : gimple_cond_set_code (stmt, (e->flags & EDGE_TRUE_VALUE)
6394 : ? EQ_EXPR : NE_EXPR);
6395 87 : update_stmt (stmt);
6396 87 : return;
6397 : }
6398 :
6399 192 : gimple_cond_set_lhs (stmt, lhs);
6400 192 : gimple_cond_set_rhs (stmt, integer_zero_node);
6401 192 : update_stmt (stmt);
6402 :
6403 384 : gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
6404 192 : gimple_cond_set_lhs (cond, lhs);
6405 192 : if (em1->src == bb1 && e2 != em1)
6406 : {
6407 112 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6408 118 : gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
6409 : ? EQ_EXPR : NE_EXPR);
6410 : }
6411 : else
6412 : {
6413 80 : gcc_assert (e1->src == bb1 && e2 != e1);
6414 80 : gimple_cond_set_rhs (cond, integer_one_node);
6415 80 : gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
6416 : ? EQ_EXPR : NE_EXPR);
6417 : }
6418 192 : update_stmt (cond);
6419 :
6420 192 : if (e2 != e1 && e2 != em1)
6421 : {
6422 116 : cond = as_a <gcond *> (*gsi_last_bb (bb2));
6423 58 : gimple_cond_set_lhs (cond, lhs);
6424 58 : if (em1->src == bb2)
6425 29 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6426 : else
6427 : {
6428 29 : gcc_assert (e1->src == bb2);
6429 29 : gimple_cond_set_rhs (cond, integer_one_node);
6430 : }
6431 58 : gimple_cond_set_code (cond,
6432 58 : (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
6433 58 : update_stmt (cond);
6434 : }
6435 : }
6436 :
6437 :
6438 : /* Find integer multiplications where the operands are extended from
6439 : smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
6440 : or MULT_HIGHPART_EXPR where appropriate. */
6441 :
6442 : namespace {
6443 :
/* Descriptor for the "widening_mul" GIMPLE pass.  It requires SSA form
   (PROP_ssa), is timed under TV_TREE_WIDEN_MUL and requests
   TODO_update_ssa once the pass has finished.  */
6444 : const pass_data pass_data_optimize_widening_mul =
6445 : {
6446 : GIMPLE_PASS, /* type */
6447 : "widening_mul", /* name */
6448 : OPTGROUP_NONE, /* optinfo_flags */
6449 : TV_TREE_WIDEN_MUL, /* tv_id */
6450 : PROP_ssa, /* properties_required */
6451 : 0, /* properties_provided */
6452 : 0, /* properties_destroyed */
6453 : 0, /* todo_flags_start */
6454 : TODO_update_ssa, /* todo_flags_finish */
6455 : };
6456 :
/* Pass object for the widening_mul pass.  It is constructed from
   pass_data_optimize_widening_mul; the work itself is done by execute,
   which is only declared here and defined out of line.  */
6457 : class pass_optimize_widening_mul : public gimple_opt_pass
6458 : {
6459 : public:
6460 288767 : pass_optimize_widening_mul (gcc::context *ctxt)
6461 577534 : : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
6462 : {}
6463 :
6464 : /* opt_pass methods: */
/* The pass runs only when both flag_expensive_optimizations and
   optimize are nonzero.  */
6465 1044325 : bool gate (function *) final override
6466 : {
6467 1044325 : return flag_expensive_optimizations && optimize;
6468 : }
6469 :
6470 : unsigned int execute (function *) final override;
6471 :
6472 : }; // class pass_optimize_widening_mul
6473 :
6474 : /* Walker class to perform the transformation in reverse dominance order. */
6475 :
6476 : class math_opts_dom_walker : public dom_walker
6477 : {
6478 : public:
6479 : /* Constructor.  CFG_CHANGED_P is a pointer to a boolean flag that will be set
6480 : if walking modifies the CFG.  The walk uses CDI_DOMINATORS.  */
6481 :
6482 966277 : math_opts_dom_walker (bool *cfg_changed_p)
6483 2898831 : : dom_walker (CDI_DOMINATORS), m_last_result_set (),
6484 966277 : m_cfg_changed_p (cfg_changed_p) {}
6485 :
6486 : /* The actual actions performed in the walk. */
6487 :
6488 : void after_dom_children (basic_block) final override;
6489 :
6490 : /* Set of results of chains of multiply and add statement combinations that
6491 : were not transformed into FMAs because of active deferring. */
6492 : hash_set<tree> m_last_result_set;
6493 :
6494 : /* Pointer to a flag of the user that needs to be set if CFG has been
6495 : modified. */
6496 : bool *m_cfg_changed_p;
6497 : };
6498 :
/* Process basic block BB.  First run the saturation matchers on every PHI
   node of BB and remove each PHI for which one of them succeeds.  Then
   scan the statements after BB's labels and dispatch on the kind of
   statement: assignments by their rhs code, calls by their combined
   function, and GIMPLE_CONDs to the single-bit-test and spaceship
   optimizations.  Finally decide whether the FMA deferring state built up
   while scanning BB is kept or cancelled.  */
6499 : void
6500 10157378 : math_opts_dom_walker::after_dom_children (basic_block bb)
6501 : {
6502 10157378 : gimple_stmt_iterator gsi;
6503 :
/* Fresh FMA deferring state for this block; the constructor argument is
   true only when param_avoid_fma_max_bits is positive.  */
6504 10157378 : fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
6505 :
/* PSI_NEXT is computed before the matchers run, so the iteration can
   continue from it after the PHI at PSI has been removed.  */
6506 14293615 : for (gphi_iterator psi_next, psi = gsi_start_phis (bb); !gsi_end_p (psi);
6507 4136237 : psi = psi_next)
6508 : {
6509 4136237 : psi_next = psi;
6510 4136237 : gsi_next (&psi_next);
6511 :
6512 4136237 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
6513 4136237 : gphi *phi = psi.phi ();
6514 :
/* The matchers are tried in order and the first success short-circuits
   the rest.  The PHI node is then removed without releasing its
   result (release_lhs_p is false).  */
6515 4136237 : if (match_saturation_add (&gsi, phi)
6516 4136220 : || match_saturation_sub (&gsi, phi)
6517 4136194 : || match_saturation_trunc (&gsi, phi)
6518 8272431 : || match_saturation_mul (&gsi, phi))
6519 43 : remove_phi_node (&psi, /* release_lhs_p */ false);
6520 : }
6521 :
/* The iterator is advanced by the gsi_next at the bottom of the loop
   body; the paths below that use `continue' skip that advance.  */
6522 91204076 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
6523 : {
6524 81046698 : gimple *stmt = gsi_stmt (gsi);
6525 81046698 : enum tree_code code;
6526 :
6527 81046698 : if (is_gimple_assign (stmt))
6528 : {
6529 20892375 : code = gimple_assign_rhs_code (stmt);
6530 20892375 : switch (code)
6531 : {
6532 722770 : case MULT_EXPR:
/* The FMA conversion is attempted only when neither the widening
   multiply nor the copysign expansion applied.  When it succeeds,
   STMT is removed and its definitions are released.  Otherwise the
   overflow and unsigned saturating-subtract matchers are tried.  */
6533 722770 : if (!convert_mult_to_widen (stmt, &gsi)
6534 712696 : && !convert_expand_mult_copysign (stmt, &gsi)
6535 1435423 : && convert_mult_to_fma (stmt,
6536 : gimple_assign_rhs1 (stmt),
6537 : gimple_assign_rhs2 (stmt),
6538 : &fma_state))
6539 : {
6540 16438 : gsi_remove (&gsi, true);
6541 16438 : release_defs (stmt);
6542 16438 : continue;
6543 : }
6544 706332 : match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
6545 706332 : match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
6546 706332 : break;
6547 :
6548 2265254 : case PLUS_EXPR:
6549 2265254 : match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
6550 2265254 : match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
6551 : /* fall-through */
6552 2560768 : case MINUS_EXPR:
/* match_uaddc_usubc is only tried when GSI still points at STMT
   after match_arith_overflow has run.  */
6553 2560768 : if (!convert_plusminus_to_widen (&gsi, stmt, code))
6554 : {
6555 2560768 : match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
6556 2560768 : if (gsi_stmt (gsi) == stmt)
6557 2554306 : match_uaddc_usubc (&gsi, stmt, code);
6558 : }
6559 : break;
6560 :
6561 36387 : case BIT_NOT_EXPR:
/* A true result skips the gsi_next at the bottom of the loop.
   NOTE(review): presumably match_arith_overflow has already
   repositioned GSI in that case -- confirm.  */
6562 36387 : if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
6563 170 : continue;
6564 : break;
6565 :
6566 47910 : case TRUNC_MOD_EXPR:
6567 47910 : convert_to_divmod (as_a<gassign *> (stmt));
6568 47910 : break;
6569 :
6570 169361 : case RSHIFT_EXPR:
6571 169361 : convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
6572 169361 : break;
6573 :
6574 183364 : case BIT_IOR_EXPR:
6575 183364 : match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
6576 183364 : match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
6577 183364 : match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
6578 : /* fall-through */
6579 213485 : case BIT_XOR_EXPR:
6580 213485 : match_uaddc_usubc (&gsi, stmt, code);
6581 213485 : break;
6582 :
6583 288708 : case EQ_EXPR:
6584 288708 : case NE_EXPR:
6585 288708 : case LE_EXPR:
6586 288708 : case GT_EXPR:
6587 288708 : match_single_bit_test (&gsi, stmt);
6588 288708 : break;
6589 :
6590 332621 : case COND_EXPR:
6591 332621 : case BIT_AND_EXPR:
6592 332621 : match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
6593 332621 : break;
6594 :
6595 2383750 : case NOP_EXPR:
6596 2383750 : match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
6597 2383750 : match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
6598 2383750 : match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
6599 2383750 : break;
6600 :
6601 : default:;
6602 : }
6603 : }
6604 60154323 : else if (is_gimple_call (stmt))
6605 : {
6606 4780127 : switch (gimple_call_combined_fn (stmt))
6607 : {
6608 129 : case CFN_COND_MUL:
/* Call argument 0 is passed as the extra operand of the FMA
   conversion; arguments 1 and 2 are the multiplication operands.
   On success the call is removed and its definitions released.  */
6609 129 : if (convert_mult_to_fma (stmt,
6610 : gimple_call_arg (stmt, 1),
6611 : gimple_call_arg (stmt, 2),
6612 : &fma_state,
6613 : gimple_call_arg (stmt, 0)))
6614 :
6615 : {
6616 84 : gsi_remove (&gsi, true);
6617 84 : release_defs (stmt);
6618 84 : continue;
6619 : }
6620 : break;
6621 :
6622 0 : case CFN_COND_LEN_MUL:
/* Same as CFN_COND_MUL, additionally forwarding call arguments 4
   and 5.  NOTE(review): presumably these are the length and bias
   operands -- confirm against convert_mult_to_fma.  */
6623 0 : if (convert_mult_to_fma (stmt,
6624 : gimple_call_arg (stmt, 1),
6625 : gimple_call_arg (stmt, 2),
6626 : &fma_state,
6627 : gimple_call_arg (stmt, 0),
6628 : gimple_call_arg (stmt, 4),
6629 : gimple_call_arg (stmt, 5)))
6630 :
6631 : {
6632 0 : gsi_remove (&gsi, true);
6633 0 : release_defs (stmt);
6634 0 : continue;
6635 : }
6636 : break;
6637 :
6638 3631386 : case CFN_LAST:
/* NOTE(review): presumably CFN_LAST means the call is not a
   recognized combined function -- confirm.  Such calls cancel FMA
   deferring.  */
6639 3631386 : cancel_fma_deferring (&fma_state);
6640 3631386 : break;
6641 :
6642 : default:
6643 : break;
6644 : }
6645 : }
6646 55374196 : else if (gimple_code (stmt) == GIMPLE_COND)
6647 : {
6648 4109584 : match_single_bit_test (&gsi, stmt);
6649 4109584 : optimize_spaceship (as_a <gcond *> (stmt));
6650 : }
6651 81030006 : gsi_next (&gsi);
6652 : }
/* If deferring is still active and an initial PHI was recorded, keep the
   deferral only when the last FMA candidate feeds that PHI, remembering
   its result in m_last_result_set; otherwise cancel the deferring.  */
6653 10157378 : if (fma_state.m_deferring_p
6654 7432461 : && fma_state.m_initial_phi)
6655 : {
6656 360 : gcc_checking_assert (fma_state.m_last_result);
6657 360 : if (!last_fma_candidate_feeds_initial_phi (&fma_state,
6658 : &m_last_result_set))
6659 264 : cancel_fma_deferring (&fma_state);
6660 : else
6661 96 : m_last_result_set.add (fma_state.m_last_result);
6662 : }
6663 10157378 : }
6664 :
6665 :
/* Main entry point of the widening_mul pass for FUN.  Clears
   widen_mul_stats, computes dominance information, renumbers the
   statement UIDs, runs math_opts_dom_walker starting at the entry block,
   and reports the collected statistics.  Returns TODO_cleanup_cfg if the
   walk flagged a CFG change, 0 otherwise.
   NOTE(review): the body uses the global cfun for the UID renumbering and
   the walk while the statistics use FUN; presumably both denote the same
   function while the pass executes -- confirm.  */
6666 : unsigned int
6667 966277 : pass_optimize_widening_mul::execute (function *fun)
6668 : {
6669 966277 : bool cfg_changed = false;
6670 :
6671 966277 : memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
6672 966277 : calculate_dominance_info (CDI_DOMINATORS);
6673 966277 : renumber_gimple_stmt_uids (cfun);
6674 :
/* The walker receives the address of CFG_CHANGED and may set it.  */
6675 966277 : math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6676 :
6677 966277 : statistics_counter_event (fun, "widening multiplications inserted",
6678 : widen_mul_stats.widen_mults_inserted);
6679 966277 : statistics_counter_event (fun, "widening maccs inserted",
6680 : widen_mul_stats.maccs_inserted);
6681 966277 : statistics_counter_event (fun, "fused multiply-adds inserted",
6682 : widen_mul_stats.fmas_inserted);
6683 966277 : statistics_counter_event (fun, "divmod calls inserted",
6684 : widen_mul_stats.divmod_calls_inserted);
6685 966277 : statistics_counter_event (fun, "highpart multiplications inserted",
6686 : widen_mul_stats.highpart_mults_inserted);
6687 :
6688 966277 : return cfg_changed ? TODO_cleanup_cfg : 0;
6689 : }
6690 :
6691 : } // anon namespace
6692 :
/* Allocate and return a new instance of the widening_mul pass, constructed
   with CTXT.  */
6693 : gimple_opt_pass *
6694 288767 : make_pass_optimize_widening_mul (gcc::context *ctxt)
6695 : {
6696 288767 : return new pass_optimize_widening_mul (ctxt);
6697 : }
|