Line data Source code
1 : /* Global, SSA-based optimizations using mathematical identities.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 : operations. These are common in sequences such as this one:
22 :
23 : modulus = sqrt(x*x + y*y + z*z);
24 : x = x / modulus;
25 : y = y / modulus;
26 : z = z / modulus;
27 :
28 : that can be optimized to
29 :
30 : modulus = sqrt(x*x + y*y + z*z);
31 : rmodulus = 1.0 / modulus;
32 : x = x * rmodulus;
33 : y = y * rmodulus;
34 : z = z * rmodulus;
35 :
36 : We do this for loop invariant divisors, and with this pass whenever
37 : we notice that a division has the same divisor multiple times.
38 :
39 : Of course, like in PRE, we don't insert a division if a dominator
40 : already has one. However, this cannot be done as an extension of
41 : PRE for several reasons.
42 :
43 : First of all, with some experiments it was found out that the
44 : transformation is not always useful if there are only two divisions
45 : by the same divisor. This is probably because modern processors
46 : can pipeline the divisions; on older, in-order processors it should
47 : still be effective to optimize two divisions by the same number.
48 : We make this a param, and it shall be called N in the remainder of
49 : this comment.
50 :
51 : Second, if trapping math is active, we have less freedom on where
52 : to insert divisions: we can only do so in basic blocks that already
53 : contain one. (If divisions don't trap, instead, we can insert
54 : divisions elsewhere, which will be in blocks that are common dominators
55 : of those that have the division).
56 :
57 : We really don't want to compute the reciprocal unless a division will
58 : be found. To do this, we won't insert the division in a basic block
59 : that has fewer than N divisions *post-dominating* it.
60 :
61 : The algorithm constructs a subset of the dominator tree, holding the
62 : blocks containing the divisions and the common dominators to them,
63 : and walks it twice. The first walk is in post-order, and it annotates
64 : each block with the number of divisions that post-dominate it: this
65 : gives information on where divisions can be inserted profitably.
66 : The second walk is in pre-order, and it inserts divisions as explained
67 : above, and replaces divisions by multiplications.
68 :
69 : In the best case, the cost of the pass is O(n_statements). In the
70 : worst-case, the cost is due to creating the dominator tree subset,
71 : with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 : for n_statements / n_basic_blocks statements. So, the amortized cost
73 : of creating the dominator tree subset is O(n_basic_blocks) and the
74 : worst-case cost of the pass is O(n_statements * n_basic_blocks).
75 :
76 : More practically, the cost will be small because there are few
77 : divisions, and they tend to be in the same basic block, so insert_bb
78 : is called very few times.
79 :
80 : If we did this using domwalk.cc, an efficient implementation would have
81 : to work on all the variables in a single pass, because we could not
82 : work on just a subset of the dominator tree, as we do now, and the
83 : cost would also be something like O(n_statements * n_basic_blocks).
84 : The data structures would be more complex in order to work on all the
85 : variables in a single pass. */
86 :
87 : #include "config.h"
88 : #include "system.h"
89 : #include "coretypes.h"
90 : #include "backend.h"
91 : #include "target.h"
92 : #include "rtl.h"
93 : #include "tree.h"
94 : #include "gimple.h"
95 : #include "predict.h"
96 : #include "alloc-pool.h"
97 : #include "tree-pass.h"
98 : #include "ssa.h"
99 : #include "optabs-tree.h"
100 : #include "gimple-pretty-print.h"
101 : #include "alias.h"
102 : #include "fold-const.h"
103 : #include "gimple-iterator.h"
104 : #include "gimple-fold.h"
105 : #include "stor-layout.h"
106 : #include "tree-cfg.h"
107 : #include "tree-dfa.h"
108 : #include "tree-ssa.h"
109 : #include "builtins.h"
110 : #include "internal-fn.h"
111 : #include "case-cfn-macros.h"
112 : #include "optabs-libfuncs.h"
113 : #include "tree-eh.h"
114 : #include "targhooks.h"
115 : #include "domwalk.h"
116 : #include "tree-ssa-math-opts.h"
117 : #include "dbgcnt.h"
118 : #include "langhooks.h"
119 : #include "cfghooks.h"
120 :
121 : /* This structure represents one basic block that either computes a
122 : division, or is a common dominator for basic blocks that compute a
123 : division. Instances are linked into a tree through CHILDREN and NEXT, are reachable from their block through BB->aux, and are allocated from OCC_POOL. */
124 : struct occurrence {
125 : /* The basic block represented by this structure. */
126 : basic_block bb = basic_block();
127 :
128 : /* If non-NULL, the SSA_NAME holding a reciprocal that can be used in BB:
129 : either one inserted in BB itself or one handed down from a dominating occurrence by insert_reciprocals. */
130 : tree recip_def = tree();
131 :
132 : /* If non-NULL, the SSA_NAME holding a squared reciprocal that can be
133 : used in BB; insert_reciprocals sets it together with RECIP_DEF. */
134 : tree square_recip_def = tree();
135 :
136 : /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
137 : was inserted in BB. replace_reciprocal compares against it so the inserted 1.0 / DEF statement is not rewritten itself. */
138 : gimple *recip_def_stmt = nullptr;
139 :
140 : /* Pointer to a list of "struct occurrence"s for blocks dominated
141 : by BB. */
142 : struct occurrence *children = nullptr;
143 :
144 : /* Pointer to the next "struct occurrence"s in the list of blocks
145 : sharing a common dominator. */
146 : struct occurrence *next = nullptr;
147 :
148 : /* The number of divisions that are in BB before compute_merit. The
149 : number of divisions that are in BB or post-dominate it after
150 : compute_merit. The count is weighted by the IMPORTANCE given to register_division_in, so insert_reciprocals halves it before comparing against the threshold. */
151 : int num_divisions = 0;
152 :
153 : /* True if the basic block has a division, false if it is a common
154 : dominator for basic blocks that do. If it is false and trapping
155 : math is active, BB is not a candidate for inserting a reciprocal. */
156 : bool bb_has_division = false;
157 :
158 : /* Construct a struct occurrence for basic block BB, and whose
159 : children list is headed by CHILDREN. The new object is also recorded in BB->aux. */
160 605 : occurrence (basic_block bb, struct occurrence *children)
161 605 : : bb (bb), children (children)
162 : {
163 605 : bb->aux = this; /* Lets later lookups find this occurrence from the block. */
164 : }
165 :
166 : /* Destroy a struct occurrence and remove it from its basic block. */
167 605 : ~occurrence ()
168 : {
169 605 : bb->aux = nullptr; /* Leave the block's aux field clear again. */
170 605 : }
171 :
172 : /* Allocate memory for a struct occurrence from OCC_POOL. */
173 : static void* operator new (size_t);
174 :
175 : /* Return memory for a struct occurrence to OCC_POOL. */
176 : static void operator delete (void*, size_t);
177 : };
178 :
179 : static struct /* Counters for the reciprocal CSE pass; zeroed with memset at the start of pass_cse_reciprocals::execute. */
180 : {
181 : /* Number of 1.0/X ops inserted. Incremented by insert_reciprocals each time it emits a reciprocal statement. */
182 : int rdivs_inserted;
183 :
184 : /* Number of 1.0/FUNC ops inserted. */
185 : int rfuncs_inserted;
186 : } reciprocal_stats;
187 :
188 : static struct /* Counters named after sincos handling; NOTE(review): not referenced in the part of the file visible here, so the updating code presumably lives further down. */
189 : {
190 : /* Number of cexpi calls inserted. */
191 : int inserted;
192 :
193 : /* Number of conversions removed. */
194 : int conv_removed;
195 :
196 : } sincos_stats;
197 :
198 : static struct /* Counters for multiplication-related transformations; NOTE(review): not referenced in the part of the file visible here, so the updating code presumably lives further down. */
199 : {
200 : /* Number of widening multiplication ops inserted. */
201 : int widen_mults_inserted;
202 :
203 : /* Number of integer multiply-and-accumulate ops inserted. */
204 : int maccs_inserted;
205 :
206 : /* Number of fp fused multiply-add ops inserted. */
207 : int fmas_inserted;
208 :
209 : /* Number of divmod calls inserted. */
210 : int divmod_calls_inserted;
211 :
212 : /* Number of highpart multiplication ops inserted. */
213 : int highpart_mults_inserted;
214 : } widen_mul_stats;
215 :
216 : /* The instance of "struct occurrence" representing the highest
217 : interesting block in the dominator tree. It heads the top-level list built by register_division_in and is reset to NULL at the end of execute_cse_reciprocals_1. */
218 : static struct occurrence *occ_head;
219 :
220 : /* Allocation pool for getting instances of "struct occurrence". Created in pass_cse_reciprocals::execute and used by occurrence::operator new / operator delete. */
221 : static object_allocator<occurrence> *occ_pool;
222 :
223 605 : void* occurrence::operator new (size_t n) /* Class-specific allocator: take raw storage for one occurrence from OCC_POOL. */
224 : {
225 605 : gcc_assert (n == sizeof(occurrence)); /* Only requests for exactly one occurrence object are accepted. */
226 605 : return occ_pool->allocate_raw ();
227 : }
228 :
229 605 : void occurrence::operator delete (void *occ, size_t n) /* Class-specific deallocator: hand the storage at OCC back to OCC_POOL. */
230 : {
231 605 : gcc_assert (n == sizeof(occurrence)); /* Mirrors the size check made in operator new. */
232 605 : occ_pool->remove_raw (occ);
233 605 : }
234 :
235 : /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
236 : list of "struct occurrence"s, one per basic block, having IDOM as
237 : their common dominator.
238 :
239 : We try to insert NEW_OCC as deep as possible in the tree, and we also
240 : insert any other block that is a common dominator for NEW_OCC's block and one
241 : block already in the tree. Each list entry OCC is classified by DOM, the nearest common dominator of its block and NEW_OCC's block; the four cases are commented in the loop below. */
242 :
243 : static void
244 595 : insert_bb (struct occurrence *new_occ, basic_block idom,
245 : struct occurrence **p_head)
246 : {
247 600 : struct occurrence *occ, **p_occ;
248 :
249 623 : for (p_occ = p_head; (occ = *p_occ) != NULL; ) /* P_OCC is advanced only in the final branch; the unlinking branches make *P_OCC the next entry instead. */
250 : {
251 28 : basic_block bb = new_occ->bb, occ_bb = occ->bb;
252 28 : basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
253 28 : if (dom == bb)
254 : {
255 : /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
256 : from its list. */
257 7 : *p_occ = occ->next;
258 7 : occ->next = new_occ->children; /* Push OCC on the front of NEW_OCC's children list. */
259 7 : new_occ->children = occ;
260 :
261 : /* Try the next block (it may as well be dominated by BB). */
262 : }
263 :
264 21 : else if (dom == occ_bb)
265 : {
266 : /* OCC_BB dominates BB. Tail recurse to look deeper. */
267 5 : insert_bb (new_occ, dom, &occ->children);
268 5 : return;
269 : }
270 :
271 16 : else if (dom != idom)
272 : {
273 10 : gcc_assert (!dom->aux); /* DOM must not have an occurrence yet; one is created for it below. */
274 :
275 : /* There is a dominator between IDOM and BB, add it and make
276 : two children out of NEW_OCC and OCC. First, remove OCC from
277 : its list. */
278 10 : *p_occ = occ->next;
279 10 : new_occ->next = occ; /* NEW_OCC followed by OCC forms the two-element children list of DOM. */
280 10 : occ->next = NULL;
281 :
282 : /* None of the previous blocks has DOM as a dominator: if we tail
283 : recursed, we would reexamine them uselessly. Just switch BB with
284 : DOM, and go on looking for blocks dominated by DOM. */
285 10 : new_occ = new occurrence (dom, new_occ);
286 : }
287 :
288 : else
289 : {
290 : /* Nothing special, go on with the next element. */
291 6 : p_occ = &occ->next;
292 : }
293 : }
294 :
295 : /* No deeper place was found: link NEW_OCC at the head of the list at P_HEAD, next to the entries that were left in it. */
296 595 : new_occ->next = *p_head;
297 595 : *p_head = new_occ;
298 : }
299 :
300 : /* Register that we found a division in BB.
301 : IMPORTANCE is a measure of how much weighting to give
302 : that division. Use IMPORTANCE = 2 to register a single
303 : division. If the division is going to be found multiple
304 : times use 1 (as it is with squares). The occurrence for BB is created and inserted into the tree headed by occ_head on first use. */
305 :
306 : static inline void
307 696 : register_division_in (basic_block bb, int importance)
308 : {
309 696 : struct occurrence *occ;
310 :
311 696 : occ = (struct occurrence *) bb->aux; /* The occurrence constructor stores the object in bb->aux. */
312 696 : if (!occ)
313 : {
314 595 : occ = new occurrence (bb, NULL);
315 595 : insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); /* Start the search at the top-level list, using the entry block as IDOM. */
316 : }
317 :
318 696 : occ->bb_has_division = true;
319 696 : occ->num_divisions += importance;
320 696 : }
321 :
322 :
323 : /* Compute the number of divisions that postdominate each block in OCC and
324 : its children. A child's count is added to OCC's count only when the child's block post-dominates the block tested for OCC. */
325 :
326 : static void
327 28 : compute_merit (struct occurrence *occ)
328 : {
329 28 : struct occurrence *occ_child;
330 28 : basic_block dom = occ->bb;
331 :
332 51 : for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
333 : {
334 23 : basic_block bb;
335 23 : if (occ_child->children)
336 3 : compute_merit (occ_child); /* Children first, so the child's count is final before it is added below. */
337 :
338 23 : if (flag_exceptions)
339 6 : bb = single_noncomplex_succ (dom); /* With exceptions enabled the post-dominance test starts from this block rather than from DOM itself. */
340 : else
341 : bb = dom;
342 :
343 23 : if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
344 12 : occ->num_divisions += occ_child->num_divisions;
345 : }
346 28 : }
347 :
348 :
349 : /* Return whether USE_STMT is a floating-point division by DEF: an assignment whose code is RDIV_EXPR, whose second operand is DEF, whose first operand is not DEF, and which cannot throw internally. */
350 : static inline bool
351 348548 : is_division_by (gimple *use_stmt, tree def)
352 : {
353 348548 : return is_gimple_assign (use_stmt)
354 238384 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
355 1235 : && gimple_assign_rhs2 (use_stmt) == def
356 : /* Do not recognize x / x as valid division, as we are getting
357 : confused later by replacing all immediate uses x in such
358 : a stmt. */
359 869 : && gimple_assign_rhs1 (use_stmt) != def
360 349417 : && !stmt_can_throw_internal (cfun, use_stmt);
361 : }
362 :
363 : /* Return TRUE if USE_STMT is a multiplication of DEF by A, with the two operands in either order. */
364 : static inline bool
365 344797 : is_mult_by (gimple *use_stmt, tree def, tree a)
366 : {
367 344797 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
368 344797 : && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
369 : {
370 78954 : tree op0 = gimple_assign_rhs1 (use_stmt);
371 78954 : tree op1 = gimple_assign_rhs2 (use_stmt);
372 :
373 78954 : return (op0 == def && op1 == a)
374 78954 : || (op0 == a && op1 == def);
375 : }
376 : return 0; /* Not a MULT_EXPR assignment. */
377 : }
378 :
379 : /* Return whether USE_STMT is DEF * DEF, i.e. a multiplication of DEF by itself as recognized by is_mult_by. */
380 : static inline bool
381 344752 : is_square_of (gimple *use_stmt, tree def)
382 : {
383 5 : return is_mult_by (use_stmt, def, def);
384 : }
385 :
386 : /* Return whether USE_STMT is a floating-point division by
387 : DEF * DEF. The divisor must be an SSA_NAME whose defining statement is the square of DEF. */
388 : static inline bool
389 180 : is_division_by_square (gimple *use_stmt, tree def)
390 : {
391 180 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
392 173 : && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
393 7 : && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt) /* Reject y / y, matching the x / x exclusion in is_division_by. */
394 187 : && !stmt_can_throw_internal (cfun, use_stmt))
395 : {
396 7 : tree denominator = gimple_assign_rhs2 (use_stmt);
397 7 : if (TREE_CODE (denominator) == SSA_NAME)
398 7 : return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
399 : }
400 : return 0; /* Not a division, or the divisor is not an SSA_NAME. */
401 : }
402 :
403 : /* Walk the subset of the dominator tree rooted at OCC, setting the
404 : RECIP_DEF field to a definition of 1.0 / DEF that can be used in
405 : the given basic block. The field may be left NULL, of course,
406 : if it is not possible or profitable to do the optimization.
407 :
408 : DEF_GSI is an iterator pointing at the statement defining DEF; it may be NULL, in which case insertion right after the definition is never chosen.
409 : If RECIP_DEF is set, a dominator already has a computation that can
410 : be used.
411 :
412 : If should_insert_square_recip is set, then this also inserts
413 : the square of the reciprocal immediately after the definition
414 : of the reciprocal. THRESHOLD is the minimum value of OCC->num_divisions / 2 for which a reciprocal is inserted in a block. */
415 :
416 : static void
417 48 : insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
418 : tree def, tree recip_def, tree square_recip_def,
419 : int should_insert_square_recip, int threshold)
420 : {
421 48 : tree type;
422 48 : gassign *new_stmt, *new_square_stmt;
423 48 : gimple_stmt_iterator gsi;
424 48 : struct occurrence *occ_child;
425 :
426 48 : if (!recip_def
427 32 : && (occ->bb_has_division || !flag_trapping_math) /* With trapping math only blocks that already divide may receive the new division. */
428 : /* Divide by two as all divisions are counted twice in
429 : the costing loop. */
430 31 : && occ->num_divisions / 2 >= threshold)
431 : {
432 : /* Make a variable with the replacement and substitute it. */
433 24 : type = TREE_TYPE (def);
434 24 : recip_def = create_tmp_reg (type, "reciptmp");
435 24 : new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
436 : build_one_cst (type), def);
437 :
438 24 : if (should_insert_square_recip)
439 : {
440 4 : square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
441 4 : new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
442 : recip_def, recip_def);
443 : }
444 :
445 24 : if (occ->bb_has_division)
446 : {
447 : /* Case 1: insert before an existing division. */
448 21 : gsi = gsi_after_labels (occ->bb);
449 200 : while (!gsi_end_p (gsi) /* Scan forward to the first division by DEF or by DEF * DEF in the block. */
450 200 : && (!is_division_by (gsi_stmt (gsi), def))
451 380 : && (!is_division_by_square (gsi_stmt (gsi), def)))
452 179 : gsi_next (&gsi);
453 :
454 21 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
455 21 : if (should_insert_square_recip)
456 3 : gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); /* Inserted second at the same position, so it follows the reciprocal it uses. */
457 : }
458 3 : else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
459 : {
460 : /* Case 2: insert right after the definition. Note that this will
461 : never happen if the definition statement can throw, because in
462 : that case the sole successor of the statement's basic block will
463 : dominate all the uses as well. */
464 2 : gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); /* GSI_NEW_STMT moves *DEF_GSI onto the inserted statement, so the caller's iterator changes too. */
465 2 : if (should_insert_square_recip)
466 1 : gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
467 : }
468 : else
469 : {
470 : /* Case 3: insert in a basic block not containing defs/uses. */
471 1 : gsi = gsi_after_labels (occ->bb);
472 1 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
473 1 : if (should_insert_square_recip)
474 0 : gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
475 : }
476 :
477 24 : reciprocal_stats.rdivs_inserted++;
478 :
479 24 : occ->recip_def_stmt = new_stmt; /* Remembered so replace_reciprocal can skip this statement. */
480 : }
481 :
482 48 : occ->recip_def = recip_def; /* Either the value just created or the one inherited from a dominating occurrence; may still be NULL. */
483 48 : occ->square_recip_def = square_recip_def;
484 71 : for (occ_child = occ->children; occ_child; occ_child = occ_child->next) /* Pre-order: children see the definition available in this block. */
485 23 : insert_reciprocals (def_gsi, occ_child, def, recip_def,
486 : square_recip_def, should_insert_square_recip,
487 : threshold);
488 48 : }
489 :
490 : /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
491 : Take as argument the use for (x * x). Nothing is changed unless the block is optimized for speed and its occurrence has both a reciprocal and a squared reciprocal available. */
492 : static inline void
493 4 : replace_reciprocal_squares (use_operand_p use_p)
494 : {
495 4 : gimple *use_stmt = USE_STMT (use_p);
496 4 : basic_block bb = gimple_bb (use_stmt);
497 4 : struct occurrence *occ = (struct occurrence *) bb->aux; /* Dereferenced without a NULL check: the block is expected to have been registered by register_division_in. */
498 :
499 8 : if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
500 8 : && occ->recip_def)
501 : {
502 4 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
503 4 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); /* The division becomes a multiplication by the squared reciprocal. */
504 4 : gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
505 4 : SET_USE (use_p, occ->square_recip_def);
506 4 : fold_stmt_inplace (&gsi);
507 4 : update_stmt (use_stmt);
508 : }
509 4 : }
510 :
511 :
512 : /* Replace the division at USE_P with a multiplication by the reciprocal, if
513 : possible. This requires the block to be optimized for speed, a reciprocal to be available in the block's occurrence, and the statement not to be the inserted reciprocal computation itself. */
514 :
515 : static inline void
516 111 : replace_reciprocal (use_operand_p use_p)
517 : {
518 111 : gimple *use_stmt = USE_STMT (use_p);
519 111 : basic_block bb = gimple_bb (use_stmt);
520 111 : struct occurrence *occ = (struct occurrence *) bb->aux; /* Dereferenced without a NULL check: the block is expected to have been registered by register_division_in. */
521 :
522 111 : if (optimize_bb_for_speed_p (bb)
523 111 : && occ->recip_def && use_stmt != occ->recip_def_stmt) /* The inserted 1.0 / DEF is itself a division by DEF and must be kept. */
524 : {
525 80 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
526 80 : gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
527 80 : SET_USE (use_p, occ->recip_def); /* The divisor operand now refers to the reciprocal. */
528 80 : fold_stmt_inplace (&gsi);
529 80 : update_stmt (use_stmt);
530 : }
531 111 : }
532 :
533 :
534 : /* Free OCC and return one more "struct occurrence" to be freed. The caller keeps calling until NULL is returned: the result is OCC's next sibling when OCC has no children, otherwise OCC's first child after the remaining siblings have been freed here. */
535 :
536 : static struct occurrence *
537 605 : free_bb (struct occurrence *occ)
538 : {
539 605 : struct occurrence *child, *next;
540 :
541 : /* First get the two pointers hanging off OCC. */
542 605 : next = occ->next;
543 605 : child = occ->children;
544 605 : delete occ; /* Runs the destructor (clears bb->aux) and returns the storage to the pool. */
545 :
546 : /* Now ensure that we don't recurse unless it is necessary. */
547 605 : if (!child)
548 : return next;
549 : else
550 : {
551 19 : while (next) /* Only one pointer can be returned, so the siblings are released recursively first. */
552 0 : next = free_bb (next);
553 :
554 : return child;
555 : }
556 : }
557 :
558 : /* Transform sequences like
559 : t = sqrt (a)
560 : x = 1.0 / t;
561 : r1 = x * x;
562 : r2 = a * x;
563 : into:
564 : t = sqrt (a)
565 : r1 = 1.0 / a;
566 : r2 = t;
567 : x = r1 * r2;
568 : depending on the uses of x, r1, r2. This removes one multiplication and
569 : allows the sqrt and division operations to execute in parallel.
570 : DEF_GSI is the gsi of the initial division by sqrt that defines
571 : DEF (x in the example above). When the division is replaced, *DEF_GSI is updated to point at the replacement statement. */
572 :
573 : static void
574 558 : optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
575 : {
576 558 : gimple *use_stmt;
577 558 : imm_use_iterator use_iter;
578 558 : gimple *stmt = gsi_stmt (*def_gsi);
579 558 : tree x = def;
580 558 : tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt); /* The divisor; verified below to be the result of a sqrt call. */
581 558 : tree div_rhs1 = gimple_assign_rhs1 (stmt);
582 :
583 558 : if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME /* Only handle 1.0 / <SSA name>. */
584 553 : || TREE_CODE (div_rhs1) != REAL_CST
585 744 : || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
586 448 : return;
587 :
588 110 : gcall *sqrt_stmt
589 588 : = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
590 :
591 42 : if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
592 : return;
593 :
594 42 : switch (gimple_call_combined_fn (sqrt_stmt)) /* Only sqrt calls qualify; anything else leaves the code untouched. */
595 : {
596 31 : CASE_CFN_SQRT:
597 31 : CASE_CFN_SQRT_FN:
598 31 : break;
599 :
600 : default:
601 : return;
602 : }
603 31 : tree a = gimple_call_arg (sqrt_stmt, 0);
604 :
605 : /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 :
607 : /* Statements that use x in x * x. */
608 43 : auto_vec<gimple *> sqr_stmts;
609 : /* Statements that use x in a * x. */
610 12 : auto_vec<gimple *> mult_stmts;
611 31 : bool has_other_use = false;
612 31 : bool mult_on_main_path = false;
613 :
614 89 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
615 : {
616 58 : if (is_gimple_debug (use_stmt)) /* Debug statements do not count as uses here. */
617 1 : continue;
618 57 : if (is_square_of (use_stmt, x))
619 : {
620 12 : sqr_stmts.safe_push (use_stmt);
621 12 : if (gimple_bb (use_stmt) == gimple_bb (stmt)) /* "Main path" means the same basic block as the division. */
622 17 : mult_on_main_path = true;
623 : }
624 45 : else if (is_mult_by (use_stmt, x, a))
625 : {
626 14 : mult_stmts.safe_push (use_stmt);
627 14 : if (gimple_bb (use_stmt) == gimple_bb (stmt))
628 17 : mult_on_main_path = true;
629 : }
630 : else
631 : has_other_use = true;
632 31 : }
633 :
634 : /* In the x * x and a * x cases we just rewire stmt operands or
635 : remove multiplications. In the has_other_use case we introduce
636 : a multiplication so make sure we don't introduce a multiplication
637 : on a path where there was none. */
638 31 : if (has_other_use && !mult_on_main_path)
639 19 : return;
640 :
641 12 : if (sqr_stmts.is_empty () && mult_stmts.is_empty ()) /* Nothing to rewrite. */
642 : return;
643 :
644 : /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
645 : to be able to compose it from the sqr and mult cases. */
646 41 : if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
647 : return;
648 :
649 12 : if (dump_file)
650 : {
651 10 : fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
652 10 : print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
653 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
654 10 : fprintf (dump_file, "\n");
655 : }
656 :
657 12 : bool delete_div = !has_other_use; /* Provisional; cleared below when the division is replaced rather than deleted. */
658 12 : tree sqr_ssa_name = NULL_TREE;
659 12 : if (!sqr_stmts.is_empty ())
660 : {
661 : /* r1 = x * x. Transform the original
662 : x = 1.0 / t
663 : into
664 : tmp1 = 1.0 / a
665 : r1 = tmp1. */
666 :
667 10 : sqr_ssa_name
668 10 : = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
669 :
670 10 : if (dump_file)
671 : {
672 10 : fprintf (dump_file, "Replacing original division\n");
673 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
674 10 : fprintf (dump_file, "with new division\n");
675 : }
676 10 : stmt /* From here on STMT is the new division tmp1 = 1.0 / a. */
677 10 : = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
678 : gimple_assign_rhs1 (stmt), a);
679 10 : gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
680 10 : gsi_remove (def_gsi, true); /* Removes the original x = 1.0 / t. */
681 10 : *def_gsi = gsi_for_stmt (stmt); /* Re-point the caller's iterator at the replacement. */
682 10 : fold_stmt_inplace (def_gsi);
683 10 : update_stmt (stmt);
684 :
685 10 : if (dump_file)
686 10 : print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
687 :
688 20 : delete_div = false; /* The original division is already gone. */
689 : gimple *sqr_stmt;
690 : unsigned int i;
691 20 : FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
692 : {
693 10 : gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
694 10 : gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name); /* r1 = x * x becomes r1 = tmp1. */
695 10 : update_stmt (sqr_stmt);
696 : }
697 : }
698 12 : if (!mult_stmts.is_empty ())
699 : {
700 : /* r2 = a * x. Transform this into:
701 : r2 = t (The original sqrt (a)). */
702 : unsigned int i;
703 24 : gimple *mult_stmt = NULL;
704 24 : FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
705 : {
706 12 : gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
707 :
708 12 : if (dump_file)
709 : {
710 10 : fprintf (dump_file, "Replacing squaring multiplication\n"); /* NOTE(review): this message is printed for the a * x statements, not the x * x ones. */
711 10 : print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
712 10 : fprintf (dump_file, "with assignment\n");
713 : }
714 12 : gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
715 12 : fold_stmt_inplace (&gsi2);
716 12 : update_stmt (mult_stmt);
717 12 : if (dump_file)
718 10 : print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
719 : }
720 : }
721 :
722 12 : if (has_other_use)
723 : {
724 : /* Using the two temporaries tmp1, tmp2 from above
725 : the original x is now:
726 : x = tmp1 * tmp2. */
727 10 : gcc_assert (orig_sqrt_ssa_name);
728 10 : gcc_assert (sqr_ssa_name); /* Holds because the early return above requires both vectors to be non-empty when there are other uses. */
729 :
730 10 : gimple *new_stmt
731 10 : = gimple_build_assign (x, MULT_EXPR,
732 : orig_sqrt_ssa_name, sqr_ssa_name);
733 10 : gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
734 10 : update_stmt (stmt); /* NOTE(review): this updates STMT (the division), not NEW_STMT. */
735 : }
736 2 : else if (delete_div)
737 : {
738 : /* Remove the original division. */
739 2 : gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
740 2 : gsi_remove (&gsi2, true);
741 2 : release_defs (stmt);
742 : }
743 : else
744 0 : release_ssa_name (x); /* Reached when the division was replaced above and X has no other use. */
745 : }
746 :
747 : /* Look for floating-point divisions among DEF's uses, and try to
748 : replace them by multiplications with the reciprocal. Add
749 : as many statements computing the reciprocal as needed.
750 :
751 : DEF must be a GIMPLE register of a floating-point type. DEF_GSI points at the statement defining DEF, or is NULL when the caller has no such iterator. All occurrences created here are freed and occ_head is reset before returning. */
752 :
753 : static void
754 209571 : execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
755 : {
756 209571 : use_operand_p use_p, square_use_p;
757 209571 : imm_use_iterator use_iter, square_use_iter;
758 209571 : tree square_def;
759 209571 : struct occurrence *occ;
760 209571 : int count = 0; /* Divisions directly by DEF. */
761 209571 : int threshold;
762 209571 : int square_recip_count = 0; /* Divisions by DEF * DEF; halved after the counting loop. */
763 209571 : int sqrt_recip_count = 0; /* When DEF is x * x: divisions by x. */
764 :
765 209571 : gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
766 209571 : threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
767 :
768 : /* If DEF is a square (x * x), count the number of divisions by x.
769 : If there are more divisions by x than by (DEF * DEF), prefer to optimize
770 : the reciprocal of x instead of DEF. This improves cases like:
771 : def = x * x
772 : t0 = a / def
773 : t1 = b / def
774 : t2 = c / x
775 : Reciprocal optimization of x results in 1 division rather than 2 or 3. */
776 209571 : gimple *def_stmt = SSA_NAME_DEF_STMT (def);
777 :
778 209571 : if (is_gimple_assign (def_stmt)
779 163347 : && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
780 40000 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
781 249494 : && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
782 : {
783 665 : tree op0 = gimple_assign_rhs1 (def_stmt);
784 :
785 3381 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
786 : {
787 2051 : gimple *use_stmt = USE_STMT (use_p);
788 2051 : if (is_division_by (use_stmt, op0))
789 14 : sqrt_recip_count++;
790 665 : }
791 : }
792 :
793 763825 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) /* Counting walk: register every block that divides by DEF or by DEF * DEF. */
794 : {
795 344683 : gimple *use_stmt = USE_STMT (use_p);
796 344683 : if (is_division_by (use_stmt, def))
797 : {
798 630 : register_division_in (gimple_bb (use_stmt), 2);
799 630 : count++;
800 : }
801 :
802 344683 : if (is_square_of (use_stmt, def))
803 : {
804 1338 : square_def = gimple_assign_lhs (use_stmt);
805 4156 : FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
806 : {
807 1480 : gimple *square_use_stmt = USE_STMT (square_use_p);
808 1480 : if (is_division_by (square_use_stmt, square_def))
809 : {
810 : /* This is executed twice for each division by a square. */
811 66 : register_division_in (gimple_bb (square_use_stmt), 1);
812 66 : square_recip_count++;
813 : }
814 1338 : }
815 : }
816 209571 : }
817 :
818 : /* Square reciprocals were counted twice above. */
819 209571 : square_recip_count /= 2;
820 :
821 : /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
822 209571 : if (sqrt_recip_count > square_recip_count)
823 14 : goto out; /* The occurrences registered above still have to be freed. */
824 :
825 : /* Do the expensive part only if we can hope to optimize something. */
826 209557 : if (count + square_recip_count >= threshold && count >= 1)
827 : {
828 25 : gimple *use_stmt;
829 50 : for (occ = occ_head; occ; occ = occ->next)
830 : {
831 25 : compute_merit (occ);
832 25 : insert_reciprocals (def_gsi, occ, def, NULL, NULL,
833 : square_recip_count, threshold); /* The count doubles as the should_insert_square_recip flag. */
834 : }
835 :
836 176 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) /* Replacement walk over the uses of DEF. */
837 : {
838 126 : if (is_division_by (use_stmt, def))
839 : {
840 333 : FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
841 111 : replace_reciprocal (use_p);
842 : }
843 20 : else if (square_recip_count > 0 && is_square_of (use_stmt, def))
844 : {
845 16 : FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
846 : {
847 : /* Find all uses of the square that are divisions and
848 : * replace them by multiplications with the inverse. */
849 8 : imm_use_iterator square_iterator;
850 8 : gimple *powmult_use_stmt = USE_STMT (use_p);
851 8 : tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
852 :
853 24 : FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
854 : square_iterator, powmult_def_name)
855 24 : FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
856 : {
857 8 : gimple *powmult_use_stmt = USE_STMT (square_use_p); /* Shadows the outer variable of the same name. */
858 8 : if (is_division_by (powmult_use_stmt, powmult_def_name))
859 4 : replace_reciprocal_squares (square_use_p);
860 8 : }
861 : }
862 : }
863 25 : }
864 : }
865 :
866 209532 : out:
867 210176 : for (occ = occ_head; occ; ) /* free_bb returns the next occurrence to free, or NULL when done. */
868 605 : occ = free_bb (occ);
869 :
870 209571 : occ_head = NULL;
871 209571 : }
872 :
873 : /* Return an internal function that implements the reciprocal of CALL,
874 : or IFN_LAST if there is no such function that the target supports. Only sqrt calls are handled here; they map to IFN_RSQRT. */
875 :
876 : internal_fn
877 113 : internal_fn_reciprocal (gcall *call)
878 : {
879 113 : internal_fn ifn;
880 :
881 113 : switch (gimple_call_combined_fn (call))
882 : {
883 97 : CASE_CFN_SQRT:
884 97 : CASE_CFN_SQRT_FN:
885 97 : ifn = IFN_RSQRT;
886 97 : break;
887 :
888 : default:
889 : return IFN_LAST;
890 : }
891 :
892 97 : tree_pair types = direct_internal_fn_types (ifn, call);
893 97 : if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED)) /* Support is queried for speed optimization only. */
894 : return IFN_LAST;
895 :
896 : return ifn;
897 : }
898 :
899 : /* Go through all the floating-point SSA_NAMEs, and call
900 : execute_cse_reciprocals_1 on each of them. */
901 : namespace {
902 :
903 : const pass_data pass_data_cse_reciprocals = /* Static description of the "recip" pass, handed to the gimple_opt_pass constructor by pass_cse_reciprocals. */
904 : {
905 : GIMPLE_PASS, /* type */
906 : "recip", /* name */
907 : OPTGROUP_NONE, /* optinfo_flags */
908 : TV_TREE_RECIP, /* tv_id */
909 : PROP_ssa, /* properties_required */
910 : 0, /* properties_provided */
911 : 0, /* properties_destroyed */
912 : 0, /* todo_flags_start */
913 : TODO_update_ssa, /* todo_flags_finish */
914 : };
915 :
916 : class pass_cse_reciprocals : public gimple_opt_pass /* The reciprocal CSE pass object, described by pass_data_cse_reciprocals. */
917 : {
918 : public:
919 298828 : pass_cse_reciprocals (gcc::context *ctxt)
920 597656 : : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
921 : {}
922 :
923 : /* opt_pass methods: */
924 1039819 : bool gate (function *) final override /* The pass runs only when optimizing and flag_reciprocal_math is set. */
925 : {
926 1039819 : return optimize && flag_reciprocal_math;
927 : }
928 : unsigned int execute (function *) final override; /* Defined out of line below. */
929 :
930 : }; // class pass_cse_reciprocals
931 :
/* Main entry point of the reciprocal CSE pass for function FUN.

   The pass works in two phases:

   1. For every floating-point SSA name (default definitions of
      parameters, PHI results and statement definitions) call
      execute_cse_reciprocals_1.  With -funsafe-math-optimizations,
      definitions computed by a non-throwing RDIV_EXPR are additionally
      handed to optimize_recip_sqrt.

   2. In blocks that are not optimized for size, rewrite a / func (b)
      into a * rfunc (b) when func has a reciprocal variant (an internal
      function or a target builtin) and every non-debug use of the
      call's result is as a divisor.

   Always returns 0.  */

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  /* Pool for the occurrence records; released at the end of the pass.  */
  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* With checking enabled, verify that no block has its AUX field set
     on entry.  */
  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  /* Floating-point register parameters: process their default
     definitions, if any.  */
  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
        && is_gimple_reg (arg))
      {
        tree name = ssa_default_def (fun, arg);
        if (name)
          execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      /* Non-virtual floating-point PHI results.  */
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          def = PHI_RESULT (phi);
          if (! virtual_operand_p (def)
              && FLOAT_TYPE_P (TREE_TYPE (def)))
            execute_cse_reciprocals_1 (NULL, def);
        }

      /* Floating-point SSA names defined by ordinary statements.  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);

          if (gimple_has_lhs (stmt)
              && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
              && FLOAT_TYPE_P (TREE_TYPE (def))
              && TREE_CODE (def) == SSA_NAME)
            {
              execute_cse_reciprocals_1 (&gsi, def);
              /* Re-read the statement at GSI; presumably the call above
                 may have changed what the iterator points to.  */
              stmt = gsi_stmt (gsi);
              if (flag_unsafe_math_optimizations
                  && is_gimple_assign (stmt)
                  && gimple_assign_lhs (stmt) == def
                  && !stmt_can_throw_internal (cfun, stmt)
                  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
                optimize_recip_sqrt (&gsi, def);
            }
        }

      /* The a/func(b) rewrite below is skipped for blocks optimized for
         size.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);

          if (is_gimple_assign (stmt)
              && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
            {
              /* ARG1 is the divisor; STMT1 will be its definition.  */
              tree arg1 = gimple_assign_rhs2 (stmt);
              gimple *stmt1;

              if (TREE_CODE (arg1) != SSA_NAME)
                continue;

              stmt1 = SSA_NAME_DEF_STMT (arg1);

              if (is_gimple_call (stmt1)
                  && gimple_call_lhs (stmt1))
                {
                  bool fail;
                  imm_use_iterator ui;
                  use_operand_p use_p;
                  tree fndecl = NULL_TREE;

                  /* Prefer an internal reciprocal function; otherwise
                     ask the target for a reciprocal of a machine
                     specific builtin.  */
                  gcall *call = as_a <gcall *> (stmt1);
                  internal_fn ifn = internal_fn_reciprocal (call);
                  if (ifn == IFN_LAST)
                    {
                      fndecl = gimple_call_fndecl (call);
                      if (!fndecl
                          || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
                        continue;
                      fndecl = targetm.builtin_reciprocal (fndecl);
                      if (!fndecl)
                        continue;
                    }

                  /* Check that all uses of the SSA name are divisions,
                     otherwise replacing the defining statement will do
                     the wrong thing.  Debug statements are ignored, and
                     ARG1 must appear only as the divisor, never as the
                     dividend.  */
                  fail = false;
                  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
                    {
                      gimple *stmt2 = USE_STMT (use_p);
                      if (is_gimple_debug (stmt2))
                        continue;
                      if (!is_gimple_assign (stmt2)
                          || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
                          || gimple_assign_rhs1 (stmt2) == arg1
                          || gimple_assign_rhs2 (stmt2) != arg1)
                        {
                          fail = true;
                          break;
                        }
                    }
                  if (fail)
                    continue;

                  gimple_replace_ssa_lhs (call, arg1);
                  reset_flow_sensitive_info (arg1);
                  /* If the existing call and its replacement are of
                     different kinds (internal function versus function
                     declaration), build a new call statement with the
                     same arguments and swap it in; otherwise just
                     retarget the existing call.  */
                  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
                    {
                      auto_vec<tree, 4> args;
                      for (unsigned int i = 0;
                           i < gimple_call_num_args (call); i++)
                        args.safe_push (gimple_call_arg (call, i));
                      gcall *stmt2;
                      if (ifn == IFN_LAST)
                        stmt2 = gimple_build_call_vec (fndecl, args);
                      else
                        stmt2 = gimple_build_call_internal_vec (ifn, args);
                      gimple_call_set_lhs (stmt2, arg1);
                      gimple_move_vops (stmt2, call);
                      gimple_call_set_nothrow (stmt2,
                                               gimple_call_nothrow_p (call));
                      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
                      gsi_replace (&gsi2, stmt2, true);
                    }
                  else
                    {
                      if (ifn == IFN_LAST)
                        gimple_call_set_fndecl (call, fndecl);
                      else
                        gimple_call_set_internal_fn (call, ifn);
                      update_stmt (call);
                    }
                  reciprocal_stats.rfuncs_inserted++;

                  /* ARG1 now holds the reciprocal, so every use of it
                     is turned from a division into a multiplication.
                     Note that this reuses STMT as the iteration
                     variable and declares a local GSI that shadows the
                     iterator of the enclosing loop.  */
                  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
                    {
                      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
                      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
                      fold_stmt_inplace (&gsi);
                      update_stmt (stmt);
                    }
                }
            }
        }
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
                            reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
                            reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}
1106 :
1107 : } // anon namespace
1108 :
1109 : gimple_opt_pass *
1110 298828 : make_pass_cse_reciprocals (gcc::context *ctxt)
1111 : {
1112 298828 : return new pass_cse_reciprocals (ctxt);
1113 : }
1114 :
/* If NAME is the result of a type conversion, look for other
   equivalent dominating or dominated conversions, and replace all
   uses with the earliest dominating name, removing the redundant
   conversions.  Return the prevailing name.

   NAME is returned unchanged when it is a default definition, occurs
   in an abnormal PHI, is not defined by a cast, or the cast source is
   not an SSA name.  *CFG_CHANGED is set to true when removing a
   redundant conversion also purged dead EH edges; it is never
   cleared here.  */

static tree
execute_cse_conv_1 (tree name, bool *cfg_changed)
{
  if (SSA_NAME_IS_DEFAULT_DEF (name)
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return name;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);

  if (!gimple_assign_cast_p (def_stmt))
    return name;

  /* SRC is the value being converted; all candidate conversions are
     found among its immediate uses.  */
  tree src = gimple_assign_rhs1 (def_stmt);

  if (TREE_CODE (src) != SSA_NAME)
    return name;

  imm_use_iterator use_iter;
  gimple *use_stmt;

  /* Find the earliest dominating def.  NAME and DEF_STMT are updated
     as we go to always denote the currently prevailing conversion.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
          || !gimple_assign_cast_p (use_stmt))
        continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      /* Only conversions of the same source to a compatible type, whose
         result is not used in an abnormal PHI, are equivalent.  */
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
          || (gimple_assign_rhs1 (use_stmt)
              != gimple_assign_rhs1 (def_stmt))
          || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
        continue;

      bool use_dominates;
      if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
        {
          /* Same block: USE_STMT dominates DEF_STMT iff walking
             forward from USE_STMT reaches DEF_STMT.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
          while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
            gsi_next (&gsi);
          use_dominates = !gsi_end_p (gsi);
        }
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
                               gimple_bb (def_stmt)))
        use_dominates = false;
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
                               gimple_bb (use_stmt)))
        use_dominates = true;
      else
        /* Neither conversion dominates the other.  */
        continue;

      if (use_dominates)
        {
          /* USE_STMT becomes the prevailing conversion.  */
          std::swap (name, lhs);
          std::swap (def_stmt, use_stmt);
        }
    }

  /* Now go through all uses of SRC again, replacing the equivalent
     dominated conversions.  We may replace defs that were not
     dominated by the then-prevailing defs when we first visited
     them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
          || !gimple_assign_cast_p (use_stmt))
        continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
          || (gimple_assign_rhs1 (use_stmt)
              != gimple_assign_rhs1 (def_stmt))
          || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
        continue;

      basic_block use_bb = gimple_bb (use_stmt);
      if (gimple_bb (def_stmt) == use_bb
          || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
        {
          sincos_stats.conv_removed++;

          /* Redirect the uses of the redundant result to NAME, then
             delete the redundant conversion and release its defs.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
          replace_uses_by (lhs, name);
          if (gsi_remove (&gsi, true)
              && gimple_purge_dead_eh_edges (use_bb))
            *cfg_changed = true;
          release_defs (use_stmt);
        }
    }

  return name;
}
1214 :
1215 : /* Records an occurrence at statement USE_STMT in the vector of trees
1216 : STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1217 : is not yet initialized. Returns true if the occurrence was pushed on
1218 : the vector. Adjusts *TOP_BB to be the basic block dominating all
1219 : statements in the vector. */
1220 :
1221 : static bool
1222 1254 : maybe_record_sincos (vec<gimple *> *stmts,
1223 : basic_block *top_bb, gimple *use_stmt)
1224 : {
1225 1254 : basic_block use_bb = gimple_bb (use_stmt);
1226 1254 : if (*top_bb
1227 1254 : && (*top_bb == use_bb
1228 66 : || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1229 156 : stmts->safe_push (use_stmt);
1230 1098 : else if (!*top_bb
1231 1098 : || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1232 : {
1233 1078 : stmts->safe_push (use_stmt);
1234 1078 : *top_bb = use_bb;
1235 : }
1236 : else
1237 : return false;
1238 :
1239 : return true;
1240 : }
1241 :
1242 : /* Look for sin, cos and cexpi calls with the same argument NAME and
1243 : create a single call to cexpi CSEing the result in this case.
1244 : We first walk over all immediate uses of the argument collecting
1245 : statements that we can CSE in a vector and in a second pass replace
1246 : the statement rhs with a REALPART or IMAGPART expression on the
1247 : result of the cexpi call we insert before the use statement that
1248 : dominates all other candidates. */
1249 :
1250 : static bool
1251 1039 : execute_cse_sincos_1 (tree name)
1252 : {
1253 1039 : gimple_stmt_iterator gsi;
1254 1039 : imm_use_iterator use_iter;
1255 1039 : tree fndecl, res, type = NULL_TREE;
1256 1039 : gimple *def_stmt, *use_stmt, *stmt;
1257 1039 : int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1258 1039 : auto_vec<gimple *> stmts;
1259 1039 : basic_block top_bb = NULL;
1260 1039 : int i;
1261 1039 : bool cfg_changed = false;
1262 :
1263 1039 : name = execute_cse_conv_1 (name, &cfg_changed);
1264 :
1265 5035 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1266 : {
1267 2959 : if (gimple_code (use_stmt) != GIMPLE_CALL
1268 2959 : || !gimple_call_lhs (use_stmt))
1269 1673 : continue;
1270 :
1271 1286 : switch (gimple_call_combined_fn (use_stmt))
1272 : {
1273 456 : CASE_CFN_COS:
1274 456 : seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1275 456 : break;
1276 :
1277 792 : CASE_CFN_SIN:
1278 792 : seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1279 792 : break;
1280 :
1281 6 : CASE_CFN_CEXPI:
1282 6 : seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1283 6 : break;
1284 :
1285 32 : default:;
1286 32 : continue;
1287 : }
1288 :
1289 1254 : auto stmt_cfn = gimple_call_combined_fn (use_stmt);
1290 1254 : tree t = mathfn_built_in_type (stmt_cfn);
1291 1254 : if (!t)
1292 : {
1293 : /* It is possible to get IFN_{SIN,COS} calls, for which
1294 : mathfn_built_in_type will return NULL. Those are normally only
1295 : present for vector operations. We won't be able to CSE those
1296 : at the moment. */
1297 2 : gcc_checking_assert (internal_fn_p (stmt_cfn));
1298 : return false;
1299 : }
1300 :
1301 1252 : if (!type)
1302 : {
1303 1037 : type = t;
1304 1037 : t = TREE_TYPE (name);
1305 : }
1306 : /* This checks that NAME has the right type in the first round,
1307 : and, in subsequent rounds, that the built_in type is the same
1308 : type, or a compatible type. */
1309 1252 : if (type != t && !types_compatible_p (type, t))
1310 : return false;
1311 2 : }
1312 1037 : if (seen_cos + seen_sin + seen_cexpi <= 1)
1313 : return false;
1314 :
1315 : /* Simply insert cexpi at the beginning of top_bb but not earlier than
1316 : the name def statement. */
1317 195 : fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1318 195 : if (!fndecl)
1319 : return false;
1320 151 : stmt = gimple_build_call (fndecl, 1, name);
1321 151 : res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1322 151 : gimple_call_set_lhs (stmt, res);
1323 :
1324 151 : def_stmt = SSA_NAME_DEF_STMT (name);
1325 151 : if (!SSA_NAME_IS_DEFAULT_DEF (name)
1326 121 : && gimple_code (def_stmt) != GIMPLE_PHI
1327 264 : && gimple_bb (def_stmt) == top_bb)
1328 : {
1329 113 : gsi = gsi_for_stmt (def_stmt);
1330 113 : gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1331 : }
1332 : else
1333 : {
1334 38 : gsi = gsi_after_labels (top_bb);
1335 38 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1336 : }
1337 151 : sincos_stats.inserted++;
1338 :
1339 : /* And adjust the recorded old call sites. */
1340 453 : for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1341 : {
1342 302 : tree rhs = NULL;
1343 :
1344 302 : switch (gimple_call_combined_fn (use_stmt))
1345 : {
1346 151 : CASE_CFN_COS:
1347 151 : rhs = fold_build1 (REALPART_EXPR, type, res);
1348 151 : break;
1349 :
1350 151 : CASE_CFN_SIN:
1351 151 : rhs = fold_build1 (IMAGPART_EXPR, type, res);
1352 151 : break;
1353 :
1354 : CASE_CFN_CEXPI:
1355 : rhs = res;
1356 : break;
1357 :
1358 0 : default:;
1359 0 : gcc_unreachable ();
1360 : }
1361 :
1362 : /* Replace call with a copy. */
1363 302 : stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1364 :
1365 302 : gsi = gsi_for_stmt (use_stmt);
1366 302 : gsi_replace (&gsi, stmt, true);
1367 302 : if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1368 0 : cfg_changed = true;
1369 : }
1370 :
1371 151 : return cfg_changed;
1372 1039 : }
1373 :
1374 : /* To evaluate powi(x,n), the floating point value x raised to the
1375 : constant integer exponent n, we use a hybrid algorithm that
1376 : combines the "window method" with look-up tables. For an
1377 : introduction to exponentiation algorithms and "addition chains",
1378 : see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1379 : "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1380 : 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1381 : Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1382 :
/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  The
   #ifndef guard allows the value to be defined before this point.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".
   Exponents of POWI_TABLE_SIZE or more are reduced this many low
   bits at a time when they are odd.  */
#define POWI_WINDOW_SIZE 3
1402 :
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
1445 :
1446 :
1447 : /* Return the number of multiplications required to calculate
1448 : powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1449 : subroutine of powi_cost. CACHE is an array indicating
1450 : which exponents have already been calculated. */
1451 :
1452 : static int
1453 1120 : powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1454 : {
1455 : /* If we've already calculated this exponent, then this evaluation
1456 : doesn't require any additional multiplications. */
1457 1861 : if (cache[n])
1458 1120 : return 0;
1459 :
1460 741 : cache[n] = true;
1461 741 : return powi_lookup_cost (n - powi_table[n], cache)
1462 741 : + powi_lookup_cost (powi_table[n], cache) + 1;
1463 : }
1464 :
1465 : /* Return the number of multiplications required to calculate
1466 : powi(x,n) for an arbitrary x, given the exponent N. This
1467 : function needs to be kept in sync with powi_as_mults below. */
1468 :
1469 : static int
1470 384 : powi_cost (HOST_WIDE_INT n)
1471 : {
1472 384 : bool cache[POWI_TABLE_SIZE];
1473 384 : unsigned HOST_WIDE_INT digit;
1474 384 : unsigned HOST_WIDE_INT val;
1475 384 : int result;
1476 :
1477 384 : if (n == 0)
1478 : return 0;
1479 :
1480 : /* Ignore the reciprocal when calculating the cost. */
1481 379 : val = absu_hwi (n);
1482 :
1483 : /* Initialize the exponent cache. */
1484 379 : memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1485 379 : cache[1] = true;
1486 :
1487 379 : result = 0;
1488 :
1489 379 : while (val >= POWI_TABLE_SIZE)
1490 : {
1491 0 : if (val & 1)
1492 : {
1493 0 : digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1494 0 : result += powi_lookup_cost (digit, cache)
1495 0 : + POWI_WINDOW_SIZE + 1;
1496 0 : val >>= POWI_WINDOW_SIZE;
1497 : }
1498 : else
1499 : {
1500 0 : val >>= 1;
1501 0 : result++;
1502 : }
1503 : }
1504 :
1505 379 : return result + powi_lookup_cost (val, cache);
1506 : }
1507 :
1508 : /* Recursive subroutine of powi_as_mults. This function takes the
1509 : array, CACHE, of already calculated exponents and an exponent N and
1510 : returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1511 :
1512 : static tree
1513 6113 : powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1514 : unsigned HOST_WIDE_INT n, tree *cache)
1515 : {
1516 6113 : tree op0, op1, ssa_target;
1517 6113 : unsigned HOST_WIDE_INT digit;
1518 6113 : gassign *mult_stmt;
1519 :
1520 6113 : if (n < POWI_TABLE_SIZE && cache[n])
1521 : return cache[n];
1522 :
1523 2166 : ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1524 :
1525 2166 : if (n < POWI_TABLE_SIZE)
1526 : {
1527 2163 : cache[n] = ssa_target;
1528 2163 : op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1529 2163 : op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1530 : }
1531 3 : else if (n & 1)
1532 : {
1533 1 : digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1534 1 : op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1535 1 : op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1536 : }
1537 : else
1538 : {
1539 2 : op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1540 2 : op1 = op0;
1541 : }
1542 :
1543 2166 : mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1544 2166 : gimple_set_location (mult_stmt, loc);
1545 2166 : gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1546 :
1547 2166 : return ssa_target;
1548 : }
1549 :
1550 : /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1551 : This function needs to be kept in sync with powi_cost above. */
1552 :
1553 : tree
1554 1783 : powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1555 : tree arg0, HOST_WIDE_INT n)
1556 : {
1557 1783 : tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1558 1783 : gassign *div_stmt;
1559 1783 : tree target;
1560 :
1561 1783 : if (n == 0)
1562 0 : return build_one_cst (type);
1563 :
1564 1783 : memset (cache, 0, sizeof (cache));
1565 1783 : cache[1] = arg0;
1566 :
1567 1783 : result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1568 1783 : if (n >= 0)
1569 : return result;
1570 :
1571 : /* If the original exponent was negative, reciprocate the result. */
1572 8 : target = make_temp_ssa_name (type, NULL, "powmult");
1573 8 : div_stmt = gimple_build_assign (target, RDIV_EXPR,
1574 : build_real (type, dconst1), result);
1575 8 : gimple_set_location (div_stmt, loc);
1576 8 : gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1577 :
1578 8 : return target;
1579 : }
1580 :
1581 : /* ARG0 and N are the two arguments to a powi builtin in GSI with
1582 : location info LOC. If the arguments are appropriate, create an
1583 : equivalent sequence of statements prior to GSI using an optimal
1584 : number of multiplications, and return an expression holding the
1585 : result. */
1586 :
1587 : static tree
1588 630 : gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1589 : tree arg0, HOST_WIDE_INT n)
1590 : {
1591 630 : if ((n >= -1 && n <= 2)
1592 630 : || (optimize_function_for_speed_p (cfun)
1593 351 : && powi_cost (n) <= POWI_MAX_MULTS))
1594 622 : return powi_as_mults (gsi, loc, arg0, n);
1595 :
1596 : return NULL_TREE;
1597 : }
1598 :
1599 : /* Build a gimple call statement that calls FN with argument ARG.
1600 : Set the lhs of the call statement to a fresh SSA name. Insert the
1601 : statement prior to GSI's current position, and return the fresh
1602 : SSA name. */
1603 :
1604 : static tree
1605 44 : build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1606 : tree fn, tree arg)
1607 : {
1608 44 : gcall *call_stmt;
1609 44 : tree ssa_target;
1610 :
1611 44 : call_stmt = gimple_build_call (fn, 1, arg);
1612 44 : ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1613 44 : gimple_set_lhs (call_stmt, ssa_target);
1614 44 : gimple_set_location (call_stmt, loc);
1615 44 : gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1616 :
1617 44 : return ssa_target;
1618 : }
1619 :
1620 : /* Build a gimple binary operation with the given CODE and arguments
1621 : ARG0, ARG1, assigning the result to a new SSA name for variable
1622 : TARGET. Insert the statement prior to GSI's current position, and
1623 : return the fresh SSA name.*/
1624 :
1625 : static tree
1626 912 : build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1627 : const char *name, enum tree_code code,
1628 : tree arg0, tree arg1)
1629 : {
1630 912 : tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1631 912 : gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1632 912 : gimple_set_location (stmt, loc);
1633 912 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1634 912 : return result;
1635 : }
1636 :
1637 : /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1638 : prior to GSI's current position, and return the fresh SSA name. */
1639 :
1640 : static tree
1641 16861 : build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1642 : tree type, tree val)
1643 : {
1644 0 : return gimple_convert (gsi, true, GSI_SAME_STMT, loc, type, val);
1645 : }
1646 :
/* Description of how a fractional exponent decomposes into a sum of
   powers of 0.5, as filled in by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* Caller-provided array; FACTORS[i] is true when the term 0.5**(i+1)
     is part of the sum.  */
  bool *factors;
  /* One plus the index of the last term of the sum, or 0 if none was
     found.  */
  unsigned int deepest;
  /* Number of terms in the sum other than the last one.  */
  unsigned int num_mults;
};
1653 :
1654 : /* Return true iff the real value C can be represented as a
1655 : sum of powers of 0.5 up to N. That is:
1656 : C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1657 : Record in INFO the various parameters of the synthesis algorithm such
1658 : as the factors a[i], the maximum 0.5 power and the number of
1659 : multiplications that will be required. */
1660 :
1661 : bool
1662 33 : representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1663 : struct pow_synth_sqrt_info *info)
1664 : {
1665 33 : REAL_VALUE_TYPE factor = dconsthalf;
1666 33 : REAL_VALUE_TYPE remainder = c;
1667 :
1668 33 : info->deepest = 0;
1669 33 : info->num_mults = 0;
1670 33 : memset (info->factors, 0, n * sizeof (bool));
1671 :
1672 97 : for (unsigned i = 0; i < n; i++)
1673 : {
1674 90 : REAL_VALUE_TYPE res;
1675 :
1676 : /* If something inexact happened bail out now. */
1677 90 : if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1678 26 : return false;
1679 :
1680 : /* We have hit zero. The number is representable as a sum
1681 : of powers of 0.5. */
1682 90 : if (real_equal (&res, &dconst0))
1683 : {
1684 26 : info->factors[i] = true;
1685 26 : info->deepest = i + 1;
1686 26 : return true;
1687 : }
1688 64 : else if (!REAL_VALUE_NEGATIVE (res))
1689 : {
1690 29 : remainder = res;
1691 29 : info->factors[i] = true;
1692 29 : info->num_mults++;
1693 : }
1694 : else
1695 35 : info->factors[i] = false;
1696 :
1697 64 : real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1698 : }
1699 : return false;
1700 : }
1701 :
1702 : /* Return the tree corresponding to FN being applied
1703 : to ARG N times at GSI and LOC.
1704 : Look up previous results from CACHE if need be.
1705 : cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1706 :
1707 : static tree
1708 63 : get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1709 : tree fn, location_t loc, tree *cache)
1710 : {
1711 63 : tree res = cache[n];
1712 63 : if (!res)
1713 : {
1714 40 : tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1715 40 : res = build_and_insert_call (gsi, loc, fn, prev);
1716 40 : cache[n] = res;
1717 : }
1718 :
1719 63 : return res;
1720 : }
1721 :
/* Write to STREAM the text of FNAME applied N times to ARG.  For
   example FNAME = "foo", ARG = "x", N = 2 gives "foo (foo (x))", and
   N = 0 gives just "x".  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
                 unsigned int n)
{
  /* Opening part of every nesting level, outermost first.  */
  for (unsigned int depth = 0; depth < n; depth++)
    fprintf (stream, "%s (", fname);

  fprintf (stream, "%s", arg);

  /* Matching closing parentheses.  */
  for (unsigned int depth = 0; depth < n; depth++)
    fprintf (stream, ")");
}
1739 :
1740 : /* Print to STREAM the fractional sequence of sqrt chains
1741 : applied to ARG, described by INFO. Used for the dump file. */
1742 :
1743 : static void
1744 7 : dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1745 : struct pow_synth_sqrt_info *info)
1746 : {
1747 29 : for (unsigned int i = 0; i < info->deepest; i++)
1748 : {
1749 22 : bool is_set = info->factors[i];
1750 22 : if (is_set)
1751 : {
1752 10 : print_nested_fn (stream, "sqrt", arg, i + 1);
1753 10 : if (i != info->deepest - 1)
1754 3 : fprintf (stream, " * ");
1755 : }
1756 : }
1757 7 : }
1758 :
1759 : /* Print to STREAM a representation of raising ARG to an integer
1760 : power N. Used for the dump file. */
1761 :
1762 : static void
1763 7 : dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1764 : {
1765 7 : if (n > 1)
1766 3 : fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1767 4 : else if (n == 1)
1768 3 : fprintf (stream, "%s", arg);
1769 7 : }
1770 :
1771 : /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1772 : square roots. Place at GSI and LOC. Limit the maximum depth
1773 : of the sqrt chains to MAX_DEPTH. Return the tree holding the
1774 : result of the expanded sequence or NULL_TREE if the expansion failed.
1775 :
1776 : This routine assumes that ARG1 is a real number with a fractional part
1777 : (the integer exponent case will have been handled earlier in
1778 : gimple_expand_builtin_pow).
1779 :
1780 : For ARG1 > 0.0:
1781 : * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1782 : FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1783 : FRAC_PART == ARG1 - WHOLE_PART:
1784 : Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1785 : POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1786 : if it can be expressed as such, that is if FRAC_PART satisfies:
1787 : FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1788 : where integer a[i] is either 0 or 1.
1789 :
1790 : Example:
1791 : POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1792 : --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1793 :
1794 : For ARG1 < 0.0 there are two approaches:
1795 : * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1796 : is calculated as above.
1797 :
1798 : Example:
1799 : POW (x, -5.625) == 1.0 / POW (x, 5.625)
1800 : --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1801 :
1802 : * (B) : WHOLE_PART := - ceil (abs (ARG1))
1803 : FRAC_PART := ARG1 - WHOLE_PART
1804 : and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1805 : Example:
1806 : POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1807 : --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1808 :
1809 : For ARG1 < 0.0 we choose between (A) and (B) depending on
1810 : how many multiplications we'd have to do.
1811 : So, for the example in (B): POW (x, -5.875), if we were to
1812 : follow algorithm (A) we would produce:
1813 : 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1814 : which contains more multiplications than approach (B).
1815 :
1816 : Hopefully, this approach will eliminate potentially expensive POW library
1817 : calls when unsafe floating point math is enabled and allow the compiler to
1818 : further optimise the multiplies, square roots and divides produced by this
1819 : function. */
1820 :
1821 : static tree
1822 25 : expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1823 : tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1824 : {
1825 25 : tree type = TREE_TYPE (arg0);
1826 25 : machine_mode mode = TYPE_MODE (type);
1827 25 : tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1828 25 : bool one_over = true;
1829 : /* ONE_OVER stays true for the 1.0 / ... form (approach (A)); it is
     :    cleared below when approach (B) is chosen.  Give up unless
     :    mathfn_built_in found a sqrt function for TYPE and ARG1 is a
     :    real constant.  */
1830 25 : if (!sqrtfn)
1831 : return NULL_TREE;
1832 :
1833 25 : if (TREE_CODE (arg1) != REAL_CST)
1834 : return NULL_TREE;
1835 :
1836 25 : REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1837 :
1838 25 : gcc_assert (max_depth > 0);
1839 25 : tree *cache = XALLOCAVEC (tree, max_depth + 1);
1840 :
1841 25 : struct pow_synth_sqrt_info synth_info;
1842 25 : synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1843 25 : synth_info.deepest = 0;
1844 25 : synth_info.num_mults = 0;
1845 :
1846 25 : bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1847 25 : REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1848 :
1849 : /* The whole and fractional parts of exp, which is the absolute value
     :    of the exponent: WHOLE_PART is floor (exp) and FRAC_PART is
     :    exp - WHOLE_PART.  */
1850 25 : REAL_VALUE_TYPE whole_part;
1851 25 : REAL_VALUE_TYPE frac_part;
1852 :
1853 25 : real_floor (&whole_part, mode, &exp);
1854 25 : real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1855 :
1856 : /* For a negative exponent also prepare the split used by approach (B):
     :    CEIL_WHOLE is ceil (exp) and CEIL_FRACT is CEIL_WHOLE - exp.  */
1857 25 : REAL_VALUE_TYPE ceil_whole = dconst0;
1858 25 : REAL_VALUE_TYPE ceil_fract = dconst0;
1859 :
1860 25 : if (neg_exp)
1861 : {
1862 10 : real_ceil (&ceil_whole, mode, &exp);
1863 10 : real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1864 : }
1865 :
1866 25 : if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1867 : return NULL_TREE;
1868 :
1869 : /* Check whether it's more profitable to not use 1.0 / ...
     :    Approach (B) is taken only if its fractional part is representable,
     :    its deepest sqrt chain is no deeper, and it needs strictly fewer
     :    multiplications than approach (A).  */
1870 18 : if (neg_exp)
1871 : {
1872 8 : struct pow_synth_sqrt_info alt_synth_info;
1873 8 : alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1874 8 : alt_synth_info.deepest = 0;
1875 8 : alt_synth_info.num_mults = 0;
1876 :
1877 8 : if (representable_as_half_series_p (ceil_fract, max_depth,
1878 : &alt_synth_info)
1879 8 : && alt_synth_info.deepest <= synth_info.deepest
1880 16 : && alt_synth_info.num_mults < synth_info.num_mults)
1881 : {
1882 2 : whole_part = ceil_whole;
1883 2 : frac_part = ceil_fract;
1884 2 : synth_info.deepest = alt_synth_info.deepest;
1885 2 : synth_info.num_mults = alt_synth_info.num_mults;
1886 2 : memcpy (synth_info.factors, alt_synth_info.factors,
1887 : (max_depth + 1) * sizeof (bool));
1888 2 : one_over = false;
1889 : }
1890 : }
1891 : /* Give up if the whole part does not convert exactly to a
     :    HOST_WIDE_INT and back, or if the powi cost plus the sqrt-chain
     :    multiplications exceed POWI_MAX_MULTS.  */
1892 18 : HOST_WIDE_INT n = real_to_integer (&whole_part);
1893 18 : REAL_VALUE_TYPE cint;
1894 18 : real_from_integer (&cint, VOIDmode, n, SIGNED);
1895 :
1896 18 : if (!real_identical (&whole_part, &cint))
1897 : return NULL_TREE;
1898 :
1899 18 : if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1900 : return NULL_TREE;
1901 :
1902 18 : memset (cache, 0, (max_depth + 1) * sizeof (tree));
1903 : /* A zero whole part contributes the constant 1.0; otherwise start from ARG0.  */
1904 18 : tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1905 :
1906 : /* Calculate the integer part of the exponent.  For n == 1 nothing has
     :    to be emitted because INTEGER_RES already is ARG0.  */
1907 18 : if (n > 1)
1908 : {
1909 6 : integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1910 6 : if (!integer_res)
1911 : return NULL_TREE;
1912 : }
1913 : /* Describe the chosen expansion in the dump file, if there is one.  */
1914 18 : if (dump_file)
1915 : {
1916 7 : char string[64];
1917 :
1918 7 : real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1919 7 : fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1920 :
1921 7 : if (neg_exp)
1922 : {
1923 2 : if (one_over)
1924 : {
1925 1 : fprintf (dump_file, "1.0 / (");
1926 1 : dump_integer_part (dump_file, "x", n);
1927 1 : if (n > 0)
1928 1 : fprintf (dump_file, " * ");
1929 1 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1930 1 : fprintf (dump_file, ")");
1931 : }
1932 : else
1933 : {
1934 1 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1935 1 : fprintf (dump_file, " / (");
1936 1 : dump_integer_part (dump_file, "x", n);
1937 1 : fprintf (dump_file, ")");
1938 : }
1939 : }
1940 : else
1941 : {
1942 5 : dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1943 5 : if (n > 0)
1944 4 : fprintf (dump_file, " * ");
1945 5 : dump_integer_part (dump_file, "x", n);
1946 : }
1947 :
1948 7 : fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1949 : }
1950 :
1951 :
1952 18 : tree fract_res = NULL_TREE;
1953 18 : cache[0] = arg0;
1954 :
1955 : /* Calculate the fractional part of the exponent: for every set
     :    factors[i] obtain the chain of depth i + 1 from get_fn_chain and
     :    multiply the chains together.  CACHE, whose entry 0 holds ARG0, is
     :    handed to get_fn_chain.  */
1956 58 : for (unsigned i = 0; i < synth_info.deepest; i++)
1957 : {
1958 40 : if (synth_info.factors[i])
1959 : {
1960 23 : tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1961 :
1962 23 : if (!fract_res)
1963 : fract_res = sqrt_chain;
1964 :
1965 : else
1966 5 : fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1967 : fract_res, sqrt_chain);
1968 : }
1969 : }
1970 :
1971 18 : tree res = NULL_TREE;
1972 : /* Combine both parts: 1.0 / (frac * int) for approach (A), where the
     :    integer factor is left out when n == 0; frac / int for approach (B);
     :    frac * int for a non-negative exponent.  */
1973 18 : if (neg_exp)
1974 : {
1975 8 : if (one_over)
1976 : {
1977 6 : if (n > 0)
1978 4 : res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1979 : fract_res, integer_res);
1980 : else
1981 : res = fract_res;
1982 :
1983 6 : res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1984 : build_real (type, dconst1), res);
1985 : }
1986 : else
1987 : {
1988 2 : res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1989 : fract_res, integer_res);
1990 : }
1991 : }
1992 : else
1993 10 : res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1994 : fract_res, integer_res);
1995 : return res;
1996 : }
1997 :
1998 : /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1999 : with location info LOC. If possible, create an equivalent and
2000 : less expensive sequence of statements prior to GSI, and return an
2001 : expression holding the result. Return NULL_TREE if none of the
     : expansions below applies. */
2002 :
2003 : static tree
2004 601 : gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2005 : tree arg0, tree arg1)
2006 : {
2007 601 : REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2008 601 : REAL_VALUE_TYPE c2, dconst3;
2009 601 : HOST_WIDE_INT n;
2010 601 : tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2011 601 : machine_mode mode;
2012 601 : bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2013 601 : bool hw_sqrt_exists, c_is_int, c2_is_int;
2014 : /* Build dconst1_4 from 1.0 by lowering its binary exponent by two, i.e. 0.25.  */
2015 601 : dconst1_4 = dconst1;
2016 601 : SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2017 :
2018 : /* If the exponent isn't a constant, there's nothing of interest
2019 : to be done. */
2020 601 : if (TREE_CODE (arg1) != REAL_CST)
2021 : return NULL_TREE;
2022 :
2023 : /* Don't perform the operation if flag_signaling_nans is on
2024 : and the operand is a signaling NaN. */
2025 363 : if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2026 363 : && ((TREE_CODE (arg0) == REAL_CST
2027 0 : && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2028 1 : || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2029 0 : return NULL_TREE;
2030 : /* Nothing is expanded when flag_errno_math is set.  */
2031 363 : if (flag_errno_math)
2032 : return NULL_TREE;
2033 :
2034 : /* If the exponent is equivalent to an integer, expand to an optimal
2035 : multiplication sequence when profitable. Exponents -1 to 2 are
     : always handed to gimple_expand_builtin_powi; others only with unsafe
     : math, when optimizing the block for speed and within POWI_MAX_MULTS. */
2036 75 : c = TREE_REAL_CST (arg1);
2037 75 : n = real_to_integer (&c);
2038 75 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2039 75 : c_is_int = real_identical (&c, &cint);
2040 :
2041 75 : if (c_is_int
2042 75 : && ((n >= -1 && n <= 2)
2043 21 : || (flag_unsafe_math_optimizations
2044 11 : && speed_p
2045 11 : && powi_cost (n) <= POWI_MAX_MULTS)))
2046 30 : return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2047 :
2048 : /* Attempt various optimizations using sqrt and cbrt. */
2049 45 : type = TREE_TYPE (arg0);
2050 45 : mode = TYPE_MODE (type);
2051 45 : sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2052 :
2053 : /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2054 : unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2055 : sqrt(-0) = -0. */
2056 45 : if (sqrtfn
2057 45 : && real_equal (&c, &dconsthalf)
2058 52 : && !HONOR_SIGNED_ZEROS (mode))
2059 0 : return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2060 :
2061 45 : hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2062 :
2063 : /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2064 : optimizations since 1./3. is not exactly representable. If x
2065 : is negative and finite, the correct value of pow(x,1./3.) is
2066 : a NaN with the "invalid" exception raised, because the value
2067 : of 1./3. actually has an even denominator. The correct value
2068 : of cbrt(x) is a negative real value. */
2069 45 : cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2070 45 : dconst1_3 = real_value_truncate (mode, dconst_third ());
2071 :
2072 45 : if (flag_unsafe_math_optimizations
2073 25 : && cbrtfn
2074 25 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2075 70 : && real_equal (&c, &dconst1_3))
2076 0 : return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2077 :
2078 : /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2079 : if we don't have a hardware sqrt insn. dconst1_6 is dconst1_3 with
     : its binary exponent lowered by one, i.e. halved. */
2080 45 : dconst1_6 = dconst1_3;
2081 45 : SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2082 :
2083 45 : if (flag_unsafe_math_optimizations
2084 25 : && sqrtfn
2085 25 : && cbrtfn
2086 25 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2087 : && speed_p
2088 25 : && hw_sqrt_exists
2089 70 : && real_equal (&c, &dconst1_6))
2090 : {
2091 : /* sqrt(x) */
2092 0 : sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2093 :
2094 : /* cbrt(sqrt(x)) */
2095 0 : return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2096 : }
2097 :
2098 :
2099 : /* Attempt to expand the POW as a product of square root chains.
2100 : Expand the 0.25 case even when optimising for size. When not
     : optimising for speed the sqrt chain depth is limited to 2. */
2101 45 : if (flag_unsafe_math_optimizations
2102 25 : && sqrtfn
2103 25 : && hw_sqrt_exists
2104 25 : && (speed_p || real_equal (&c, &dconst1_4))
2105 70 : && !HONOR_SIGNED_ZEROS (mode))
2106 : {
2107 50 : unsigned int max_depth = speed_p
2108 25 : ? param_max_pow_sqrt_depth
2109 : : 2;
2110 :
2111 25 : tree expand_with_sqrts
2112 25 : = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2113 :
2114 25 : if (expand_with_sqrts)
2115 : return expand_with_sqrts;
2116 : }
2117 : /* Record in C2_IS_INT whether 2c is an integer; the cbrt expansion
     :    below is not applied in that case.  */
2118 27 : real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2119 27 : n = real_to_integer (&c2);
2120 27 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2121 27 : c2_is_int = real_identical (&c2, &cint);
2122 :
2123 : /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2124 :
2125 : powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2126 : 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2127 :
2128 : Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2129 : different from pow(x, 1./3.) due to rounding and behavior with
2130 : negative x, we need to constrain this transformation to unsafe
2131 : math and positive x or finite math.
     :
     : The code below rounds 3c to the integer n, converts n/3 back to
     : MODE and requires the result to be identical to c. */
2132 27 : real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2133 27 : real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2134 27 : real_round (&c2, mode, &c2);
2135 27 : n = real_to_integer (&c2);
2136 27 : real_from_integer (&cint, VOIDmode, n, SIGNED);
2137 27 : real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2138 27 : real_convert (&c2, mode, &c2);
2139 :
2140 27 : if (flag_unsafe_math_optimizations
2141 7 : && cbrtfn
2142 7 : && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2143 7 : && real_identical (&c2, &c)
2144 4 : && !c2_is_int
2145 4 : && optimize_function_for_speed_p (cfun)
2146 31 : && powi_cost (n / 3) <= POWI_MAX_MULTS)
2147 : {
2148 4 : tree powi_x_ndiv3 = NULL_TREE;
2149 :
2150 : /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2151 : possible or profitable, give up. Skip the degenerate case when
2152 : abs(n) < 3, where the result is always 1. */
2153 4 : if (absu_hwi (n) >= 3)
2154 : {
2155 4 : powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2156 : abs_hwi (n / 3));
2157 4 : if (!powi_x_ndiv3)
2158 : return NULL_TREE;
2159 : }
2160 :
2161 : /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2162 : as that creates an unnecessary variable. Instead, just produce
2163 : either cbrt(x) or cbrt(x) * cbrt(x). */
2164 4 : cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2165 :
2166 4 : if (absu_hwi (n) % 3 == 1)
2167 : powi_cbrt_x = cbrt_x;
2168 : else
2169 2 : powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2170 : cbrt_x, cbrt_x);
2171 :
2172 : /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2173 4 : if (absu_hwi (n) < 3)
2174 : result = powi_cbrt_x;
2175 : else
2176 4 : result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2177 : powi_x_ndiv3, powi_cbrt_x);
2178 :
2179 : /* If n is negative, reciprocate the result. */
2180 4 : if (n < 0)
2181 1 : result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2182 : build_real (type, dconst1), result);
2183 :
2184 4 : return result;
2185 : }
2186 :
2187 : /* No optimizations succeeded. */
2188 : return NULL_TREE;
2189 : }
2190 :
2191 : /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2192 : on the SSA_NAME argument of each of them. */
2193 :
2194 : namespace {
2195 :
2196 : const pass_data pass_data_cse_sincos =
2197 : {
2198 : GIMPLE_PASS, /* type */
2199 : "sincos", /* name */
2200 : OPTGROUP_NONE, /* optinfo_flags */
2201 : TV_TREE_SINCOS, /* tv_id */
2202 : PROP_ssa, /* properties_required */
2203 : 0, /* properties_provided */
2204 : 0, /* properties_destroyed */
2205 : 0, /* todo_flags_start */
2206 : TODO_update_ssa, /* todo_flags_finish */
2207 : };
2208 :
2209 : class pass_cse_sincos : public gimple_opt_pass
2210 : {
2211 : public:
2212 298828 : pass_cse_sincos (gcc::context *ctxt)
2213 597656 : : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2214 : {}
2215 :
2216 : /* opt_pass methods: the gate returns the value of optimize. */
2217 1039819 : bool gate (function *) final override
2218 : {
2219 1039819 : return optimize;
2220 : }
2221 :
2222 : unsigned int execute (function *) final override;
2223 :
2224 : }; // class pass_cse_sincos
2225 : /* Walk every statement after the labels of each basic block of FUN.
     :    Resets sincos_stats first and reports its counters as statistics
     :    at the end.  Returns TODO_cleanup_cfg if any execute_cse_sincos_1
     :    call returned true, and 0 otherwise.  */
2226 : unsigned int
2227 1039784 : pass_cse_sincos::execute (function *fun)
2228 : {
2229 1039784 : basic_block bb;
2230 1039784 : bool cfg_changed = false;
2231 :
2232 1039784 : calculate_dominance_info (CDI_DOMINATORS);
2233 1039784 : memset (&sincos_stats, 0, sizeof (sincos_stats));
2234 :
2235 10854473 : FOR_EACH_BB_FN (bb, fun)
2236 : {
2237 9814689 : gimple_stmt_iterator gsi;
2238 :
2239 92481183 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2240 : {
2241 82666494 : gimple *stmt = gsi_stmt (gsi);
2242 : /* Only calls that have an LHS are considered.  */
2243 82666494 : if (is_gimple_call (stmt)
2244 82666494 : && gimple_call_lhs (stmt))
2245 : {
2246 2012120 : tree arg;
2247 2012120 : switch (gimple_call_combined_fn (stmt))
2248 : {
2249 1039 : CASE_CFN_COS:
2250 1039 : CASE_CFN_SIN:
2251 1039 : CASE_CFN_CEXPI:
2252 1039 : arg = gimple_call_arg (stmt, 0);
2253 : /* Make sure we have either sincos or cexp. */
2254 1039 : if (!targetm.libc_has_function (function_c99_math_complex,
2255 1039 : TREE_TYPE (arg))
2256 1039 : && !targetm.libc_has_function (function_sincos,
2257 0 : TREE_TYPE (arg)))
2258 : break;
2259 :
2260 1039 : if (TREE_CODE (arg) == SSA_NAME)
2261 1039 : cfg_changed |= execute_cse_sincos_1 (arg);
2262 : break;
2263 : default:
2264 : break;
2265 : }
2266 : }
2267 : }
2268 : }
2269 :
2270 1039784 : statistics_counter_event (fun, "sincos statements inserted",
2271 : sincos_stats.inserted);
2272 1039784 : statistics_counter_event (fun, "conv statements removed",
2273 : sincos_stats.conv_removed);
2274 :
2275 1039784 : return cfg_changed ? TODO_cleanup_cfg : 0;
2276 : }
2277 :
2278 : } // anon namespace
2279 : /* Allocate and return a new pass_cse_sincos instance for context CTXT.  */
2280 : gimple_opt_pass *
2281 298828 : make_pass_cse_sincos (gcc::context *ctxt)
2282 : {
2283 298828 : return new pass_cse_sincos (ctxt);
2284 : }
2285 :
2286 : /* Expand powi(x,n) into an optimal number of multiplies, when n is a
2287 : constant. The pass also expands calls to pow through
     : gimple_expand_builtin_pow and rewrites powi(-1,n) as a selection on
     : the low bit of n. */
2288 : namespace {
2289 :
2290 : const pass_data pass_data_expand_pow =
2291 : {
2292 : GIMPLE_PASS, /* type */
2293 : "pow", /* name */
2294 : OPTGROUP_NONE, /* optinfo_flags */
2295 : TV_TREE_POW, /* tv_id */
2296 : PROP_ssa, /* properties_required */
2297 : PROP_gimple_opt_math, /* properties_provided */
2298 : 0, /* properties_destroyed */
2299 : 0, /* todo_flags_start */
2300 : TODO_update_ssa, /* todo_flags_finish */
2301 : };
2302 :
2303 : class pass_expand_pow : public gimple_opt_pass
2304 : {
2305 : public:
2306 298828 : pass_expand_pow (gcc::context *ctxt)
2307 597656 : : gimple_opt_pass (pass_data_expand_pow, ctxt)
2308 : {}
2309 :
2310 : /* opt_pass methods: the gate returns the value of optimize. */
2311 1039819 : bool gate (function *) final override
2312 : {
2313 1039819 : return optimize;
2314 : }
2315 :
2316 : unsigned int execute (function *) final override;
2317 :
2318 : }; // class pass_expand_pow
2319 : /* Walk every statement after the labels of each basic block of FUN and
     :    replace pow/powi calls that have an LHS by an assignment of the
     :    expanded result.  Returns TODO_cleanup_cfg if purging dead EH edges
     :    changed a block, and 0 otherwise.  */
2320 : unsigned int
2321 1039814 : pass_expand_pow::execute (function *fun)
2322 : {
2323 1039814 : basic_block bb;
2324 1039814 : bool cfg_changed = false;
2325 :
2326 1039814 : calculate_dominance_info (CDI_DOMINATORS);
2327 :
2328 10311040 : FOR_EACH_BB_FN (bb, fun)
2329 : {
2330 9271226 : gimple_stmt_iterator gsi;
2331 9271226 : bool cleanup_eh = false;
2332 :
2333 89751083 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2334 : {
2335 80479857 : gimple *stmt = gsi_stmt (gsi);
2336 :
2337 : /* Only the last stmt in a bb could throw, no need to call
2338 : gimple_purge_dead_eh_edges if we change something in the middle
2339 : of a basic block. Resetting the flag on every statement means it
     : is still set after the loop only if the last statement was replaced. */
2340 80479857 : cleanup_eh = false;
2341 :
2342 80479857 : if (is_gimple_call (stmt)
2343 80479857 : && gimple_call_lhs (stmt))
2344 : {
2345 1985679 : tree arg0, arg1, result;
2346 1985679 : HOST_WIDE_INT n;
2347 1985679 : location_t loc;
2348 :
2349 1985679 : switch (gimple_call_combined_fn (stmt))
2350 : {
2351 601 : CASE_CFN_POW:
2352 601 : arg0 = gimple_call_arg (stmt, 0);
2353 601 : arg1 = gimple_call_arg (stmt, 1);
2354 :
2355 601 : loc = gimple_location (stmt);
2356 601 : result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2357 : /* On success replace the call by an assignment of RESULT to its LHS.  */
2358 601 : if (result)
2359 : {
2360 52 : tree lhs = gimple_get_lhs (stmt);
2361 52 : gassign *new_stmt = gimple_build_assign (lhs, result);
2362 52 : gimple_set_location (new_stmt, loc);
2363 52 : unlink_stmt_vdef (stmt);
2364 52 : gsi_replace (&gsi, new_stmt, true);
2365 52 : cleanup_eh = true;
2366 104 : if (gimple_vdef (stmt))
2367 0 : release_ssa_name (gimple_vdef (stmt));
2368 : }
2369 : break;
2370 :
2371 812 : CASE_CFN_POWI:
2372 812 : arg0 = gimple_call_arg (stmt, 0);
2373 812 : arg1 = gimple_call_arg (stmt, 1);
2374 812 : loc = gimple_location (stmt);
2375 : /* powi (-1, n): emit cond = n & 1 and select -1 if it is set, 1 otherwise.  */
2376 812 : if (real_minus_onep (arg0))
2377 : {
2378 11 : tree t0, t1, cond, one, minus_one;
2379 11 : gassign *stmt;
2380 :
2381 11 : t0 = TREE_TYPE (arg0);
2382 11 : t1 = TREE_TYPE (arg1);
2383 11 : one = build_real (t0, dconst1);
2384 11 : minus_one = build_real (t0, dconstm1);
2385 :
2386 11 : cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2387 11 : stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2388 : arg1, build_int_cst (t1, 1));
2389 11 : gimple_set_location (stmt, loc);
2390 11 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2391 :
2392 11 : result = make_temp_ssa_name (t0, NULL, "powi");
2393 11 : stmt = gimple_build_assign (result, COND_EXPR, cond,
2394 : minus_one, one);
2395 11 : gimple_set_location (stmt, loc);
2396 11 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2397 : }
2398 : else
2399 : {
2400 801 : if (!tree_fits_shwi_p (arg1))
2401 : break;
2402 :
2403 590 : n = tree_to_shwi (arg1);
2404 590 : result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2405 : }
2406 : /* On success replace the call by an assignment of RESULT to its LHS.  */
2407 601 : if (result)
2408 : {
2409 593 : tree lhs = gimple_get_lhs (stmt);
2410 593 : gassign *new_stmt = gimple_build_assign (lhs, result);
2411 593 : gimple_set_location (new_stmt, loc);
2412 593 : unlink_stmt_vdef (stmt);
2413 593 : gsi_replace (&gsi, new_stmt, true);
2414 593 : cleanup_eh = true;
2415 80481043 : if (gimple_vdef (stmt))
2416 0 : release_ssa_name (gimple_vdef (stmt));
2417 : }
2418 : break;
2419 :
2420 211 : default:;
2421 : }
2422 : }
2423 : }
2424 9271226 : if (cleanup_eh)
2425 3 : cfg_changed |= gimple_purge_dead_eh_edges (bb);
2426 : }
2427 :
2428 1039814 : return cfg_changed ? TODO_cleanup_cfg : 0;
2429 : }
2430 :
2431 : } // anon namespace
2432 : /* Allocate and return a new pass_expand_pow instance for context CTXT.  */
2433 : gimple_opt_pass *
2434 298828 : make_pass_expand_pow (gcc::context *ctxt)
2435 : {
2436 298828 : return new pass_expand_pow (ctxt);
2437 : }
2438 :
2439 : /* Return true if stmt is a type conversion operation that can be stripped
2440 : when used in a widening multiply operation. RESULT_TYPE is the type
     : the precision of the conversion result is compared against. The rhs
     : code of STMT is read unconditionally, so STMT has to be an assignment.
     : If RESULT_TYPE is not an INTEGER_TYPE, only a FIXED_CONVERT_EXPR is
     : accepted. */
2441 : static bool
2442 469799 : widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2443 : {
2444 469799 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2445 :
2446 469799 : if (TREE_CODE (result_type) == INTEGER_TYPE)
2447 : {
2448 469799 : tree op_type;
2449 469799 : tree inner_op_type;
2450 :
2451 469799 : if (!CONVERT_EXPR_CODE_P (rhs_code))
2452 : return false;
2453 : /* OP_TYPE is the type the conversion produces.  */
2454 187903 : op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2455 :
2456 : /* If the type of OP has the same precision as the result, then
2457 : we can strip this conversion. The multiply operation will be
2458 : selected to create the correct extension as a by-product. */
2459 187903 : if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2460 : return true;
2461 :
2462 : /* We can also strip a conversion if it preserves the signed-ness of
2463 : the operation and doesn't narrow the range. INNER_OP_TYPE is the
     : type of the value being converted. */
2464 1166 : inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2465 :
2466 : /* If the inner-most type is unsigned, then we can strip any
2467 : intermediate widening operation. If it's signed, then the
2468 : intermediate widening operation must also be signed. */
2469 1166 : if ((TYPE_UNSIGNED (inner_op_type)
2470 1163 : || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2471 2329 : && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2472 : return true;
2473 :
2474 1164 : return false;
2475 : }
2476 :
2477 0 : return rhs_code == FIXED_CONVERT_EXPR;
2478 : }
2479 :
2480 : /* Return true if RHS is a suitable operand for a widening multiplication,
2481 : assuming a target type of TYPE.
2482 : There are two cases:
2483 :
2484 : - RHS makes some value at least twice as wide. Store that value
2485 : in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2486 :
2487 : - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2488 : but leave *TYPE_OUT untouched.
     :
     : Note that the code below stores NULL in *TYPE_OUT for an integer
     : constant. Return false if RHS is neither kind of operand. */
2489 :
2490 : static bool
2491 924972 : is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2492 : tree *new_rhs_out)
2493 : {
2494 924972 : gimple *stmt;
2495 924972 : tree type1, rhs1;
2496 :
2497 924972 : if (TREE_CODE (rhs) == SSA_NAME)
2498 : {
2499 : /* Use tree_nonzero_bits to see if this operand is zero_extended
2500 : for unsigned widening multiplications or non-negative for
2501 : signed widening multiplications. This is only tried when TYPE is
     : an INTEGER_TYPE of even precision and an integer mode of half that
     : size exists. */
2502 766205 : if (TREE_CODE (type) == INTEGER_TYPE
2503 766205 : && (TYPE_PRECISION (type) & 1) == 0
2504 1532410 : && int_mode_for_size (TYPE_PRECISION (type) / 2, 1).exists ())
2505 : {
2506 760256 : unsigned int prec = TYPE_PRECISION (type);
2507 760256 : unsigned int hprec = prec / 2;
2508 760256 : wide_int bits = wide_int::from (tree_nonzero_bits (rhs), prec,
2509 1520512 : TYPE_SIGN (TREE_TYPE (rhs)));
2510 760256 : if (TYPE_UNSIGNED (type)
2511 1318053 : && wi::bit_and (bits, wi::mask (hprec, true, prec)) == 0)
2512 : {
2513 149914 : *type_out = build_nonstandard_integer_type (hprec, true);
2514 : /* X & MODE_MASK can be simplified to (T)X. */
2515 149914 : stmt = SSA_NAME_DEF_STMT (rhs);
2516 299828 : if (is_gimple_assign (stmt)
2517 131332 : && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
2518 16913 : && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
2519 183140 : && wide_int::from (wi::to_wide (gimple_assign_rhs2 (stmt)),
2520 16613 : prec, TYPE_SIGN (TREE_TYPE (rhs)))
2521 199753 : == wi::mask (hprec, false, prec))
2522 14819 : *new_rhs_out = gimple_assign_rhs1 (stmt);
2523 : else
2524 135095 : *new_rhs_out = rhs;
2525 149914 : return true;
2526 : }
2527 610342 : else if (!TYPE_UNSIGNED (type)
2528 812801 : && wi::bit_and (bits, wi::mask (hprec - 1, true, prec)) == 0)
2529 : {
2530 24983 : *type_out = build_nonstandard_integer_type (hprec, false);
2531 24983 : *new_rhs_out = rhs;
2532 24983 : return true;
2533 : }
2534 760256 : }
2535 :
2536 591308 : stmt = SSA_NAME_DEF_STMT (rhs);
2537 591308 : if (is_gimple_assign (stmt))
2538 : {
2539 : /* Look through a strippable conversion.  A converted integer
     :    constant is returned as is, with a NULL type.  */
2540 469799 : if (widening_mult_conversion_strippable_p (type, stmt))
2541 : {
2542 186739 : rhs1 = gimple_assign_rhs1 (stmt);
2543 :
2544 186739 : if (TREE_CODE (rhs1) == INTEGER_CST)
2545 : {
2546 0 : *new_rhs_out = rhs1;
2547 0 : *type_out = NULL;
2548 0 : return true;
2549 : }
2550 : }
2551 : else
2552 : rhs1 = rhs;
2553 : }
2554 : else
2555 : rhs1 = rhs;
2556 : /* The operand qualifies only if its type has the same tree code as
     :    TYPE and at most half of the precision of TYPE.  */
2557 591308 : type1 = TREE_TYPE (rhs1);
2558 :
2559 591308 : if (TREE_CODE (type1) != TREE_CODE (type)
2560 591308 : || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2561 : return false;
2562 :
2563 59361 : *new_rhs_out = rhs1;
2564 59361 : *type_out = type1;
2565 59361 : return true;
2566 : }
2567 :
2568 158767 : if (TREE_CODE (rhs) == INTEGER_CST)
2569 : {
2570 158767 : *new_rhs_out = rhs;
2571 158767 : *type_out = NULL;
2572 158767 : return true;
2573 : }
2574 :
2575 : return false;
2576 : }
2577 :
2578 : /* Return true if STMT performs a widening multiplication, assuming the
2579 : output type is TYPE. If so, store the unwidened types of the operands
2580 : in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2581 : *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2582 : and *TYPE2_OUT would give the operands of the multiplication.
     :
     : TYPE is taken from the LHS of STMT. Return false for an INTEGER_TYPE
     : whose overflow traps and for a type that is neither INTEGER_TYPE nor
     : FIXED_POINT_TYPE. */
2583 :
2584 : static bool
2585 725550 : is_widening_mult_p (gimple *stmt,
2586 : tree *type1_out, tree *rhs1_out,
2587 : tree *type2_out, tree *rhs2_out)
2588 : {
2589 725550 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2590 :
2591 725550 : if (TREE_CODE (type) == INTEGER_TYPE)
2592 : {
2593 725550 : if (TYPE_OVERFLOW_TRAPS (type))
2594 : return false;
2595 : }
2596 0 : else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2597 : return false;
2598 : /* Both operands have to qualify individually.  */
2599 725522 : if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2600 : rhs1_out))
2601 : return false;
2602 :
2603 199450 : if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2604 : rhs2_out))
2605 : return false;
2606 : /* A NULL type marks an integer-constant operand.  Give it the type of
     :    the other operand if the constant fits that type; fail if it does
     :    not fit or if both operands are constants.  */
2607 193575 : if (*type1_out == NULL)
2608 : {
2609 0 : if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2610 : return false;
2611 0 : *type1_out = *type2_out;
2612 : }
2613 :
2614 193575 : if (*type2_out == NULL)
2615 : {
2616 158767 : if (!int_fits_type_p (*rhs2_out, *type1_out))
2617 : return false;
2618 154366 : *type2_out = *type1_out;
2619 : }
2620 :
2621 : /* Ensure that the larger of the two operands comes first. */
2622 189174 : if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2623 : {
2624 87 : std::swap (*type1_out, *type2_out);
2625 87 : std::swap (*rhs1_out, *rhs2_out);
2626 : }
2627 :
2628 : return true;
2629 : }
2630 :
2631 : /* Check to see if the CALL statement is an invocation of copysign
2632 : with 1. being the first argument. Both the BUILT_IN_COPYSIGN
     : built-ins and the IFN_COPYSIGN internal function are recognized.
     : Return false if CALL is not a call statement at all. */
2633 : static bool
2634 164763 : is_copysign_call_with_1 (gimple *call)
2635 : {
2636 169863 : gcall *c = dyn_cast <gcall *> (call);
2637 5153 : if (! c)
2638 : return false;
2639 :
2640 5153 : enum combined_fn code = gimple_call_combined_fn (c);
2641 :
2642 5153 : if (code == CFN_LAST)
2643 : return false;
2644 : /* Built-in function: accept only the copysign variants.  */
2645 4205 : if (builtin_fn_p (code))
2646 : {
2647 1185 : switch (as_builtin_fn (code))
2648 : {
2649 30 : CASE_FLT_FN (BUILT_IN_COPYSIGN):
2650 30 : CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2651 30 : return real_onep (gimple_call_arg (c, 0));
2652 : default:
2653 : return false;
2654 : }
2655 : }
2656 : /* Internal function: accept only IFN_COPYSIGN.  */
2657 3020 : if (internal_fn_p (code))
2658 : {
2659 3020 : switch (as_internal_fn (code))
2660 : {
2661 23 : case IFN_COPYSIGN:
2662 23 : return real_onep (gimple_call_arg (c, 0));
2663 : default:
2664 : return false;
2665 : }
2666 : }
2667 :
2668 : return false;
2669 : }
2670 :
2671 : /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2672 : This only happens when the xorsign optab is defined, if the
2673 : pattern is not a xorsign pattern or if expansion fails FALSE is
2674 : returned, otherwise TRUE is returned. STMT is the multiplication
     : and GSI the iterator through which it is replaced. The copysign
     : result must have a single use, and nothing is done when HONOR_SNANS
     : holds for the type of the result. */
2675 : static bool
2676 714265 : convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2677 : {
2678 714265 : tree treeop0, treeop1, lhs, type;
2679 714265 : location_t loc = gimple_location (stmt);
2680 714265 : lhs = gimple_assign_lhs (stmt);
2681 714265 : treeop0 = gimple_assign_rhs1 (stmt);
2682 714265 : treeop1 = gimple_assign_rhs2 (stmt);
2683 714265 : type = TREE_TYPE (lhs);
2684 714265 : machine_mode mode = TYPE_MODE (type);
2685 :
2686 714265 : if (HONOR_SNANS (type))
2687 : return false;
2688 : /* Both multiplicands have to be SSA names; try the first one as the
     :    copysign call and then the second one.  */
2689 713742 : if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2690 : {
2691 215081 : gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2692 215081 : if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2693 : {
2694 215055 : call0 = SSA_NAME_DEF_STMT (treeop1);
2695 215055 : if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2696 215038 : return false;
2697 : /* The copysign call is the second operand; make TREEOP1 the other multiplicand.  */
2698 : treeop1 = treeop0;
2699 : }
2700 43 : if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2701 : return false;
2702 : /* Reuse TREEOP0 for the second argument of the copysign call, the sign source.  */
2703 43 : gcall *c = as_a<gcall*> (call0);
2704 43 : treeop0 = gimple_call_arg (c, 1);
2705 :
2706 43 : gcall *call_stmt
2707 43 : = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2708 43 : gimple_set_lhs (call_stmt, lhs);
2709 43 : gimple_set_location (call_stmt, loc);
2710 43 : gsi_replace (gsi, call_stmt, true);
2711 43 : return true;
2712 : }
2713 :
2714 : return false;
2715 : }
2716 :
2717 : /* Process a single gimple statement STMT, which has a MULT_EXPR as
2718 : its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2719 : value is true iff we converted the statement. */
2720 :
2721 : static bool
2722 724340 : convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2723 : {
2724 724340 : tree lhs, rhs1, rhs2, type, type1, type2;
2725 724340 : enum insn_code handler;
2726 724340 : scalar_int_mode to_mode, from_mode, actual_mode;
2727 724340 : optab op;
2728 724340 : int actual_precision;
2729 724340 : location_t loc = gimple_location (stmt);
2730 724340 : bool from_unsigned1, from_unsigned2;
2731 :
2732 724340 : lhs = gimple_assign_lhs (stmt);
2733 724340 : type = TREE_TYPE (lhs);
2734 724340 : if (TREE_CODE (type) != INTEGER_TYPE)
2735 : return false;
2736 :
2737 588702 : if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2738 : return false;
2739 :
2740 : /* if any one of rhs1 and rhs2 is subject to abnormal coalescing,
2741 : avoid the transform. */
2742 152388 : if ((TREE_CODE (rhs1) == SSA_NAME
2743 152388 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2744 304775 : || (TREE_CODE (rhs2) == SSA_NAME
2745 24261 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2746 : return false;
2747 :
2748 152387 : to_mode = SCALAR_INT_TYPE_MODE (type);
2749 152387 : from_mode = SCALAR_INT_TYPE_MODE (type1);
2750 152387 : if (to_mode == from_mode)
2751 : return false;
2752 :
2753 152383 : from_unsigned1 = TYPE_UNSIGNED (type1);
2754 152383 : from_unsigned2 = TYPE_UNSIGNED (type2);
2755 :
2756 152383 : if (from_unsigned1 && from_unsigned2)
2757 : op = umul_widen_optab;
2758 56094 : else if (!from_unsigned1 && !from_unsigned2)
2759 : op = smul_widen_optab;
2760 : else
2761 1911 : op = usmul_widen_optab;
2762 :
2763 152383 : handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2764 : &actual_mode);
2765 :
2766 152383 : if (handler == CODE_FOR_nothing)
2767 : {
2768 142308 : if (op != smul_widen_optab)
2769 : {
2770 : /* We can use a signed multiply with unsigned types as long as
2771 : there is a wider mode to use, or it is the smaller of the two
2772 : types that is unsigned. Note that type1 >= type2, always. */
2773 89636 : if ((TYPE_UNSIGNED (type1)
2774 87938 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2775 89636 : || (TYPE_UNSIGNED (type2)
2776 1698 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2777 : {
2778 89636 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2779 179272 : || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2780 89636 : return false;
2781 : }
2782 :
2783 0 : op = smul_widen_optab;
2784 0 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2785 : from_mode,
2786 : &actual_mode);
2787 :
2788 0 : if (handler == CODE_FOR_nothing)
2789 : return false;
2790 :
2791 : from_unsigned1 = from_unsigned2 = false;
2792 : }
2793 : else
2794 : {
2795 : /* Expand can synthesize smul_widen_optab if the target
2796 : supports umul_widen_optab. */
2797 52672 : op = umul_widen_optab;
2798 52672 : handler = find_widening_optab_handler_and_mode (op, to_mode,
2799 : from_mode,
2800 : &actual_mode);
2801 52672 : if (handler == CODE_FOR_nothing)
2802 : return false;
2803 : }
2804 : }
2805 :
2806 : /* Ensure that the inputs to the handler are in the correct precision
2807 : for the opcode. This will be the full mode size. */
2808 10075 : actual_precision = GET_MODE_PRECISION (actual_mode);
2809 10075 : if (2 * actual_precision > TYPE_PRECISION (type))
2810 : return false;
2811 10075 : if (actual_precision != TYPE_PRECISION (type1)
2812 10075 : || from_unsigned1 != TYPE_UNSIGNED (type1))
2813 : {
2814 8 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2815 : {
2816 0 : if (TREE_CODE (rhs1) == INTEGER_CST)
2817 0 : rhs1 = fold_convert (type1, rhs1);
2818 : else
2819 0 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2820 : }
2821 8 : type1 = build_nonstandard_integer_type (actual_precision,
2822 : from_unsigned1);
2823 : }
2824 10075 : if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2825 : {
2826 9334 : if (TREE_CODE (rhs1) == INTEGER_CST)
2827 0 : rhs1 = fold_convert (type1, rhs1);
2828 : else
2829 9334 : rhs1 = build_and_insert_cast (gsi, loc, type1, rhs1);
2830 : }
2831 10075 : if (actual_precision != TYPE_PRECISION (type2)
2832 10075 : || from_unsigned2 != TYPE_UNSIGNED (type2))
2833 : {
2834 8 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2835 : {
2836 8 : if (TREE_CODE (rhs2) == INTEGER_CST)
2837 8 : rhs2 = fold_convert (type2, rhs2);
2838 : else
2839 0 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2840 : }
2841 8 : type2 = build_nonstandard_integer_type (actual_precision,
2842 : from_unsigned2);
2843 : }
2844 10075 : if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2845 : {
2846 9529 : if (TREE_CODE (rhs2) == INTEGER_CST)
2847 2018 : rhs2 = fold_convert (type2, rhs2);
2848 : else
2849 7511 : rhs2 = build_and_insert_cast (gsi, loc, type2, rhs2);
2850 : }
2851 :
2852 10075 : gimple_assign_set_rhs1 (stmt, rhs1);
2853 10075 : gimple_assign_set_rhs2 (stmt, rhs2);
2854 10075 : gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2855 10075 : update_stmt (stmt);
2856 10075 : widen_mul_stats.widen_mults_inserted++;
2857 10075 : return true;
2858 : }
2859 :
2860 : /* Process a single gimple statement STMT, which is found at the
2861 : iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2862 : rhs (given by CODE), and try to convert it into a
2863 : WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2864 : is true iff we converted the statement. */
2865 :
2866 : static bool
2867 2546833 : convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2868 : enum tree_code code)
2869 : {
2870 2546833 : gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2871 2546833 : gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2872 2546833 : tree type, type1, type2, optype;
2873 2546833 : tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2874 2546833 : enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2875 2546833 : optab this_optab;
2876 2546833 : enum tree_code wmult_code;
2877 2546833 : enum insn_code handler;
2878 2546833 : scalar_mode to_mode, from_mode, actual_mode;
2879 2546833 : location_t loc = gimple_location (stmt);
2880 2546833 : int actual_precision;
2881 2546833 : bool from_unsigned1, from_unsigned2;
2882 :
2883 2546833 : lhs = gimple_assign_lhs (stmt);
2884 2546833 : type = TREE_TYPE (lhs);
2885 2546833 : if ((TREE_CODE (type) != INTEGER_TYPE
2886 399392 : && TREE_CODE (type) != FIXED_POINT_TYPE)
2887 2546833 : || !type_has_mode_precision_p (type))
2888 400466 : return false;
2889 :
2890 2146367 : if (code == MINUS_EXPR)
2891 : wmult_code = WIDEN_MULT_MINUS_EXPR;
2892 : else
2893 1908483 : wmult_code = WIDEN_MULT_PLUS_EXPR;
2894 :
2895 2146367 : rhs1 = gimple_assign_rhs1 (stmt);
2896 2146367 : rhs2 = gimple_assign_rhs2 (stmt);
2897 :
2898 2146367 : if (TREE_CODE (rhs1) == SSA_NAME)
2899 : {
2900 2111083 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2901 2111083 : if (is_gimple_assign (rhs1_stmt))
2902 1243162 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2903 : }
2904 :
2905 2146367 : if (TREE_CODE (rhs2) == SSA_NAME)
2906 : {
2907 790853 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2908 790853 : if (is_gimple_assign (rhs2_stmt))
2909 609127 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2910 : }
2911 :
2912 : /* Allow for one conversion statement between the multiply
2913 : and addition/subtraction statement. If there are more than
2914 : one conversions then we assume they would invalidate this
2915 : transformation. If that's not the case then they should have
2916 : been folded before now. */
2917 2146367 : if (CONVERT_EXPR_CODE_P (rhs1_code))
2918 : {
2919 419727 : conv1_stmt = rhs1_stmt;
2920 419727 : rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2921 419727 : if (TREE_CODE (rhs1) == SSA_NAME)
2922 : {
2923 353968 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2924 353968 : if (is_gimple_assign (rhs1_stmt))
2925 205286 : rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2926 : }
2927 : else
2928 : return false;
2929 : }
2930 2080608 : if (CONVERT_EXPR_CODE_P (rhs2_code))
2931 : {
2932 198963 : conv2_stmt = rhs2_stmt;
2933 198963 : rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2934 198963 : if (TREE_CODE (rhs2) == SSA_NAME)
2935 : {
2936 187748 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2937 187748 : if (is_gimple_assign (rhs2_stmt))
2938 123141 : rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2939 : }
2940 : else
2941 : return false;
2942 : }
2943 :
2944 : /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2945 : is_widening_mult_p, but we still need the rhs returns.
2946 :
2947 : It might also appear that it would be sufficient to use the existing
2948 : operands of the widening multiply, but that would limit the choice of
2949 : multiply-and-accumulate instructions.
2950 :
2951 : If the widened-multiplication result has more than one uses, it is
2952 : probably wiser not to do the conversion. Also restrict this operation
2953 : to single basic block to avoid moving the multiply to a different block
2954 : with a higher execution frequency. */
2955 2069393 : if (code == PLUS_EXPR
2956 1836361 : && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2957 : {
2958 136751 : if (!has_single_use (rhs1)
2959 78147 : || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2960 204828 : || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2961 : &type2, &mult_rhs2))
2962 114830 : return false;
2963 : add_rhs = rhs2;
2964 : conv_stmt = conv1_stmt;
2965 : }
2966 1932642 : else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2967 : {
2968 124259 : if (!has_single_use (rhs2)
2969 76043 : || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2970 193030 : || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2971 : &type2, &mult_rhs2))
2972 109394 : return false;
2973 : add_rhs = rhs1;
2974 : conv_stmt = conv2_stmt;
2975 : }
2976 : else
2977 : return false;
2978 :
2979 36786 : to_mode = SCALAR_TYPE_MODE (type);
2980 36786 : from_mode = SCALAR_TYPE_MODE (type1);
2981 36786 : if (to_mode == from_mode)
2982 : return false;
2983 :
2984 : /* For fixed point types, the mode classes could be different
2985 : so reject that case. */
2986 36783 : if (GET_MODE_CLASS (from_mode) != GET_MODE_CLASS (to_mode))
2987 : return false;
2988 :
2989 36783 : from_unsigned1 = TYPE_UNSIGNED (type1);
2990 36783 : from_unsigned2 = TYPE_UNSIGNED (type2);
2991 36783 : optype = type1;
2992 :
2993 : /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2994 36783 : if (from_unsigned1 != from_unsigned2)
2995 : {
2996 907 : if (!INTEGRAL_TYPE_P (type))
2997 : return false;
2998 : /* We can use a signed multiply with unsigned types as long as
2999 : there is a wider mode to use, or it is the smaller of the two
3000 : types that is unsigned. Note that type1 >= type2, always. */
3001 907 : if ((from_unsigned1
3002 56 : && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3003 907 : || (from_unsigned2
3004 851 : && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3005 : {
3006 1778 : if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3007 1814 : || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3008 871 : return false;
3009 : }
3010 :
3011 36 : from_unsigned1 = from_unsigned2 = false;
3012 36 : optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3013 : false);
3014 : }
3015 :
3016 : /* If there was a conversion between the multiply and addition
3017 : then we need to make sure it fits a multiply-and-accumulate.
3018 : The should be a single mode change which does not change the
3019 : value. */
3020 35912 : if (conv_stmt)
3021 : {
3022 : /* We use the original, unmodified data types for this. */
3023 778 : tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3024 778 : tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3025 778 : int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3026 778 : bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3027 :
3028 778 : if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3029 : {
3030 : /* Conversion is a truncate. */
3031 0 : if (TYPE_PRECISION (to_type) < data_size)
3032 : return false;
3033 : }
3034 778 : else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3035 : {
3036 : /* Conversion is an extend. Check it's the right sort. */
3037 413 : if (TYPE_UNSIGNED (from_type) != is_unsigned
3038 413 : && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3039 : return false;
3040 : }
3041 : /* else convert is a no-op for our purposes. */
3042 : }
3043 :
3044 : /* Verify that the machine can perform a widening multiply
3045 : accumulate in this mode/signedness combination, otherwise
3046 : this transformation is likely to pessimize code. */
3047 35593 : this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3048 35593 : handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3049 : from_mode, &actual_mode);
3050 :
3051 35593 : if (handler == CODE_FOR_nothing)
3052 : return false;
3053 :
3054 : /* Ensure that the inputs to the handler are in the correct precision
3055 : for the opcode. This will be the full mode size. */
3056 0 : actual_precision = GET_MODE_PRECISION (actual_mode);
3057 0 : if (actual_precision != TYPE_PRECISION (type1)
3058 0 : || from_unsigned1 != TYPE_UNSIGNED (type1))
3059 : {
3060 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3061 : {
3062 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3063 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3064 : else
3065 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3066 : }
3067 0 : type1 = build_nonstandard_integer_type (actual_precision,
3068 : from_unsigned1);
3069 : }
3070 0 : if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3071 : {
3072 0 : if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3073 0 : mult_rhs1 = fold_convert (type1, mult_rhs1);
3074 : else
3075 0 : mult_rhs1 = build_and_insert_cast (gsi, loc, type1, mult_rhs1);
3076 : }
3077 0 : if (actual_precision != TYPE_PRECISION (type2)
3078 0 : || from_unsigned2 != TYPE_UNSIGNED (type2))
3079 : {
3080 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3081 : {
3082 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3083 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3084 : else
3085 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3086 : }
3087 0 : type2 = build_nonstandard_integer_type (actual_precision,
3088 : from_unsigned2);
3089 : }
3090 0 : if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3091 : {
3092 0 : if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3093 0 : mult_rhs2 = fold_convert (type2, mult_rhs2);
3094 : else
3095 0 : mult_rhs2 = build_and_insert_cast (gsi, loc, type2, mult_rhs2);
3096 : }
3097 :
3098 0 : if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3099 0 : add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3100 :
3101 0 : gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3102 : add_rhs);
3103 0 : update_stmt (gsi_stmt (*gsi));
3104 0 : widen_mul_stats.maccs_inserted++;
3105 0 : return true;
3106 : }
3107 :
3108 : /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3109 : OP2 and which we know is used in statements that can be, together with the
3110 : multiplication, converted to FMAs, perform the transformation. */
3111 :
3112 : static void
3113 17812 : convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3114 : {
3115 17812 : gimple *use_stmt;
3116 17812 : imm_use_iterator imm_iter;
3117 17812 : gcall *fma_stmt;
3118 :
3119 53486 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3120 : {
3121 17862 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3122 17862 : tree addop, mulop1 = op1, result = mul_result;
3123 17862 : bool negate_p = false;
3124 17862 : gimple_seq seq = NULL;
3125 :
3126 17862 : if (is_gimple_debug (use_stmt))
3127 0 : continue;
3128 :
3129 : /* If the use is a type convert, look further into it if the operations
3130 : are the same under two's complement. */
3131 17862 : tree lhs_type;
3132 17862 : if (gimple_assign_cast_p (use_stmt)
3133 0 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3134 17862 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3135 : {
3136 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3137 0 : gimple *tmp_use;
3138 0 : use_operand_p tmp_use_p;
3139 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3140 : {
3141 0 : release_defs (use_stmt);
3142 0 : use_stmt = tmp_use;
3143 0 : result = cast_lhs;
3144 0 : gsi_remove (&gsi, true);
3145 0 : gsi = gsi_for_stmt (use_stmt);
3146 : }
3147 : }
3148 :
3149 17862 : if (is_gimple_assign (use_stmt)
3150 17862 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3151 : {
3152 700 : result = gimple_assign_lhs (use_stmt);
3153 700 : use_operand_p use_p;
3154 700 : gimple *neguse_stmt;
3155 700 : single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3156 700 : gsi_remove (&gsi, true);
3157 700 : release_defs (use_stmt);
3158 :
3159 700 : use_stmt = neguse_stmt;
3160 700 : gsi = gsi_for_stmt (use_stmt);
3161 700 : negate_p = true;
3162 : }
3163 :
3164 17862 : tree cond, else_value, ops[3], len, bias;
3165 17862 : tree_code code;
3166 17862 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3167 : ops, &else_value,
3168 : &len, &bias))
3169 0 : gcc_unreachable ();
3170 17862 : addop = ops[0] == result ? ops[1] : ops[0];
3171 :
3172 17862 : if (code == MINUS_EXPR)
3173 : {
3174 5823 : if (ops[0] == result)
3175 : /* a * b - c -> a * b + (-c) */
3176 2911 : addop = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (addop), addop);
3177 : else
3178 : /* a - b * c -> (-b) * c + a */
3179 2912 : negate_p = !negate_p;
3180 : }
3181 :
3182 17862 : if (negate_p)
3183 3612 : mulop1 = gimple_build (&seq, NEGATE_EXPR, TREE_TYPE (mulop1), mulop1);
3184 :
3185 17862 : if (seq)
3186 5818 : gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3187 :
3188 : /* Ensure all the operands are of the same type. Use the type of the
3189 : addend as that's the statement being replaced. */
3190 17862 : op2 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3191 17862 : UNKNOWN_LOCATION, TREE_TYPE (addop), op2);
3192 17862 : mulop1 = gimple_convert (&gsi, true, GSI_SAME_STMT,
3193 17862 : UNKNOWN_LOCATION, TREE_TYPE (addop), mulop1);
3194 :
3195 17862 : if (len)
3196 0 : fma_stmt
3197 0 : = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3198 : addop, else_value, len, bias);
3199 17862 : else if (cond)
3200 94 : fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3201 : op2, addop, else_value);
3202 : else
3203 17768 : fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3204 17862 : gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3205 17862 : gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3206 : use_stmt));
3207 17862 : gsi_replace (&gsi, fma_stmt, true);
3208 : /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3209 : regardless of where the negation occurs. */
3210 17862 : gimple *orig_stmt = gsi_stmt (gsi);
3211 17862 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3212 : {
3213 5867 : if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3214 0 : gcc_unreachable ();
3215 5867 : update_stmt (gsi_stmt (gsi));
3216 : }
3217 :
3218 17862 : if (dump_file && (dump_flags & TDF_DETAILS))
3219 : {
3220 3 : fprintf (dump_file, "Generated FMA ");
3221 3 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3222 3 : fprintf (dump_file, "\n");
3223 : }
3224 :
3225 : /* If the FMA result is negated in a single use, fold the negation
3226 : too. */
3227 17862 : orig_stmt = gsi_stmt (gsi);
3228 17862 : use_operand_p use_p;
3229 17862 : gimple *neg_stmt;
3230 17862 : if (is_gimple_call (orig_stmt)
3231 17862 : && gimple_call_internal_p (orig_stmt)
3232 17862 : && gimple_call_lhs (orig_stmt)
3233 17862 : && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3234 17862 : && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3235 12814 : && is_gimple_assign (neg_stmt)
3236 10200 : && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3237 19215 : && !stmt_could_throw_p (cfun, neg_stmt))
3238 : {
3239 1353 : gsi = gsi_for_stmt (neg_stmt);
3240 1353 : if (fold_stmt (&gsi, follow_all_ssa_edges))
3241 : {
3242 1353 : if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3243 0 : gcc_unreachable ();
3244 1353 : update_stmt (gsi_stmt (gsi));
3245 1353 : if (dump_file && (dump_flags & TDF_DETAILS))
3246 : {
3247 0 : fprintf (dump_file, "Folded FMA negation ");
3248 0 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3249 0 : fprintf (dump_file, "\n");
3250 : }
3251 : }
3252 : }
3253 :
3254 17862 : widen_mul_stats.fmas_inserted++;
3255 17812 : }
3256 17812 : }
3257 :
3258 : /* Data necessary to perform the actual transformation from a multiplication
3259 : and an addition to an FMA after decision is taken it should be done and to
3260 : then delete the multiplication statement from the function IL. */
3261 :
3262 : struct fma_transformation_info
3263 : {
3264 : gimple *mul_stmt;
3265 : tree mul_result;
3266 : tree op1;
3267 : tree op2;
3268 : };
3269 :
3270 : /* Structure containing the current state of FMA deferring, i.e. whether we are
3271 : deferring, whether to continue deferring, and all data necessary to come
3272 : back and perform all deferred transformations. */
3273 :
3274 10065745 : class fma_deferring_state
3275 : {
3276 : public:
3277 : /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3278 : do any deferring. */
3279 :
3280 10065745 : fma_deferring_state (bool perform_deferring)
3281 10065745 : : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3282 10065745 : m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3283 :
3284 : /* List of FMA candidates for which we the transformation has been determined
3285 : possible but we at this point in BB analysis we do not consider them
3286 : beneficial. */
3287 : auto_vec<fma_transformation_info, 8> m_candidates;
3288 :
3289 : /* Set of results of multiplication that are part of an already deferred FMA
3290 : candidates. */
3291 : hash_set<tree> m_mul_result_set;
3292 :
3293 : /* The PHI that supposedly feeds back result of a FMA to another over loop
3294 : boundary. */
3295 : gphi *m_initial_phi;
3296 :
3297 : /* Result of the last produced FMA candidate or NULL if there has not been
3298 : one. */
3299 : tree m_last_result;
3300 :
3301 : /* If true, deferring might still be profitable. If false, transform all
3302 : candidates and no longer defer. */
3303 : bool m_deferring_p;
3304 : };
3305 :
3306 : /* Transform all deferred FMA candidates and mark STATE as no longer
3307 : deferring. */
3308 :
3309 : static void
3310 3596587 : cancel_fma_deferring (fma_deferring_state *state)
3311 : {
3312 3596587 : if (!state->m_deferring_p)
3313 : return;
3314 :
3315 2590808 : for (unsigned i = 0; i < state->m_candidates.length (); i++)
3316 : {
3317 941 : if (dump_file && (dump_flags & TDF_DETAILS))
3318 0 : fprintf (dump_file, "Generating deferred FMA\n");
3319 :
3320 941 : const fma_transformation_info &fti = state->m_candidates[i];
3321 941 : convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3322 :
3323 941 : gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3324 941 : gsi_remove (&gsi, true);
3325 941 : release_defs (fti.mul_stmt);
3326 : }
3327 2589867 : state->m_deferring_p = false;
3328 : }
3329 :
3330 : /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3331 : Otherwise return NULL. */
3332 :
3333 : static gphi *
3334 5251 : result_of_phi (tree op)
3335 : {
3336 0 : if (TREE_CODE (op) != SSA_NAME)
3337 : return NULL;
3338 :
3339 5126 : return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3340 : }
3341 :
3342 : /* After processing statements of a BB and recording STATE, return true if the
3343 : initial phi is fed by the last FMA candidate result ore one such result from
3344 : previously processed BBs marked in LAST_RESULT_SET. */
3345 :
3346 : static bool
3347 356 : last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3348 : hash_set<tree> *last_result_set)
3349 : {
3350 356 : ssa_op_iter iter;
3351 356 : use_operand_p use;
3352 876 : FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3353 : {
3354 616 : tree t = USE_FROM_PTR (use);
3355 616 : if (t == state->m_last_result
3356 616 : || last_result_set->contains (t))
3357 96 : return true;
3358 : }
3359 :
3360 : return false;
3361 : }
3362 :
3363 : /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3364 : with uses in additions and subtractions to form fused multiply-add
3365 : operations. Returns true if successful and MUL_STMT should be removed.
3366 : If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3367 : on MUL_COND, otherwise it is unconditional.
3368 :
3369 : If STATE indicates that we are deferring FMA transformation, that means
3370 : that we do not produce FMAs for basic blocks which look like:
3371 :
3372 : <bb 6>
3373 : # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3374 : _65 = _14 * _16;
3375 : accumulator_66 = _65 + accumulator_111;
3376 :
3377 : or its unrolled version, i.e. with several FMA candidates that feed result
3378 : of one into the addend of another. Instead, we add them to a list in STATE
3379 : and if we later discover an FMA candidate that is not part of such a chain,
3380 : we go back and perform all deferred past candidates. */
3381 :
3382 : static bool
3383 714351 : convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3384 : fma_deferring_state *state, tree mul_cond = NULL_TREE,
3385 : tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3386 : {
3387 714351 : tree mul_result = gimple_get_lhs (mul_stmt);
3388 : /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3389 : if the statement was left just for the side-effects. */
3390 714351 : if (!mul_result)
3391 : return false;
3392 714351 : tree type = TREE_TYPE (mul_result);
3393 714351 : gimple *use_stmt, *neguse_stmt;
3394 714351 : use_operand_p use_p;
3395 714351 : imm_use_iterator imm_iter;
3396 :
3397 617160 : if (FLOAT_TYPE_P (type)
3398 739070 : && flag_fp_contract_mode != FP_CONTRACT_FAST)
3399 : return false;
3400 :
3401 : /* We don't want to do bitfield reduction ops. */
3402 709227 : if (INTEGRAL_TYPE_P (type)
3403 709227 : && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3404 : return false;
3405 :
3406 : /* If the target doesn't support it, don't generate it. We assume that
3407 : if fma isn't available then fms, fnma or fnms are not either. */
3408 709038 : optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3409 709038 : if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3410 : return false;
3411 :
3412 : /* If the multiplication has zero uses, it is kept around probably because
3413 : of -fnon-call-exceptions. Don't optimize it away in that case,
3414 : it is DCE job. */
3415 23470 : if (has_zero_uses (mul_result))
3416 : return false;
3417 :
3418 23470 : bool check_defer
3419 23470 : = (state->m_deferring_p
3420 23470 : && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3421 23470 : param_avoid_fma_max_bits));
3422 23470 : bool defer = check_defer;
3423 23470 : bool seen_negate_p = false;
3424 :
3425 : /* There is no numerical difference between fused and unfused integer FMAs,
3426 : and the assumption below that FMA is as cheap as addition is unlikely
3427 : to be true, especially if the multiplication occurs multiple times on
3428 : the same chain. E.g., for something like:
3429 :
3430 : (((a * b) + c) >> 1) + (a * b)
3431 :
3432 : we do not want to duplicate the a * b into two additions, not least
3433 : because the result is not a natural FMA chain. */
3434 23470 : if (ANY_INTEGRAL_TYPE_P (type)
3435 23470 : && !has_single_use (mul_result))
3436 : return false;
3437 :
3438 23470 : if (!dbg_cnt (form_fma))
3439 : return false;
3440 :
3441 : /* Make sure that the multiplication statement becomes dead after
3442 : the transformation, thus that all uses are transformed to FMAs.
3443 : This means we assume that an FMA operation has the same cost
3444 : as an addition. */
3445 42108 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3446 : {
3447 24200 : tree result = mul_result;
3448 24200 : bool negate_p = false;
3449 :
3450 24200 : use_stmt = USE_STMT (use_p);
3451 :
3452 24200 : if (is_gimple_debug (use_stmt))
3453 206 : continue;
3454 :
3455 : /* If the use is a type convert, look further into it if the operations
3456 : are the same under two's complement. */
3457 23994 : tree lhs_type;
3458 23994 : if (gimple_assign_cast_p (use_stmt)
3459 295 : && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
3460 24289 : && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
3461 : {
3462 0 : tree cast_lhs = gimple_get_lhs (use_stmt);
3463 0 : gimple *tmp_use;
3464 0 : use_operand_p tmp_use_p;
3465 0 : if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
3466 0 : use_stmt = tmp_use;
3467 0 : result = cast_lhs;
3468 : }
3469 :
3470 : /* For now restrict this operations to single basic blocks. In theory
3471 : we would want to support sinking the multiplication in
3472 : m = a*b;
3473 : if ()
3474 : ma = m + c;
3475 : else
3476 : d = m;
3477 : to form a fma in the then block and sink the multiplication to the
3478 : else block. */
3479 23994 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3480 5562 : return false;
3481 :
3482 : /* A negate on the multiplication leads to FNMA. */
3483 23143 : if (is_gimple_assign (use_stmt)
3484 23143 : && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3485 : {
3486 706 : ssa_op_iter iter;
3487 706 : use_operand_p usep;
3488 :
3489 : /* If (due to earlier missed optimizations) we have two
3490 : negates of the same value, treat them as equivalent
3491 : to a single negate with multiple uses. */
3492 706 : if (seen_negate_p)
3493 0 : return false;
3494 :
3495 706 : result = gimple_assign_lhs (use_stmt);
3496 :
3497 : /* Make sure the negate statement becomes dead with this
3498 : single transformation. */
3499 706 : if (!single_imm_use (gimple_assign_lhs (use_stmt),
3500 : &use_p, &neguse_stmt))
3501 : return false;
3502 :
3503 : /* Make sure the multiplication isn't also used on that stmt. */
3504 2836 : FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3505 1424 : if (USE_FROM_PTR (usep) == mul_result)
3506 : return false;
3507 :
3508 : /* Re-validate. */
3509 706 : use_stmt = neguse_stmt;
3510 706 : if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3511 : return false;
3512 :
3513 706 : negate_p = seen_negate_p = true;
3514 : }
3515 :
3516 23143 : tree cond, else_value, ops[3], len, bias;
3517 23143 : tree_code code;
3518 23143 : if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3519 : &else_value, &len, &bias))
3520 : return false;
3521 :
3522 : /* The multiplication result must be one of the addition operands. */
3523 20571 : if (ops[0] != result && ops[1] != result)
3524 : return false;
3525 :
3526 20093 : switch (code)
3527 : {
3528 5829 : case MINUS_EXPR:
3529 5829 : if (ops[1] == result)
3530 2912 : negate_p = !negate_p;
3531 : break;
3532 : case PLUS_EXPR:
3533 : break;
3534 : default:
3535 : /* FMA can only be formed from PLUS and MINUS. */
3536 : return false;
3537 : }
3538 :
3539 18454 : if (len)
3540 : {
3541 : /* For COND_LEN_* operations, we may have dummpy mask which is
3542 : the all true mask. Such TREE type may be mul_cond != cond
3543 : but we still consider they are equal. */
3544 0 : if (mul_cond && cond != mul_cond
3545 0 : && !(integer_truep (mul_cond) && integer_truep (cond)))
3546 0 : return false;
3547 :
3548 0 : if (else_value == result)
3549 : return false;
3550 :
3551 0 : if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3552 : opt_type))
3553 : return false;
3554 :
3555 0 : if (mul_len)
3556 : {
3557 0 : poly_int64 mul_value, value;
3558 0 : if (poly_int_tree_p (mul_len, &mul_value)
3559 0 : && poly_int_tree_p (len, &value)
3560 0 : && maybe_ne (mul_value, value))
3561 0 : return false;
3562 0 : else if (mul_len != len)
3563 : return false;
3564 :
3565 0 : if (wi::to_widest (mul_bias) != wi::to_widest (bias))
3566 : return false;
3567 : }
3568 : }
3569 : else
3570 : {
3571 18454 : if (mul_cond && cond != mul_cond)
3572 : return false;
3573 :
3574 18442 : if (cond)
3575 : {
3576 104 : if (cond == result || else_value == result)
3577 : return false;
3578 94 : if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3579 : opt_type))
3580 : return false;
3581 : }
3582 : }
3583 :
3584 : /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3585 : we'll visit later, we might be able to get a more profitable
3586 : match with fnma.
3587 : OTOH, if we don't, a negate / fma pair has likely lower latency
3588 : that a mult / subtract pair. */
3589 18432 : if (code == MINUS_EXPR
3590 5823 : && !negate_p
3591 2211 : && ops[0] == result
3592 2211 : && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3593 0 : && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3594 0 : && TREE_CODE (ops[1]) == SSA_NAME
3595 18432 : && has_single_use (ops[1]))
3596 : {
3597 0 : gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3598 0 : if (is_gimple_assign (stmt2)
3599 0 : && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3600 : return false;
3601 : }
3602 :
3603 : /* We can't handle a * b + a * b. */
3604 18432 : if (ops[0] == ops[1])
3605 : return false;
3606 : /* If deferring, make sure we are not looking at an instruction that
3607 : wouldn't have existed if we were not. */
3608 18432 : if (state->m_deferring_p
3609 18432 : && (state->m_mul_result_set.contains (ops[0])
3610 6446 : || state->m_mul_result_set.contains (ops[1])))
3611 0 : return false;
3612 :
3613 18432 : if (check_defer)
3614 : {
3615 6308 : tree use_lhs = gimple_get_lhs (use_stmt);
3616 6308 : if (state->m_last_result)
3617 : {
3618 1057 : if (ops[1] == state->m_last_result
3619 1057 : || ops[0] == state->m_last_result)
3620 : defer = true;
3621 : else
3622 6308 : defer = false;
3623 : }
3624 : else
3625 : {
3626 5251 : gcc_checking_assert (!state->m_initial_phi);
3627 5251 : gphi *phi;
3628 5251 : if (ops[0] == result)
3629 3324 : phi = result_of_phi (ops[1]);
3630 : else
3631 : {
3632 1927 : gcc_assert (ops[1] == result);
3633 1927 : phi = result_of_phi (ops[0]);
3634 : }
3635 :
3636 : if (phi)
3637 : {
3638 963 : state->m_initial_phi = phi;
3639 963 : defer = true;
3640 : }
3641 : else
3642 : defer = false;
3643 : }
3644 :
3645 6308 : state->m_last_result = use_lhs;
3646 6308 : check_defer = false;
3647 : }
3648 : else
3649 : defer = false;
3650 :
3651 : /* While it is possible to validate whether or not the exact form that
3652 : we've recognized is available in the backend, the assumption is that
3653 : if the deferring logic above did not trigger, the transformation is
3654 : never a loss. For instance, suppose the target only has the plain FMA
3655 : pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3656 : MUL+SUB for FMA+NEG, which is still two operations. Consider
3657 : -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3658 : form the two NEGs are independent and could be run in parallel. */
3659 5562 : }
3660 :
3661 17908 : if (defer)
3662 : {
3663 1037 : fma_transformation_info fti;
3664 1037 : fti.mul_stmt = mul_stmt;
3665 1037 : fti.mul_result = mul_result;
3666 1037 : fti.op1 = op1;
3667 1037 : fti.op2 = op2;
3668 1037 : state->m_candidates.safe_push (fti);
3669 1037 : state->m_mul_result_set.add (mul_result);
3670 :
3671 1037 : if (dump_file && (dump_flags & TDF_DETAILS))
3672 : {
3673 0 : fprintf (dump_file, "Deferred generating FMA for multiplication ");
3674 0 : print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3675 0 : fprintf (dump_file, "\n");
3676 : }
3677 :
3678 1037 : return false;
3679 : }
3680 : else
3681 : {
3682 16871 : if (state->m_deferring_p)
3683 4911 : cancel_fma_deferring (state);
3684 16871 : convert_mult_to_fma_1 (mul_result, op1, op2);
3685 16871 : return true;
3686 : }
3687 : }
3688 :
3689 :
3690 : /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3691 : a check for non-zero like:
3692 : _1 = x_4(D) * y_5(D);
3693 : *res_7(D) = _1;
3694 : if (x_4(D) != 0)
3695 : goto <bb 3>; [50.00%]
3696 : else
3697 : goto <bb 4>; [50.00%]
3698 :
3699 : <bb 3> [local count: 536870913]:
3700 : _2 = _1 / x_4(D);
3701 : _9 = _2 != y_5(D);
3702 : _10 = (int) _9;
3703 :
3704 : <bb 4> [local count: 1073741824]:
3705 : # iftmp.0_3 = PHI <_10(3), 0(2)>
3706 : then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3707 : optimize the x_4(D) != 0 condition to 1. */
3708 :
3709 : static void
3710 145 : maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3711 : gimple *div_stmt, bool *cfg_changed)
3712 : {
3713 145 : basic_block bb = gimple_bb (cond_stmt);
3714 290 : if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3715 51 : return;
3716 145 : edge pred_edge = single_pred_edge (bb);
3717 145 : basic_block pred_bb = pred_edge->src;
3718 145 : if (EDGE_COUNT (pred_bb->succs) != 2)
3719 : return;
3720 102 : edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3721 102 : edge other_succ_edge = NULL;
3722 102 : if (gimple_code (cond_stmt) == GIMPLE_COND)
3723 : {
3724 48 : if (EDGE_COUNT (bb->succs) != 2)
3725 : return;
3726 48 : other_succ_edge = EDGE_SUCC (bb, 0);
3727 48 : if (gimple_cond_code (cond_stmt) == NE_EXPR)
3728 : {
3729 24 : if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3730 24 : other_succ_edge = EDGE_SUCC (bb, 1);
3731 : }
3732 : else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3733 48 : other_succ_edge = EDGE_SUCC (bb, 0);
3734 48 : if (other_edge->dest != other_succ_edge->dest)
3735 : return;
3736 : }
3737 105 : else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3738 : return;
3739 202 : gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
3740 101 : if (zero_cond == NULL
3741 101 : || (gimple_cond_code (zero_cond)
3742 101 : != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3743 101 : || !integer_zerop (gimple_cond_rhs (zero_cond)))
3744 0 : return;
3745 101 : tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3746 101 : if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3747 : return;
3748 101 : if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3749 : {
3750 : /* Allow the divisor to be result of a same precision cast
3751 : from zero_cond_lhs. */
3752 53 : tree rhs2 = gimple_assign_rhs2 (div_stmt);
3753 53 : if (TREE_CODE (rhs2) != SSA_NAME)
3754 : return;
3755 53 : gimple *g = SSA_NAME_DEF_STMT (rhs2);
3756 53 : if (!gimple_assign_cast_p (g)
3757 53 : || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3758 53 : || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3759 106 : || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3760 53 : != TYPE_PRECISION (TREE_TYPE (rhs2))))
3761 : return;
3762 : }
3763 101 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
3764 101 : mul_stmts.quick_push (div_stmt);
3765 101 : if (is_gimple_debug (gsi_stmt (gsi)))
3766 0 : gsi_next_nondebug (&gsi);
3767 : unsigned cast_count = 0;
3768 635 : while (gsi_stmt (gsi) != cond_stmt)
3769 : {
3770 : /* If original mul_stmt has a single use, allow it in the same bb,
3771 : we are looking then just at __builtin_mul_overflow_p.
3772 : Though, in that case the original mul_stmt will be replaced
3773 : by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3774 : gimple *mul_stmt;
3775 : unsigned int i;
3776 2274 : bool ok = false;
3777 2274 : FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3778 : {
3779 2127 : if (gsi_stmt (gsi) == mul_stmt)
3780 : {
3781 : ok = true;
3782 : break;
3783 : }
3784 : }
3785 534 : if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3786 : ok = true;
3787 387 : if (!ok)
3788 51 : return;
3789 534 : gsi_next_nondebug (&gsi);
3790 : }
3791 101 : if (gimple_code (cond_stmt) == GIMPLE_COND)
3792 : {
3793 47 : basic_block succ_bb = other_edge->dest;
3794 75 : for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3795 28 : gsi_next (&gpi))
3796 : {
3797 35 : gphi *phi = gpi.phi ();
3798 35 : tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3799 35 : tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3800 35 : if (!operand_equal_p (v1, v2, 0))
3801 7 : return;
3802 : }
3803 : }
3804 : else
3805 : {
3806 54 : tree lhs = gimple_assign_lhs (cond_stmt);
3807 54 : if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3808 : return;
3809 54 : gsi_next_nondebug (&gsi);
3810 54 : if (!gsi_end_p (gsi))
3811 : {
3812 54 : if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3813 : return;
3814 54 : gimple *cast_stmt = gsi_stmt (gsi);
3815 54 : if (!gimple_assign_cast_p (cast_stmt))
3816 : return;
3817 54 : tree new_lhs = gimple_assign_lhs (cast_stmt);
3818 54 : gsi_next_nondebug (&gsi);
3819 54 : if (!gsi_end_p (gsi)
3820 54 : || !new_lhs
3821 54 : || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3822 108 : || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3823 : return;
3824 : lhs = new_lhs;
3825 : }
3826 54 : edge succ_edge = single_succ_edge (bb);
3827 54 : basic_block succ_bb = succ_edge->dest;
3828 54 : gsi = gsi_start_phis (succ_bb);
3829 54 : if (gsi_end_p (gsi))
3830 : return;
3831 54 : gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3832 54 : gsi_next (&gsi);
3833 54 : if (!gsi_end_p (gsi))
3834 : return;
3835 54 : if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3836 : return;
3837 54 : tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3838 54 : if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3839 : {
3840 0 : tree cond = gimple_assign_rhs1 (cond_stmt);
3841 0 : if (TREE_CODE (cond) == NE_EXPR)
3842 : {
3843 0 : if (!operand_equal_p (other_val,
3844 0 : gimple_assign_rhs3 (cond_stmt), 0))
3845 : return;
3846 : }
3847 0 : else if (!operand_equal_p (other_val,
3848 0 : gimple_assign_rhs2 (cond_stmt), 0))
3849 : return;
3850 : }
3851 54 : else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3852 : {
3853 25 : if (!integer_zerop (other_val))
3854 : return;
3855 : }
3856 29 : else if (!integer_onep (other_val))
3857 : return;
3858 : }
3859 94 : if (pred_edge->flags & EDGE_TRUE_VALUE)
3860 41 : gimple_cond_make_true (zero_cond);
3861 : else
3862 53 : gimple_cond_make_false (zero_cond);
3863 94 : update_stmt (zero_cond);
3864 94 : reset_flow_sensitive_info_in_bb (bb);
3865 94 : *cfg_changed = true;
3866 : }
3867 :
3868 : /* Helper function for arith_overflow_check_p. Return true
3869 : if VAL1 is equal to VAL2 cast to corresponding integral type
3870 : with other signedness or vice versa. */
3871 :
3872 : static bool
3873 384 : arith_cast_equal_p (tree val1, tree val2)
3874 : {
3875 384 : if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3876 66 : return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3877 318 : else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3878 : return false;
3879 280 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3880 280 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3881 : return true;
3882 168 : if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3883 168 : && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3884 120 : return true;
3885 : return false;
3886 : }
3887 :
3888 : /* Helper function of match_arith_overflow. Return 1
3889 : if USE_STMT is unsigned overflow check ovf != 0 for
3890 : STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3891 : and 0 otherwise.

   STMT is the arithmetic assignment being analysed; its rhs code selects
   which comparison shapes are accepted.  If CAST_STMT is non-NULL, its
   lhs rather than STMT's lhs is the value looked for in the comparison.
   If MAXVAL is non-NULL, the GT/LE case only accepts a comparison of that
   lhs against MAXVAL, the LT/GE case accepts nothing, and a right shift
   of the lhs by the precision of MAXVAL's type is additionally
   recognized.  For the BIT_NOT_EXPR forms the other comparison operand
   is stored in *OTHER when OTHER is non-NULL.  For MULT_EXPR the
   comparison is reached through a TRUNC_DIV_EXPR use of the product, and
   USE_STMT is updated to the comparison statement on success.  */
3892 :
3893 : static int
3894 2890207 : arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3895 : tree maxval, tree *other)
3896 : {
3897 2890207 : enum tree_code ccode = ERROR_MARK;
3898 2890207 : tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3899 2890207 : enum tree_code code = gimple_assign_rhs_code (stmt);
3900 5744734 : tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3901 2890207 : tree rhs1 = gimple_assign_rhs1 (stmt);
3902 2890207 : tree rhs2 = gimple_assign_rhs2 (stmt);
3903 2890207 : tree multop = NULL_TREE, divlhs = NULL_TREE;
3904 2890207 : gimple *cur_use_stmt = use_stmt;
3905 :
  /* For a multiplication USE_STMT must be a TRUNC_DIV_EXPR dividing LHS by
     one of the factors.  MULTOP becomes the other factor, DIVLHS the
     quotient, and CUR_USE_STMT the single use of the quotient (looking
     through one same-precision unsigned cast when CAST_STMT is set).  */
3906 2890207 : if (code == MULT_EXPR)
3907 : {
3908 662192 : if (!is_gimple_assign (use_stmt))
3909 661896 : return 0;
3910 528157 : if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3911 : return 0;
3912 2170 : if (gimple_assign_rhs1 (use_stmt) != lhs)
3913 : return 0;
3914 2107 : if (cast_stmt)
3915 : {
3916 156 : if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3917 : multop = rhs2;
3918 82 : else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3919 : multop = rhs1;
3920 : else
3921 : return 0;
3922 : }
3923 1951 : else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3924 : multop = rhs2;
3925 1873 : else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3926 : multop = rhs1;
3927 : else
3928 : return 0;
3929 300 : if (stmt_ends_bb_p (use_stmt))
3930 : return 0;
3931 300 : divlhs = gimple_assign_lhs (use_stmt);
3932 300 : if (!divlhs)
3933 : return 0;
3934 300 : use_operand_p use;
3935 300 : if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3936 : return 0;
3937 296 : if (cast_stmt && gimple_assign_cast_p (cur_use_stmt))
3938 : {
3939 4 : tree cast_lhs = gimple_assign_lhs (cur_use_stmt);
3940 8 : if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3941 4 : && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3942 4 : && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3943 4 : == TYPE_PRECISION (TREE_TYPE (divlhs)))
3944 8 : && single_imm_use (cast_lhs, &use, &cur_use_stmt))
3945 : {
3946 : cast_stmt = NULL;
3947 : divlhs = cast_lhs;
3948 : }
3949 : else
3950 0 : return 0;
3951 : }
3952 : }
  /* Extract the operation code and its two operands from CUR_USE_STMT,
     which may be a GIMPLE_COND, a binary assignment, or a COND_EXPR
     assignment whose first operand is a comparison.  */
3953 2228311 : if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3954 : {
3955 568974 : ccode = gimple_cond_code (cur_use_stmt);
3956 568974 : crhs1 = gimple_cond_lhs (cur_use_stmt);
3957 568974 : crhs2 = gimple_cond_rhs (cur_use_stmt);
3958 : }
3959 1659337 : else if (is_gimple_assign (cur_use_stmt))
3960 : {
3961 817135 : if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3962 : {
3963 493122 : ccode = gimple_assign_rhs_code (cur_use_stmt);
3964 493122 : crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3965 493122 : crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3966 : }
3967 324013 : else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3968 : {
3969 4869 : tree cond = gimple_assign_rhs1 (cur_use_stmt);
3970 4869 : if (COMPARISON_CLASS_P (cond))
3971 : {
3972 0 : ccode = TREE_CODE (cond);
3973 0 : crhs1 = TREE_OPERAND (cond, 0);
3974 0 : crhs2 = TREE_OPERAND (cond, 1);
3975 : }
3976 : else
3977 : return 0;
3978 : }
3979 : else
3980 : return 0;
3981 : }
3982 : else
3983 : return 0;
3984 :
  /* Shift form: LHS shifted right by the precision of MAXVAL's type.
     Follow the single use of the shift result; it must either be an
     EQ/NE comparison of that result against zero, or a conversion to an
     integral type no wider than MAXVAL's type.  */
3985 1062096 : if (maxval
3986 1062096 : && ccode == RSHIFT_EXPR
3987 33 : && crhs1 == lhs
3988 17 : && TREE_CODE (crhs2) == INTEGER_CST
3989 1062113 : && wi::to_widest (crhs2) == TYPE_PRECISION (TREE_TYPE (maxval)))
3990 : {
3991 16 : tree shiftlhs = gimple_assign_lhs (use_stmt);
3992 16 : if (!shiftlhs)
3993 : return 0;
3994 16 : use_operand_p use;
3995 16 : if (!single_imm_use (shiftlhs, &use, &cur_use_stmt))
3996 : return 0;
3997 12 : if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3998 : {
3999 0 : ccode = gimple_cond_code (cur_use_stmt);
4000 0 : crhs1 = gimple_cond_lhs (cur_use_stmt);
4001 0 : crhs2 = gimple_cond_rhs (cur_use_stmt);
4002 : }
4003 12 : else if (is_gimple_assign (cur_use_stmt))
4004 : {
4005 12 : if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
4006 : {
4007 0 : ccode = gimple_assign_rhs_code (cur_use_stmt);
4008 0 : crhs1 = gimple_assign_rhs1 (cur_use_stmt);
4009 0 : crhs2 = gimple_assign_rhs2 (cur_use_stmt);
4010 : }
4011 12 : else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
4012 : {
4013 0 : tree cond = gimple_assign_rhs1 (cur_use_stmt);
4014 0 : if (COMPARISON_CLASS_P (cond))
4015 : {
4016 0 : ccode = TREE_CODE (cond);
4017 0 : crhs1 = TREE_OPERAND (cond, 0);
4018 0 : crhs2 = TREE_OPERAND (cond, 1);
4019 : }
4020 : else
4021 : return 0;
4022 : }
4023 : else
4024 : {
4025 12 : enum tree_code sc = gimple_assign_rhs_code (cur_use_stmt);
4026 12 : tree castlhs = gimple_assign_lhs (cur_use_stmt);
4027 12 : if (!CONVERT_EXPR_CODE_P (sc)
4028 12 : || !castlhs
4029 12 : || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs))
4030 24 : || (TYPE_PRECISION (TREE_TYPE (castlhs))
4031 12 : > TYPE_PRECISION (TREE_TYPE (maxval))))
4032 : return 0;
4033 : return 1;
4034 : }
4035 : }
4036 : else
4037 : return 0;
4038 0 : if ((ccode != EQ_EXPR && ccode != NE_EXPR)
4039 0 : || crhs1 != shiftlhs
4040 0 : || !integer_zerop (crhs2))
4041 0 : return 0;
4042 : return 1;
4043 : }
4044 :
  /* Everything below only recognizes comparisons.  */
4045 1062080 : if (TREE_CODE_CLASS (ccode) != tcc_comparison)
4046 : return 0;
4047 :
4048 605050 : switch (ccode)
4049 : {
4050 114276 : case GT_EXPR:
4051 114276 : case LE_EXPR:
4052 114276 : if (maxval)
4053 : {
4054 : /* r = a + b; r > maxval or r <= maxval */
4055 45 : if (crhs1 == lhs
4056 44 : && TREE_CODE (crhs2) == INTEGER_CST
4057 67 : && tree_int_cst_equal (crhs2, maxval))
4058 12 : return ccode == GT_EXPR ? 1 : -1;
4059 : break;
4060 : }
4061 : /* r = a - b; r > a or r <= a
4062 : r = a + b; a > r or a <= r or b > r or b <= r. */
4063 114231 : if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
4064 114169 : || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
4065 8875 : && crhs2 == lhs))
4066 9277 : return ccode == GT_EXPR ? 1 : -1;
4067 : /* r = ~a; b > r or b <= r. */
4068 105294 : if (code == BIT_NOT_EXPR && crhs2 == lhs)
4069 : {
4070 190 : if (other)
4071 95 : *other = crhs1;
4072 222 : return ccode == GT_EXPR ? 1 : -1;
4073 : }
4074 : break;
4075 62066 : case LT_EXPR:
4076 62066 : case GE_EXPR:
4077 62066 : if (maxval)
4078 : break;
4079 : /* r = a - b; a < r or a >= r
4080 : r = a + b; r < a or r >= a or r < b or r >= b. */
4081 62060 : if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
4082 61920 : || (code == PLUS_EXPR && crhs1 == lhs
4083 30346 : && (crhs2 == rhs1 || crhs2 == rhs2)))
4084 4151 : return ccode == LT_EXPR ? 1 : -1;
4085 : /* r = ~a; r < b or r >= b. */
4086 57949 : if (code == BIT_NOT_EXPR && crhs1 == lhs)
4087 : {
4088 167 : if (other)
4089 92 : *other = crhs2;
4090 219 : return ccode == LT_EXPR ? 1 : -1;
4091 : }
4092 : break;
4093 428708 : case EQ_EXPR:
4094 428708 : case NE_EXPR:
4095 : /* r = a * b; _1 = r / a; _1 == b
4096 : r = a * b; _1 = r / b; _1 == a
4097 : r = a * b; _1 = r / a; _1 != b
4098 : r = a * b; _1 = r / b; _1 != a. */
4099 428708 : if (code == MULT_EXPR)
4100 : {
4101 293 : if (cast_stmt)
4102 : {
4103 146 : if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
4104 146 : || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
4105 : {
          /* Report the comparison statement back to the caller.  */
4106 146 : use_stmt = cur_use_stmt;
4107 216 : return ccode == NE_EXPR ? 1 : -1;
4108 : }
4109 : }
4110 96 : else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
4111 147 : || (crhs2 == divlhs && crhs1 == multop))
4112 : {
4113 147 : use_stmt = cur_use_stmt;
4114 223 : return ccode == NE_EXPR ? 1 : -1;
4115 : }
4116 : }
4117 : break;
4118 : default:
4119 : break;
4120 : }
4121 : return 0;
4122 : }
4123 :
4124 : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4125 : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4126 : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4127 : extern bool gimple_unsigned_integer_sat_mul (tree, tree*, tree (*)(tree));
4128 : extern bool gimple_spaceship (tree, tree*, tree (*)(tree));
4129 :
4130 : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4131 : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4132 : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
4133 :
4134 : static void
4135 159 : build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator *gsi,
4136 : internal_fn fn, tree lhs,
4137 : tree op_0, tree op_1)
4138 : {
4139 159 : if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4140 : {
4141 157 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4142 157 : gimple_call_set_lhs (call, lhs);
4143 157 : gsi_replace (gsi, call, /* update_eh_info */ true);
4144 : }
4145 159 : }
4146 :
4147 : static bool
4148 51 : build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator *gsi,
4149 : internal_fn fn, tree lhs,
4150 : tree op_0, tree op_1)
4151 : {
4152 51 : if (!direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4153 : return false;
4154 :
4155 43 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4156 43 : gimple_call_set_lhs (call, lhs);
4157 43 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4158 :
4159 43 : return true;
4160 : }
4161 :
4162 : /*
4163 : * Try to match saturation unsigned add with assign.
4164 : * _7 = _4 + _6;
4165 : * _8 = _4 > _7;
4166 : * _9 = (long unsigned int) _8;
4167 : * _10 = -_9;
4168 : * _12 = _7 | _10;
4169 : * =>
4170 : * _12 = .SAT_ADD (_4, _6);
4171 : *
4172 : * Try to match IMM=-1 saturation signed add with assign.
4173 : * <bb 2> [local count: 1073741824]:
4174 : * x.0_1 = (unsigned char) x_5(D);
4175 : * _3 = -x.0_1;
4176 : * _10 = (signed char) _3;
4177 : * _8 = x_5(D) & _10;
4178 : * if (_8 < 0)
4179 : * goto <bb 4>; [1.40%]
4180 : * else
4181 : * goto <bb 3>; [98.60%]
4182 : * <bb 3> [local count: 434070867]:
4183 : * _2 = x.0_1 + 255;
4184 : * <bb 4> [local count: 1073741824]:
4185 : * # _9 = PHI <_2(3), 128(2)>
4186 : * _4 = (int8_t) _9;
4187 : * =>
4188 : * _4 = .SAT_ADD (x_5, -1); */
4189 :
4190 : static void
4191 4802060 : match_saturation_add_with_assign (gimple_stmt_iterator *gsi, gassign *stmt)
4192 : {
4193 4802060 : tree ops[2];
4194 4802060 : tree lhs = gimple_assign_lhs (stmt);
4195 :
4196 4802060 : if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4197 4802060 : || gimple_signed_integer_sat_add (lhs, ops, NULL))
4198 34 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_ADD, lhs,
4199 : ops[0], ops[1]);
4200 4802060 : }
4201 :
4202 : /*
4203 : * Try to match saturation add with PHI.
4204 : * For unsigned integer:
4205 : * <bb 2> :
4206 : * _1 = x_3(D) + y_4(D);
4207 : * if (_1 >= x_3(D))
4208 : * goto <bb 3>; [INV]
4209 : * else
4210 : * goto <bb 4>; [INV]
4211 : *
4212 : * <bb 3> :
4213 : *
4214 : * <bb 4> :
4215 : * # _2 = PHI <255(2), _1(3)>
4216 : * =>
4217 : * <bb 4> [local count: 1073741824]:
4218 : * _2 = .SAT_ADD (x_4(D), y_5(D));
4219 : *
4220 : * For signed integer:
4221 : * x.0_1 = (long unsigned int) x_7(D);
4222 : * y.1_2 = (long unsigned int) y_8(D);
4223 : * _3 = x.0_1 + y.1_2;
4224 : * sum_9 = (int64_t) _3;
4225 : * _4 = x_7(D) ^ y_8(D);
4226 : * _5 = x_7(D) ^ sum_9;
4227 : * _15 = ~_4;
4228 : * _16 = _5 & _15;
4229 : * if (_16 < 0)
4230 : * goto <bb 3>; [41.00%]
4231 : * else
4232 : * goto <bb 4>; [59.00%]
4233 : * _11 = x_7(D) < 0;
4234 : * _12 = (long int) _11;
4235 : * _13 = -_12;
4236 : * _14 = _13 ^ 9223372036854775807;
4237 : * # _6 = PHI <_14(3), sum_9(2)>
4238 : * =>
4239 : * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
4240 :
4241 : static bool
4242 4090316 : match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
4243 : {
4244 4090316 : if (gimple_phi_num_args (phi) != 2)
4245 : return false;
4246 :
4247 3267921 : tree ops[2];
4248 3267921 : tree phi_result = gimple_phi_result (phi);
4249 :
4250 3267921 : if (!gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
4251 3267921 : && !gimple_signed_integer_sat_add (phi_result, ops, NULL))
4252 : return false;
4253 :
4254 21 : if (!TYPE_UNSIGNED (TREE_TYPE (ops[0])) && TREE_CODE (ops[1]) == INTEGER_CST)
4255 0 : ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4256 :
4257 21 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_ADD,
4258 : phi_result, ops[0],
4259 21 : ops[1]);
4260 : }
4261 :
4262 : /*
4263 : * Try to match saturation unsigned sub.
4264 : * _1 = _4 >= _5;
4265 : * _3 = _4 - _5;
4266 : * _6 = _1 ? _3 : 0;
4267 : * =>
4268 : * _6 = .SAT_SUB (_4, _5); */
4269 :
4270 : static void
4271 3291467 : match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
4272 : {
4273 3291467 : tree ops[2];
4274 3291467 : tree lhs = gimple_assign_lhs (stmt);
4275 :
4276 3291467 : if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
4277 125 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_SUB, lhs,
4278 : ops[0], ops[1]);
4279 3291467 : }
4280 :
4281 : /*
4282 : * Try to match saturation unsigned mul.
4283 : * _1 = (unsigned int) a_6(D);
4284 : * _2 = (unsigned int) b_7(D);
4285 : * x_8 = _1 * _2;
4286 : * overflow_9 = x_8 > 255;
4287 : * _3 = (unsigned char) overflow_9;
4288 : * _4 = -_3;
4289 : * _5 = (unsigned char) x_8;
4290 : * _10 = _4 | _5;
4291 : * =>
4292 : * _10 = .SAT_MUL (a_6, b_7); */
4293 :
4294 : static void
4295 2551391 : match_unsigned_saturation_mul (gimple_stmt_iterator *gsi, gassign *stmt)
4296 : {
4297 2551391 : tree ops[2];
4298 2551391 : tree lhs = gimple_assign_lhs (stmt);
4299 :
4300 2551391 : if (gimple_unsigned_integer_sat_mul (lhs, ops, NULL))
4301 0 : build_saturation_binary_arith_call_and_replace (gsi, IFN_SAT_MUL, lhs,
4302 : ops[0], ops[1]);
4303 2551391 : }
4304 :
4305 : /* Try to match saturation unsigned mul, aka:
4306 : _6 = .MUL_OVERFLOW (a_4(D), b_5(D));
4307 : _2 = IMAGPART_EXPR <_6>;
4308 : if (_2 != 0)
4309 : goto <bb 4>; [35.00%]
4310 : else
4311 : goto <bb 3>; [65.00%]
4312 :
4313 : <bb 3> [local count: 697932184]:
4314 : _1 = REALPART_EXPR <_6>;
4315 :
4316 : <bb 4> [local count: 1073741824]:
4317 : # _3 = PHI <18446744073709551615(2), _1(3)>
4318 : =>
4319 : _3 = .SAT_MUL (a_4(D), b_5(D)); */
4320 :
4321 : static bool
4322 4090273 : match_saturation_mul (gimple_stmt_iterator *gsi, gphi *phi)
4323 : {
4324 4090273 : if (gimple_phi_num_args (phi) != 2)
4325 : return false;
4326 :
4327 3267878 : tree ops[2];
4328 3267878 : tree phi_result = gimple_phi_result (phi);
4329 :
4330 3267878 : if (!gimple_unsigned_integer_sat_mul (phi_result, ops, NULL))
4331 : return false;
4332 :
4333 0 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_MUL,
4334 : phi_result, ops[0],
4335 0 : ops[1]);
4336 : }
4337 :
4338 : /* Try to match variants of spaceship operation:
4339 : <bb 2>
4340 : if (a_3(D) >= b_4(D)) -- CMP_1
4341 : goto <bb 3>;
4342 : else
4343 : goto <bb 4>;
4344 :
4345 : <bb 3>
4346 : _1 = a_3(D) > b_4(D); -- CMP_2
4347 : _5 = (int) _1;
4348 :
4349 : <bb 4>
4350 : # _2 = PHI <-1(2), _5(3)>
4351 : =>
4352 : _2 = .SPACESHIP (a_3(D), b_4(D), -1);
4353 :
4354 : All possible canonical variants of the comparison operator in CMP_1 and
4355 : CMP_2 have been included in the gimple_spaceship function. */
4356 : static bool
4357 4090273 : match_spaceship (gimple_stmt_iterator *gsi, gphi *phi)
4358 : {
4359 4090273 : if (gimple_phi_num_args (phi) != 2)
4360 : return false;
4361 3267878 : tree ops[2];
4362 3267878 : tree phi_result = gimple_phi_result (phi);
4363 :
4364 3267878 : if (!gimple_spaceship (phi_result, ops, NULL))
4365 : return false;
4366 :
4367 : /* Allow different modes as long as both are integral types. */
4368 228 : if (!INTEGRAL_TYPE_P (TREE_TYPE (phi_result))
4369 228 : || !INTEGRAL_TYPE_P (TREE_TYPE (ops[0])))
4370 : return false;
4371 :
4372 114 : tree ops_type = TREE_TYPE (ops[0]);
4373 114 : machine_mode ops_mode = TYPE_MODE (ops_type);
4374 114 : machine_mode promoted_mode = ops_mode;
4375 114 : tree promoted_type = ops_type;
4376 114 : bool is_unsigned = TYPE_UNSIGNED (ops_type);
4377 :
4378 : /* Check if spaceship optab is available for the operand mode.
4379 : If not, try promoting to a wider mode that is supported. */
4380 114 : if (optab_handler (spaceship_optab, ops_mode) == CODE_FOR_nothing)
4381 : {
4382 : /* Try promoting to wider modes (e.g., QI/HI -> SI -> DI). */
4383 : machine_mode wider_mode;
4384 5 : FOR_EACH_WIDER_MODE_FROM (wider_mode, ops_mode)
4385 : {
4386 4 : if (optab_handler (spaceship_optab, wider_mode)
4387 : != CODE_FOR_nothing)
4388 : {
4389 : /* Check if we can get a type for this mode with matching
4390 : signedness. */
4391 0 : promoted_type = lang_hooks.types.type_for_mode (wider_mode,
4392 : is_unsigned);
4393 0 : if (promoted_type != NULL_TREE && INTEGRAL_TYPE_P (promoted_type))
4394 : {
4395 : promoted_mode = wider_mode;
4396 : break;
4397 : }
4398 : }
4399 : }
4400 :
4401 : // If no suitable promoted mode found, give up.
4402 1 : if (promoted_mode == ops_mode)
4403 4090160 : return false;
4404 : }
4405 :
4406 : /* If promotion is needed, insert conversion statements.
4407 : We must use GIMPLE assignments rather than fold_convert because
4408 : gimple_call arguments must be valid GIMPLE values (SSA names or
4409 : constants), not tree expressions. */
4410 113 : ops[0] = gimple_convert (gsi, true, GSI_SAME_STMT, UNKNOWN_LOCATION,
4411 : promoted_type, ops[0]);
4412 113 : ops[1] = gimple_convert (gsi, true, GSI_SAME_STMT, UNKNOWN_LOCATION,
4413 : promoted_type, ops[1]);
4414 :
4415 113 : tree spaceship_arg_3 = is_unsigned ? build_one_cst (integer_type_node)
4416 97 : : build_minus_one_cst (integer_type_node);
4417 :
4418 113 : gcall *call = gimple_build_call_internal (IFN_SPACESHIP, 3, ops[0], ops[1],
4419 : spaceship_arg_3);
4420 :
4421 : /* SPACESHIP optab always returns signed int (SI mode).
4422 : Cast to phi_result's type if needed. */
4423 113 : tree call_result_type = integer_type_node;
4424 113 : if (!types_compatible_p (TREE_TYPE (phi_result), call_result_type))
4425 : {
4426 49 : tree call_result = make_ssa_name (call_result_type);
4427 49 : gimple_call_set_lhs (call, call_result);
4428 49 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4429 49 : gassign *cast_stmt = gimple_build_assign (phi_result, NOP_EXPR,
4430 : call_result);
4431 49 : gsi_insert_before (gsi, cast_stmt, GSI_SAME_STMT);
4432 : }
4433 : else
4434 : {
4435 64 : gimple_call_set_lhs (call, phi_result);
4436 64 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
4437 : }
4438 : return true;
4439 : }
4440 :
4441 :
4442 : /*
4443 : * Try to match saturation unsigned sub.
4444 : * <bb 2> [local count: 1073741824]:
4445 : * if (x_2(D) > y_3(D))
4446 : * goto <bb 3>; [50.00%]
4447 : * else
4448 : * goto <bb 4>; [50.00%]
4449 : *
4450 : * <bb 3> [local count: 536870912]:
4451 : * _4 = x_2(D) - y_3(D);
4452 : *
4453 : * <bb 4> [local count: 1073741824]:
4454 : * # _1 = PHI <0(2), _4(3)>
4455 : * =>
4456 : * <bb 4> [local count: 1073741824]:
4457 : * _1 = .SAT_SUB (x_2(D), y_3(D)); */
4458 : static bool
4459 4090299 : match_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
4460 : {
4461 4090299 : if (gimple_phi_num_args (phi) != 2)
4462 : return false;
4463 :
4464 3267904 : tree ops[2];
4465 3267904 : tree phi_result = gimple_phi_result (phi);
4466 :
4467 3267904 : if (!gimple_unsigned_integer_sat_sub (phi_result, ops, NULL)
4468 3267904 : && !gimple_signed_integer_sat_sub (phi_result, ops, NULL))
4469 : return false;
4470 :
4471 30 : return build_saturation_binary_arith_call_and_insert (gsi, IFN_SAT_SUB,
4472 : phi_result, ops[0],
4473 30 : ops[1]);
4474 : }
4475 :
4476 : /*
4477 : * Try to match saturation unsigned trunc.
4478 : * uint16_t x_4(D);
4479 : * uint8_t _6;
4480 : * overflow_5 = x_4(D) > 255;
4481 : * _1 = (unsigned char) x_4(D);
4482 : * _2 = (unsigned char) overflow_5;
4483 : * _3 = -_2;
4484 : * _6 = _1 | _3;
4485 : * =>
4486 : * _6 = .SAT_TRUNC (x_4(D));
4487 : * */
4488 : static void
4489 2551391 : match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
4490 : {
4491 2551391 : tree ops[1];
4492 2551391 : tree lhs = gimple_assign_lhs (stmt);
4493 2551391 : tree type = TREE_TYPE (lhs);
4494 :
4495 2551391 : if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4496 2551491 : && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4497 100 : tree_pair (type, TREE_TYPE (ops[0])),
4498 : OPTIMIZE_FOR_BOTH))
4499 : {
4500 73 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4501 73 : gimple_call_set_lhs (call, lhs);
4502 73 : gsi_replace (gsi, call, /* update_eh_info */ true);
4503 : }
4504 2551391 : }
4505 :
4506 : /*
4507 : * Try to match a saturating truncation computed by the two-argument PHI and, if direct_internal_fn_supported_p accepts IFN_SAT_TRUNC for the (PHI result type, operand type) pair, insert a .SAT_TRUNC call defining the PHI result before GSI. Return true iff the call was inserted.
4508 : * For example:
4509 : * x.0_1 = (unsigned long) x_4(D);
4510 : * _2 = x.0_1 + 2147483648;
4511 : * if (_2 > 4294967295)
4512 : * goto <bb 4>; [50.00%]
4513 : * else
4514 : * goto <bb 3>; [50.00%]
4515 : * ;; succ: 4
4516 : * ;; 3
4517 : *
4518 : * ;; basic block 3, loop depth 0
4519 : * ;; pred: 2
4520 : * trunc_5 = (int32_t) x_4(D);
4521 : * goto <bb 5>; [100.00%]
4522 : * ;; succ: 5
4523 : *
4524 : * ;; basic block 4, loop depth 0
4525 : * ;; pred: 2
4526 : * _7 = x_4(D) < 0;
4527 : * _8 = (int) _7;
4528 : * _9 = -_8;
4529 : * _10 = _9 ^ 2147483647;
4530 : * ;; succ: 5
4531 : *
4532 : * ;; basic block 5, loop depth 0
4533 : * ;; pred: 3
4534 : * ;; 4
4535 : * # _3 = PHI <trunc_5(3), _10(4)>
4536 : * =>
4537 : * _6 = .SAT_TRUNC (x_4(D));
4538 : */
4539 :
4540 : static bool
4541 4090273 : match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi)
4542 : {
4543 4090273 : if (gimple_phi_num_args (phi) != 2) /* Only two-argument PHIs are considered.  */
4544 : return false;
4545 :
4546 3267878 : tree ops[1];
4547 3267878 : tree phi_result = gimple_phi_result (phi);
4548 3267878 : tree type = TREE_TYPE (phi_result);
4549 :
4550 3267878 : if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL)
4551 3267878 : && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL))
4552 : return false;
4553 :
4554 0 : if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4555 0 : tree_pair (type, TREE_TYPE (ops[0])),
4556 : OPTIMIZE_FOR_BOTH))
4557 : return false;
4558 :
4559 0 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4560 0 : gimple_call_set_lhs (call, phi_result);
4561 0 : gsi_insert_before (gsi, call, GSI_SAME_STMT); /* The call now defines PHI_RESULT; PHI is not removed here (NOTE(review): presumably the caller removes it -- confirm).  */
4562 :
4563 0 : return true;
4564 : }
4565 :
4566 : /* Recognize for unsigned x
4567 : x = y - z;
4568 : if (x > y)
4569 : where there are other uses of x and replace it with
4570 : _7 = .SUB_OVERFLOW (y, z);
4571 : x = REALPART_EXPR <_7>;
4572 : _8 = IMAGPART_EXPR <_7>;
4573 : if (_8)
4574 : and similarly for addition.
4575 :
4576 : Also recognize:
4577 : yc = (type) y;
4578 : zc = (type) z;
4579 : x = yc + zc;
4580 : if (x > max)
4581 : where y and z have unsigned types with maximum max
4582 : and there are other uses of x and all of those cast x
4583 : back to that unsigned type and again replace it with
4584 : _7 = .ADD_OVERFLOW (y, z);
4585 : _9 = REALPART_EXPR <_7>;
4586 : _8 = IMAGPART_EXPR <_7>;
4587 : if (_8)
4588 : and replace (utype) x with _9.
4589 : Or with x >> popcount (max) instead of x > max.
4590 :
4591 : Also recognize:
4592 : x = ~z;
4593 : if (y > x)
4594 : and replace it with
4595 : _7 = .ADD_OVERFLOW (y, z);
4596 : _8 = IMAGPART_EXPR <_7>;
4597 : if (_8)
4598 :
4599 : And also recognize:
4600 : z = x * y;
4601 : if (x != 0)
4602 : goto <bb 3>; [50.00%]
4603 : else
4604 : goto <bb 4>; [50.00%]
4605 :
4606 : <bb 3> [local count: 536870913]:
4607 : _2 = z / x;
4608 : _9 = _2 != y;
4609 : _10 = (int) _9;
4610 :
4611 : <bb 4> [local count: 1073741824]:
4612 : # iftmp.0_3 = PHI <_10(3), 0(2)>
4613 : and replace it with
4614 : _7 = .MUL_OVERFLOW (x, y);
4615 : z = REALPART_EXPR <_7>;
4616 : _8 = IMAGPART_EXPR <_7>;
4617 : _9 = _8 != 0;
4618 : iftmp.0_3 = (int) _9; STMT is the assignment being examined and CODE its operation (PLUS_EXPR, MINUS_EXPR, MULT_EXPR or BIT_NOT_EXPR); GSI is expected to point at STMT and may be repositioned. CFG_CHANGED is only passed on to maybe_optimize_guarding_check. Returns true only in the BIT_NOT_EXPR case, after STMT has been removed and its lhs released; every other path returns false, including those that did rewrite the IL. */
4619 :
4620 : static bool
4621 3290802 : match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4622 : enum tree_code code, bool *cfg_changed)
4623 : {
4624 3290802 : tree lhs = gimple_assign_lhs (stmt);
4625 3290802 : tree type = TREE_TYPE (lhs);
4626 3290802 : use_operand_p use_p;
4627 3290802 : imm_use_iterator iter;
4628 3290802 : bool use_seen = false;
4629 3290802 : bool ovf_use_seen = false;
4630 3290802 : gimple *use_stmt;
4631 3290802 : gimple *add_stmt = NULL;
4632 3290802 : bool add_first = false;
4633 3290802 : gimple *cond_stmt = NULL;
4634 3290802 : gimple *cast_stmt = NULL;
4635 3290802 : tree cast_lhs = NULL_TREE;
4636 :
4637 3290802 : gcc_checking_assert (code == PLUS_EXPR
4638 : || code == MINUS_EXPR
4639 : || code == MULT_EXPR
4640 : || code == BIT_NOT_EXPR);
4641 3290802 : if (!INTEGRAL_TYPE_P (type)
4642 2770102 : || !TYPE_UNSIGNED (type)
4643 1929743 : || has_zero_uses (lhs)
4644 3290802 : || (code != PLUS_EXPR
4645 1929396 : && code != MULT_EXPR
4646 171308 : && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4647 147248 : TYPE_MODE (type)) == CODE_FOR_nothing))
4648 1363397 : return false; /* Need an unsigned integral LHS with uses and, for MINUS_EXPR/BIT_NOT_EXPR, a usubv4/uaddv4 handler.  */
4649 :
4650 1927405 : tree rhs1 = gimple_assign_rhs1 (stmt);
4651 1927405 : tree rhs2 = gimple_assign_rhs2 (stmt);
4652 7263572 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) /* Classify the non-debug uses of LHS: overflow checks versus everything else.  */
4653 : {
4654 3415182 : use_stmt = USE_STMT (use_p);
4655 3415182 : if (is_gimple_debug (use_stmt))
4656 579767 : continue;
4657 :
4658 2835415 : tree other = NULL_TREE;
4659 2835415 : if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4660 : {
4661 6906 : if (code == BIT_NOT_EXPR)
4662 : {
4663 187 : gcc_assert (other);
4664 187 : if (TREE_CODE (other) != SSA_NAME)
4665 0 : return false;
4666 187 : if (rhs2 == NULL)
4667 187 : rhs2 = other;
4668 : else
4669 : return false;
4670 187 : cond_stmt = use_stmt;
4671 : }
4672 : ovf_use_seen = true;
4673 : }
4674 : else
4675 : {
4676 2828509 : use_seen = true;
4677 2828509 : if (code == MULT_EXPR
4678 2828509 : && cast_stmt == NULL
4679 2828509 : && gimple_assign_cast_p (use_stmt))
4680 : {
4681 33041 : cast_lhs = gimple_assign_lhs (use_stmt);
4682 66082 : if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4683 32493 : && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4684 62197 : && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4685 29156 : == TYPE_PRECISION (TREE_TYPE (lhs))))
4686 : cast_stmt = use_stmt;
4687 : else
4688 : cast_lhs = NULL_TREE;
4689 : }
4690 : }
4691 2835415 : if (ovf_use_seen && use_seen)
4692 : break;
4693 0 : }
4694 :
4695 1927405 : if (!ovf_use_seen
4696 1927405 : && code == MULT_EXPR
4697 449559 : && cast_stmt)
4698 : {
4699 28788 : if (TREE_CODE (rhs1) != SSA_NAME
4700 28788 : || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4701 : return false;
4702 94556 : FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4703 : {
4704 36980 : use_stmt = USE_STMT (use_p);
4705 36980 : if (is_gimple_debug (use_stmt))
4706 1397 : continue;
4707 :
4708 35583 : if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4709 : NULL_TREE, NULL))
4710 36980 : ovf_use_seen = true;
4711 28788 : }
4712 28788 : }
4713 : else
4714 : {
4715 : cast_stmt = NULL;
4716 : cast_lhs = NULL_TREE;
4717 : }
4718 :
4719 1927405 : tree maxval = NULL_TREE; /* Set only by the widened-addition (x > max) fallback below.  */
4720 1927405 : if (!ovf_use_seen
4721 13814 : || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4722 6538 : || (code == PLUS_EXPR
4723 6268 : && optab_handler (uaddv4_optab,
4724 6268 : TYPE_MODE (type)) == CODE_FOR_nothing)
4725 1940924 : || (code == MULT_EXPR
4726 221 : && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4727 148 : TYPE_MODE (type)) == CODE_FOR_nothing
4728 3 : && (use_seen
4729 3 : || cast_stmt
4730 0 : || !can_mult_highpart_p (TYPE_MODE (type), true))))
4731 : {
4732 1920722 : if (code != PLUS_EXPR) /* Only PLUS_EXPR has the widened-addition fallback handled in this block.  */
4733 : return false;
4734 1326248 : if (TREE_CODE (rhs1) != SSA_NAME
4735 1326248 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4736 : return false;
4737 322651 : rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4738 322651 : tree type1 = TREE_TYPE (rhs1);
4739 322651 : if (!INTEGRAL_TYPE_P (type1)
4740 174946 : || !TYPE_UNSIGNED (type1)
4741 32319 : || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4742 336913 : || (TYPE_PRECISION (type1)
4743 28524 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4744 312865 : return false;
4745 9786 : if (TREE_CODE (rhs2) == INTEGER_CST)
4746 : {
4747 3995 : if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4748 3995 : TYPE_PRECISION (type1),
4749 7990 : UNSIGNED), 0))
4750 : return false;
4751 1475 : rhs2 = fold_convert (type1, rhs2);
4752 : }
4753 : else
4754 : {
4755 5791 : if (TREE_CODE (rhs2) != SSA_NAME
4756 5791 : || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4757 : return false;
4758 2427 : rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4759 2427 : tree type2 = TREE_TYPE (rhs2);
4760 2427 : if (!INTEGRAL_TYPE_P (type2)
4761 1210 : || !TYPE_UNSIGNED (type2)
4762 387 : || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4763 2786 : || (TYPE_PRECISION (type2)
4764 718 : != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4765 2082 : return false;
4766 : }
4767 1820 : if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4768 : type = type1;
4769 : else
4770 5 : type = TREE_TYPE (rhs2);
4771 :
4772 1820 : if (TREE_CODE (type) != INTEGER_TYPE
4773 3640 : || optab_handler (uaddv4_optab,
4774 1820 : TYPE_MODE (type)) == CODE_FOR_nothing)
4775 0 : return false;
4776 :
4777 1820 : maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4778 : UNSIGNED));
4779 1820 : ovf_use_seen = false;
4780 1820 : use_seen = false;
4781 1820 : basic_block use_bb = NULL;
4782 3823 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4783 : {
4784 1943 : use_stmt = USE_STMT (use_p);
4785 1943 : if (is_gimple_debug (use_stmt))
4786 119 : continue;
4787 :
4788 1824 : if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4789 : {
4790 12 : ovf_use_seen = true;
4791 12 : use_bb = gimple_bb (use_stmt);
4792 : }
4793 : else
4794 : {
4795 1812 : if (!gimple_assign_cast_p (use_stmt)
4796 1812 : || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4797 : return false;
4798 114 : tree use_lhs = gimple_assign_lhs (use_stmt);
4799 228 : if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4800 228 : || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4801 114 : > TYPE_PRECISION (type)))
4802 : return false;
4803 : use_seen = true;
4804 : }
4805 1760 : }
4806 60 : if (!ovf_use_seen)
4807 : return false;
4808 12 : if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4809 : {
4810 2 : if (!use_seen)
4811 : return false;
4812 2 : tree new_rhs1 = make_ssa_name (type);
4813 2 : gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4814 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4815 2 : rhs1 = new_rhs1;
4816 : }
4817 10 : else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4818 : {
4819 2 : if (!use_seen)
4820 : return false;
4821 2 : tree new_rhs2 = make_ssa_name (type);
4822 2 : gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4823 2 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4824 2 : rhs2 = new_rhs2;
4825 : }
4826 8 : else if (!use_seen)
4827 : {
4828 : /* If there are no uses of the wider addition, check if
4829 : forwprop has not created a narrower addition.
4830 : Require it to be in the same bb as the overflow check. */
4831 18 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4832 : {
4833 10 : use_stmt = USE_STMT (use_p);
4834 10 : if (is_gimple_debug (use_stmt))
4835 0 : continue;
4836 :
4837 10 : if (use_stmt == stmt)
4838 0 : continue;
4839 :
4840 10 : if (!is_gimple_assign (use_stmt)
4841 10 : || gimple_bb (use_stmt) != use_bb
4842 20 : || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4843 2 : continue;
4844 :
4845 8 : if (gimple_assign_rhs1 (use_stmt) == rhs1)
4846 : {
4847 8 : if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4848 : rhs2, 0))
4849 0 : continue;
4850 : }
4851 0 : else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4852 : {
4853 0 : if (gimple_assign_rhs1 (use_stmt) != rhs2)
4854 0 : continue;
4855 : }
4856 : else
4857 0 : continue;
4858 :
4859 8 : add_stmt = use_stmt;
4860 8 : break;
4861 8 : }
4862 8 : if (add_stmt == NULL)
4863 : return false;
4864 :
4865 : /* If stmt and add_stmt are in the same bb, we need to find out
4866 : which one is earlier. If they are in different bbs, we've
4867 : checked add_stmt is in the same bb as one of the uses of the
4868 : stmt lhs, so stmt needs to dominate add_stmt too. */
4869 8 : if (gimple_bb (stmt) == gimple_bb (add_stmt))
4870 : {
4871 8 : gimple_stmt_iterator gsif = *gsi;
4872 8 : gimple_stmt_iterator gsib = *gsi;
4873 8 : int i;
4874 : /* Search both forward and backward from stmt and have a small
4875 : upper bound. */
4876 20 : for (i = 0; i < 128; i++)
4877 : {
4878 20 : if (!gsi_end_p (gsib))
4879 : {
4880 18 : gsi_prev_nondebug (&gsib);
4881 18 : if (gsi_stmt (gsib) == add_stmt)
4882 : {
4883 : add_first = true;
4884 : break;
4885 : }
4886 : }
4887 2 : else if (gsi_end_p (gsif))
4888 : break;
4889 18 : if (!gsi_end_p (gsif))
4890 : {
4891 18 : gsi_next_nondebug (&gsif);
4892 18 : if (gsi_stmt (gsif) == add_stmt)
4893 : break;
4894 : }
4895 : }
4896 8 : if (i == 128)
4897 0 : return false;
4898 8 : if (add_first)
4899 2 : *gsi = gsi_for_stmt (add_stmt);
4900 : }
4901 : }
4902 : }
4903 :
4904 6695 : if (code == BIT_NOT_EXPR) /* For x = ~z the new statements are inserted at the comparison, not at STMT.  */
4905 170 : *gsi = gsi_for_stmt (cond_stmt);
4906 :
4907 6695 : auto_vec<gimple *, 8> mul_stmts;
4908 6695 : if (code == MULT_EXPR && cast_stmt)
4909 : {
4910 75 : type = TREE_TYPE (cast_lhs);
4911 75 : gimple *g = SSA_NAME_DEF_STMT (rhs1);
4912 75 : if (gimple_assign_cast_p (g)
4913 38 : && useless_type_conversion_p (type,
4914 38 : TREE_TYPE (gimple_assign_rhs1 (g)))
4915 113 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4916 : rhs1 = gimple_assign_rhs1 (g);
4917 : else
4918 : {
4919 37 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4920 37 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4921 37 : rhs1 = gimple_assign_lhs (g);
4922 37 : mul_stmts.quick_push (g);
4923 : }
4924 75 : if (TREE_CODE (rhs2) == INTEGER_CST)
4925 32 : rhs2 = fold_convert (type, rhs2);
4926 : else
4927 : {
4928 43 : g = SSA_NAME_DEF_STMT (rhs2);
4929 43 : if (gimple_assign_cast_p (g)
4930 22 : && useless_type_conversion_p (type,
4931 22 : TREE_TYPE (gimple_assign_rhs1 (g)))
4932 65 : && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4933 : rhs2 = gimple_assign_rhs1 (g);
4934 : else
4935 : {
4936 21 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4937 21 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4938 21 : rhs2 = gimple_assign_lhs (g);
4939 21 : mul_stmts.quick_push (g);
4940 : }
4941 : }
4942 : }
4943 6695 : tree ctype = build_complex_type (type);
4944 13245 : gcall *g = gimple_build_call_internal (code == MULT_EXPR
4945 : ? IFN_MUL_OVERFLOW
4946 : : code != MINUS_EXPR
4947 6550 : ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4948 : 2, rhs1, rhs2);
4949 6695 : tree ctmp = make_ssa_name (ctype);
4950 6695 : gimple_call_set_lhs (g, ctmp);
4951 6695 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
4952 6695 : tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs; /* TYPE was switched away from the type of LHS in those two cases, so a fresh name is used.  */
4953 6695 : gassign *g2;
4954 6695 : if (code != BIT_NOT_EXPR)
4955 : {
4956 6525 : g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4957 : build1 (REALPART_EXPR, type, ctmp));
4958 6525 : if (maxval || cast_stmt)
4959 : {
4960 87 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4961 87 : if (add_first)
4962 2 : *gsi = gsi_for_stmt (stmt);
4963 : }
4964 : else
4965 6438 : gsi_replace (gsi, g2, true);
4966 6525 : if (code == MULT_EXPR)
4967 : {
4968 145 : mul_stmts.quick_push (g);
4969 145 : mul_stmts.quick_push (g2);
4970 145 : if (cast_stmt)
4971 : {
4972 75 : g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4973 75 : gsi_replace (gsi, g2, true);
4974 75 : mul_stmts.quick_push (g2);
4975 : }
4976 : }
4977 : }
4978 6695 : tree ovf = make_ssa_name (type);
4979 6695 : g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4980 : build1 (IMAGPART_EXPR, type, ctmp));
4981 6695 : if (code != BIT_NOT_EXPR)
4982 6525 : gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4983 : else
4984 170 : gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4985 6695 : if (code == MULT_EXPR)
4986 145 : mul_stmts.quick_push (g2);
4987 :
4988 36277 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs) /* Rewrite every overflow check found among the uses to test OVF instead.  */
4989 : {
4990 22962 : if (is_gimple_debug (use_stmt))
4991 5577 : continue;
4992 :
4993 17385 : gimple *orig_use_stmt = use_stmt;
4994 17385 : int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4995 : maxval, NULL);
4996 17385 : if (ovf_use == 0)
4997 : {
4998 10656 : gcc_assert (code != BIT_NOT_EXPR);
4999 10656 : if (maxval)
5000 : {
5001 4 : tree use_lhs = gimple_assign_lhs (use_stmt);
5002 4 : gimple_assign_set_rhs1 (use_stmt, new_lhs);
5003 4 : if (useless_type_conversion_p (TREE_TYPE (use_lhs),
5004 4 : TREE_TYPE (new_lhs)))
5005 4 : gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
5006 4 : update_stmt (use_stmt);
5007 : }
5008 10656 : continue;
5009 10656 : }
5010 6729 : if (gimple_code (use_stmt) == GIMPLE_COND)
5011 : {
5012 4490 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
5013 4490 : gimple_cond_set_lhs (cond_stmt, ovf);
5014 4490 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
5015 4640 : gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
5016 : }
5017 : else
5018 : {
5019 2239 : gcc_checking_assert (is_gimple_assign (use_stmt));
5020 2239 : if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
5021 : {
5022 2239 : if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
5023 : {
5024 6 : g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
5025 : ovf_use == 1 ? NE_EXPR : EQ_EXPR,
5026 : ovf, build_int_cst (type, 0));
5027 6 : gimple_stmt_iterator gsiu = gsi_for_stmt (use_stmt);
5028 6 : gsi_insert_before (&gsiu, g2, GSI_SAME_STMT);
5029 6 : gimple_assign_set_rhs_with_ops (&gsiu, NOP_EXPR,
5030 : gimple_assign_lhs (g2));
5031 6 : update_stmt (use_stmt);
5032 6 : use_operand_p use;
5033 6 : single_imm_use (gimple_assign_lhs (use_stmt), &use,
5034 : &use_stmt);
5035 6 : if (gimple_code (use_stmt) == GIMPLE_COND)
5036 : {
5037 0 : gcond *cond_stmt = as_a <gcond *> (use_stmt);
5038 0 : gimple_cond_set_lhs (cond_stmt, ovf);
5039 0 : gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
5040 : }
5041 : else
5042 : {
5043 6 : gcc_checking_assert (is_gimple_assign (use_stmt));
5044 6 : if (gimple_assign_rhs_class (use_stmt)
5045 : == GIMPLE_BINARY_RHS)
5046 : {
5047 0 : gimple_assign_set_rhs1 (use_stmt, ovf);
5048 0 : gimple_assign_set_rhs2 (use_stmt,
5049 : build_int_cst (type, 0));
5050 : }
5051 6 : else if (gimple_assign_cast_p (use_stmt))
5052 6 : gimple_assign_set_rhs1 (use_stmt, ovf);
5053 : else
5054 : {
5055 0 : tree_code sc = gimple_assign_rhs_code (use_stmt);
5056 0 : gcc_checking_assert (sc == COND_EXPR);
5057 0 : tree cond = gimple_assign_rhs1 (use_stmt);
5058 0 : cond = build2 (TREE_CODE (cond),
5059 : boolean_type_node, ovf,
5060 : build_int_cst (type, 0));
5061 0 : gimple_assign_set_rhs1 (use_stmt, cond);
5062 : }
5063 : }
5064 6 : update_stmt (use_stmt);
5065 6 : gsi_remove (&gsiu, true);
5066 6 : gsiu = gsi_for_stmt (g2);
5067 6 : gsi_remove (&gsiu, true);
5068 6 : continue;
5069 6 : }
5070 : else
5071 : {
5072 2233 : gimple_assign_set_rhs1 (use_stmt, ovf);
5073 2233 : gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
5074 2382 : gimple_assign_set_rhs_code (use_stmt,
5075 : ovf_use == 1
5076 : ? NE_EXPR : EQ_EXPR);
5077 : }
5078 : }
5079 : else
5080 : {
5081 0 : gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
5082 : == COND_EXPR);
5083 0 : tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
5084 : boolean_type_node, ovf,
5085 : build_int_cst (type, 0));
5086 0 : gimple_assign_set_rhs1 (use_stmt, cond);
5087 : }
5088 : }
5089 6723 : update_stmt (use_stmt);
5090 6723 : if (code == MULT_EXPR && use_stmt != orig_use_stmt)
5091 : {
5092 145 : gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
5093 145 : maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
5094 : cfg_changed);
5095 145 : use_operand_p use;
5096 145 : gimple *cast_stmt;
5097 145 : if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
5098 : &cast_stmt)
5099 145 : && gimple_assign_cast_p (cast_stmt))
5100 : {
5101 2 : gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
5102 2 : gsi_remove (&gsi3, true);
5103 2 : release_ssa_name (gimple_assign_lhs (cast_stmt));
5104 : }
5105 145 : gsi_remove (&gsi2, true);
5106 145 : release_ssa_name (gimple_assign_lhs (orig_use_stmt));
5107 : }
5108 6695 : }
5109 6695 : if (maxval) /* Remove the original wide addition and turn ADD_STMT, if any, into a copy of NEW_LHS.  */
5110 : {
5111 12 : gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
5112 12 : gsi_remove (&gsi2, true);
5113 12 : if (add_stmt)
5114 : {
5115 8 : gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
5116 : new_lhs);
5117 8 : gsi2 = gsi_for_stmt (add_stmt);
5118 8 : gsi_replace (&gsi2, g, true);
5119 : }
5120 : }
5121 6683 : else if (code == BIT_NOT_EXPR)
5122 : {
5123 170 : *gsi = gsi_for_stmt (stmt);
5124 170 : gsi_remove (gsi, true);
5125 170 : release_ssa_name (lhs);
5126 170 : return true;
5127 : }
5128 : return false; /* Also reached after a successful PLUS_EXPR/MINUS_EXPR/MULT_EXPR rewrite.  */
5129 6695 : }
5130 :
5131 : /* Helper of match_uaddc_usubc. If G is a cast whose operand is an SSA_NAME
5132 : of integral type (other than a 1-bit signed type, for which the cast would not preserve a [0, 1] value range)
5133 : and the lhs of the cast has a single use, return the statement defining that operand; otherwise return G unchanged. */
5134 :
5135 : static gimple *
5136 2039847 : uaddc_cast (gimple *g)
5137 : {
5138 2039847 : if (!gimple_assign_cast_p (g))
5139 : return g;
5140 490022 : tree op = gimple_assign_rhs1 (g);
5141 490022 : if (TREE_CODE (op) == SSA_NAME
5142 413722 : && INTEGRAL_TYPE_P (TREE_TYPE (op))
5143 289469 : && (TYPE_PRECISION (TREE_TYPE (op)) > 1
5144 5120 : || TYPE_UNSIGNED (TREE_TYPE (op)))
5145 779491 : && has_single_use (gimple_assign_lhs (g)))
5146 175873 : return SSA_NAME_DEF_STMT (op);
5147 : return g;
5148 : }
5149 :
5150 : /* Helper of match_uaddc_usubc. If G is an assignment X != 0 with X an SSA_NAME and the lhs of G has a single use,
5151 : return the statement defining X (such a comparison also preserves a [0, 1] value range); otherwise return G unchanged. */
5152 :
5153 : static gimple *
5154 2040006 : uaddc_ne0 (gimple *g)
5155 : {
5156 2040006 : if (is_gimple_assign (g)
5157 1246867 : && gimple_assign_rhs_code (g) == NE_EXPR
5158 53737 : && integer_zerop (gimple_assign_rhs2 (g))
5159 5372 : && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
5160 2045378 : && has_single_use (gimple_assign_lhs (g)))
5161 5103 : return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
5162 : return g;
5163 : }
5164 :
5165 : /* Return true if G is an assignment whose rhs code is PART (callers pass REALPART_EXPR or IMAGPART_EXPR)
5166 : and whose operand is an SSA_NAME. */
5167 :
5168 : static bool
5169 2040843 : uaddc_is_cplxpart (gimple *g, tree_code part)
5170 : {
5171 2040843 : return (is_gimple_assign (g)
5172 1246319 : && gimple_assign_rhs_code (g) == part
5173 2043170 : && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME); /* rhs1 is the PART expression itself; its operand 0 is what the part is taken from.  */
5174 : }
5175 :
5176 : /* Try to match e.g.
5177 : _29 = .ADD_OVERFLOW (_3, _4);
5178 : _30 = REALPART_EXPR <_29>;
5179 : _31 = IMAGPART_EXPR <_29>;
5180 : _32 = .ADD_OVERFLOW (_30, _38);
5181 : _33 = REALPART_EXPR <_32>;
5182 : _34 = IMAGPART_EXPR <_32>;
5183 : _35 = _31 + _34;
5184 : as
5185 : _36 = .UADDC (_3, _4, _38);
5186 : _33 = REALPART_EXPR <_36>;
5187 : _35 = IMAGPART_EXPR <_36>;
5188 : or
5189 : _22 = .SUB_OVERFLOW (_6, _5);
5190 : _23 = REALPART_EXPR <_22>;
5191 : _24 = IMAGPART_EXPR <_22>;
5192 : _25 = .SUB_OVERFLOW (_23, _37);
5193 : _26 = REALPART_EXPR <_25>;
5194 : _27 = IMAGPART_EXPR <_25>;
5195 : _28 = _24 | _27;
5196 : as
5197 : _29 = .USUBC (_6, _5, _37);
5198 : _26 = REALPART_EXPR <_29>;
5199 : _28 = IMAGPART_EXPR <_29>;
5200 : provided _38 or _37 above have [0, 1] range
5201 : and _3, _4 and _30 or _6, _5 and _23 are unsigned
5202 : integral types with the same precision. Whether + or | or ^ is
5203 : used on the IMAGPART_EXPR results doesn't matter, with one of
5204 : added or subtracted operands in [0, 1] range at most one
5205 : .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
5206 :
5207 : static bool
5208 2753475 : match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
5209 : {
5210 2753475 : tree rhs[4];
5211 2753475 : rhs[0] = gimple_assign_rhs1 (stmt);
5212 2753475 : rhs[1] = gimple_assign_rhs2 (stmt);
5213 2753475 : rhs[2] = NULL_TREE;
5214 2753475 : rhs[3] = NULL_TREE;
5215 2753475 : tree type = TREE_TYPE (rhs[0]);
5216 2753475 : if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
5217 : return false;
5218 :
5219 1614203 : auto_vec<gimple *, 2> temp_stmts;
5220 1614203 : if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
5221 : {
5222 : /* If overflow flag is ignored on the MSB limb, we can end up with
5223 : the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
5224 : or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
5225 : thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
5226 : the limb below the MSB, but also create another .UADDC/.USUBC call
5227 : for the last limb.
5228 :
5229 : First look through assignments with the same rhs code as CODE,
5230 : with the exception that subtraction of a constant is canonicalized
5231 : into addition of its negation. rhs[0] will be minuend for
5232 : subtractions and one of addends for addition, all other assigned
5233 : rhs[i] operands will be subtrahends or other addends. */
5234 1494452 : while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
5235 : {
5236 1466375 : gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
5237 1466375 : if (has_single_use (rhs[0])
5238 485624 : && is_gimple_assign (g)
5239 1893978 : && (gimple_assign_rhs_code (g) == code
5240 395713 : || (code == MINUS_EXPR
5241 52554 : && gimple_assign_rhs_code (g) == PLUS_EXPR
5242 16773 : && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
5243 : {
5244 45000 : tree r2 = gimple_assign_rhs2 (g);
5245 45000 : if (gimple_assign_rhs_code (g) != code)
5246 : {
5247 13110 : r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
5248 13110 : if (!r2)
5249 : break;
5250 : }
5251 45000 : rhs[0] = gimple_assign_rhs1 (g);
5252 45000 : tree &r = rhs[2] ? rhs[3] : rhs[2];
5253 45000 : r = r2;
5254 45000 : temp_stmts.quick_push (g);
5255 : }
5256 : else
5257 : break;
5258 : }
5259 4348356 : for (int i = 1; i <= 2; ++i)
5260 2943432 : while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
5261 : {
5262 529815 : gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
5263 529815 : if (has_single_use (rhs[i])
5264 267963 : && is_gimple_assign (g)
5265 780861 : && gimple_assign_rhs_code (g) == PLUS_EXPR)
5266 : {
5267 44528 : rhs[i] = gimple_assign_rhs1 (g);
5268 44528 : if (rhs[2])
5269 9332 : rhs[3] = gimple_assign_rhs2 (g);
5270 : else
5271 35196 : rhs[2] = gimple_assign_rhs2 (g);
5272 44528 : temp_stmts.quick_push (g);
5273 : }
5274 : else
5275 : break;
5276 : }
5277 : /* If there are just 3 addends or one minuend and two subtrahends,
5278 : check for UADDC or USUBC being pattern recognized earlier.
5279 : Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
5280 : got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
5281 : etc. */
5282 1449452 : if (rhs[2] && !rhs[3])
5283 : {
5284 300608 : for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
5285 174691 : if (TREE_CODE (rhs[i]) == SSA_NAME)
5286 : {
5287 136791 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5288 136791 : im = uaddc_ne0 (im);
5289 136791 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5290 : {
5291 : /* We found one of the 3 addends or 2 subtrahends to be
5292 : __imag__ of something, verify it is .UADDC/.USUBC. */
5293 217 : tree rhs1 = gimple_assign_rhs1 (im);
5294 217 : gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
5295 217 : tree ovf_lhs = NULL_TREE;
5296 217 : tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
5297 237 : if (gimple_call_internal_p (ovf, code == PLUS_EXPR
5298 : ? IFN_ADD_OVERFLOW
5299 : : IFN_SUB_OVERFLOW))
5300 : {
5301 : /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
5302 : This is for the case of 2 chained .UADDC/.USUBC,
5303 : where the first one uses 0 carry-in and the second
5304 : one ignores the carry-out.
5305 : So, something like:
5306 : _16 = .ADD_OVERFLOW (_1, _2);
5307 : _17 = REALPART_EXPR <_16>;
5308 : _18 = IMAGPART_EXPR <_16>;
5309 : _15 = _3 + _4;
5310 : _12 = _15 + _18;
5311 : where the first 3 statements come from the lower
5312 : limb addition and the last 2 from the higher limb
5313 : which ignores carry-out. */
5314 199 : ovf_lhs = gimple_call_lhs (ovf);
5315 199 : tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
5316 199 : ovf_arg1 = gimple_call_arg (ovf, 0);
5317 199 : ovf_arg2 = gimple_call_arg (ovf, 1);
5318 : /* In that case we need to punt if the types don't
5319 : match. */
5320 199 : if (!types_compatible_p (type, ovf_lhs_type)
5321 199 : || !types_compatible_p (type, TREE_TYPE (ovf_arg1))
5322 395 : || !types_compatible_p (type,
5323 196 : TREE_TYPE (ovf_arg2)))
5324 : ovf_lhs = NULL_TREE;
5325 : else
5326 : {
5327 485 : for (int i = (code == PLUS_EXPR ? 1 : 0);
5328 485 : i >= 0; --i)
5329 : {
5330 343 : tree r = gimple_call_arg (ovf, i);
5331 343 : if (TREE_CODE (r) != SSA_NAME)
5332 0 : continue;
5333 343 : if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
5334 : REALPART_EXPR))
5335 : {
5336 : /* Punt if one of the args which isn't
5337 : subtracted is __real__; that could
5338 : then prevent better match later.
5339 : Consider:
5340 : _3 = .ADD_OVERFLOW (_1, _2);
5341 : _4 = REALPART_EXPR <_3>;
5342 : _5 = IMAGPART_EXPR <_3>;
5343 : _7 = .ADD_OVERFLOW (_4, _6);
5344 : _8 = REALPART_EXPR <_7>;
5345 : _9 = IMAGPART_EXPR <_7>;
5346 : _12 = _10 + _11;
5347 : _13 = _12 + _9;
5348 : _14 = _13 + _5;
5349 : We want to match this when called on
5350 : the last stmt as a pair of .UADDC calls,
5351 : but without this check we could turn
5352 : that prematurely on _13 = _12 + _9;
5353 : stmt into .UADDC with 0 carry-in just
5354 : on the second .ADD_OVERFLOW call and
5355 : another replacing the _12 and _13
5356 : additions. */
5357 : ovf_lhs = NULL_TREE;
5358 : break;
5359 : }
5360 : }
5361 : }
5362 192 : if (ovf_lhs)
5363 : {
5364 142 : use_operand_p use_p;
5365 142 : imm_use_iterator iter;
5366 142 : tree re_lhs = NULL_TREE;
5367 568 : FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
5368 : {
5369 284 : gimple *use_stmt = USE_STMT (use_p);
5370 284 : if (is_gimple_debug (use_stmt))
5371 0 : continue;
5372 284 : if (use_stmt == im)
5373 142 : continue;
5374 142 : if (!uaddc_is_cplxpart (use_stmt,
5375 : REALPART_EXPR))
5376 : {
5377 : ovf_lhs = NULL_TREE;
5378 : break;
5379 : }
5380 142 : re_lhs = gimple_assign_lhs (use_stmt);
5381 142 : }
5382 142 : if (ovf_lhs && re_lhs)
5383 : {
5384 520 : FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
5385 : {
5386 290 : gimple *use_stmt = USE_STMT (use_p);
5387 290 : if (is_gimple_debug (use_stmt))
5388 109 : continue;
5389 181 : internal_fn ifn
5390 181 : = gimple_call_internal_fn (ovf);
5391 : /* Punt if the __real__ of lhs is used
5392 : in the same .*_OVERFLOW call.
5393 : Consider:
5394 : _3 = .ADD_OVERFLOW (_1, _2);
5395 : _4 = REALPART_EXPR <_3>;
5396 : _5 = IMAGPART_EXPR <_3>;
5397 : _7 = .ADD_OVERFLOW (_4, _6);
5398 : _8 = REALPART_EXPR <_7>;
5399 : _9 = IMAGPART_EXPR <_7>;
5400 : _12 = _10 + _11;
5401 : _13 = _12 + _5;
5402 : _14 = _13 + _9;
5403 : We want to match this when called on
5404 : the last stmt as a pair of .UADDC calls,
5405 : but without this check we could turn
5406 : that prematurely on _13 = _12 + _5;
5407 : stmt into .UADDC with 0 carry-in just
5408 : on the first .ADD_OVERFLOW call and
5409 : another replacing the _12 and _13
5410 : additions. */
5411 181 : if (gimple_call_internal_p (use_stmt, ifn))
5412 : {
5413 : ovf_lhs = NULL_TREE;
5414 : break;
5415 : }
5416 142 : }
5417 : }
5418 : }
5419 : }
5420 142 : if ((ovf_lhs
5421 138 : || gimple_call_internal_p (ovf,
5422 : code == PLUS_EXPR
5423 : ? IFN_UADDC : IFN_USUBC))
5424 250 : && (optab_handler (code == PLUS_EXPR
5425 : ? uaddc5_optab : usubc5_optab,
5426 94 : TYPE_MODE (type))
5427 : != CODE_FOR_nothing))
5428 : {
5429 : /* And in that case build another .UADDC/.USUBC
5430 : call for the most significand limb addition.
5431 : Overflow bit is ignored here. */
5432 63 : if (i != 2)
5433 63 : std::swap (rhs[i], rhs[2]);
5434 63 : gimple *g
5435 77 : = gimple_build_call_internal (code == PLUS_EXPR
5436 : ? IFN_UADDC
5437 : : IFN_USUBC,
5438 : 3, rhs[0], rhs[1],
5439 : rhs[2]);
5440 63 : tree nlhs = make_ssa_name (build_complex_type (type));
5441 63 : gimple_call_set_lhs (g, nlhs);
5442 63 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5443 63 : tree ilhs = gimple_assign_lhs (stmt);
5444 63 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5445 : build1 (REALPART_EXPR,
5446 63 : TREE_TYPE (ilhs),
5447 : nlhs));
5448 63 : gsi_replace (gsi, g, true);
5449 : /* And if it is initialized from result of __imag__
5450 : of .{ADD,SUB}_OVERFLOW call, replace that
5451 : call with .U{ADD,SUB}C call with the same arguments,
5452 : just 0 added as third argument. This isn't strictly
5453 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5454 : produce the same result, but may result in better
5455 : generated code on some targets where the backend can
5456 : better prepare in how the result will be used. */
5457 63 : if (ovf_lhs)
5458 : {
5459 57 : tree zero = build_zero_cst (type);
5460 57 : g = gimple_build_call_internal (code == PLUS_EXPR
5461 : ? IFN_UADDC
5462 : : IFN_USUBC,
5463 : 3, ovf_arg1,
5464 : ovf_arg2, zero);
5465 57 : gimple_call_set_lhs (g, ovf_lhs);
5466 57 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
5467 57 : gsi_replace (&gsi2, g, true);
5468 : }
5469 63 : return true;
5470 : }
5471 : }
5472 : }
5473 : return false;
5474 : }
5475 1386462 : if (code == MINUS_EXPR && !rhs[2])
5476 : return false;
5477 320 : if (code == MINUS_EXPR)
5478 : /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
5479 : So, for MINUS_EXPR swap the single added rhs operand (others are
5480 : subtracted) to rhs[3]. */
5481 320 : std::swap (rhs[0], rhs[3]);
5482 : }
5483 : /* Walk from both operands of STMT (for +/- even sometimes from
5484 : all the 4 addends or 3 subtrahends), see through casts and != 0
5485 : statements which would preserve [0, 1] range of values and
5486 : check which is initialized from __imag__. */
5487 7211755 : gimple *im1 = NULL, *im2 = NULL;
5488 14422232 : for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
5489 5769501 : if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
5490 : {
5491 1902964 : gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5492 1902964 : im = uaddc_ne0 (im);
5493 1902964 : if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
5494 : {
5495 1588 : if (im1 == NULL)
5496 : {
5497 1200 : im1 = im;
5498 1200 : if (i != 0)
5499 326 : std::swap (rhs[0], rhs[i]);
5500 : }
5501 : else
5502 : {
5503 388 : im2 = im;
5504 388 : if (i != 1)
5505 22 : std::swap (rhs[1], rhs[i]);
5506 : break;
5507 : }
5508 : }
5509 : }
5510 : /* If we don't find at least two, punt. */
5511 1442642 : if (!im2)
5512 : return false;
5513 : /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
5514 : either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
5515 : uaddc5/usubc5 named pattern for the corresponding mode. */
5516 388 : gimple *ovf1
5517 388 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
5518 388 : gimple *ovf2
5519 388 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
5520 388 : internal_fn ifn;
5521 388 : if (!is_gimple_call (ovf1)
5522 388 : || !gimple_call_internal_p (ovf1)
5523 388 : || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
5524 60 : && ifn != IFN_SUB_OVERFLOW)
5525 365 : || !gimple_call_internal_p (ovf2, ifn)
5526 394 : || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
5527 361 : TYPE_MODE (type)) == CODE_FOR_nothing
5528 94 : || (rhs[2]
5529 17 : && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
5530 15 : TYPE_MODE (type)) == CODE_FOR_nothing)
5531 94 : || !types_compatible_p (type,
5532 94 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf1))))
5533 481 : || !types_compatible_p (type,
5534 93 : TREE_TYPE (TREE_TYPE (gimple_call_lhs (ovf2)))))
5535 295 : return false;
5536 93 : tree arg1, arg2, arg3 = NULL_TREE;
5537 93 : gimple *re1 = NULL, *re2 = NULL;
5538 : /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
5539 : should be initialized from __real__ of the other of the two calls.
5540 : Though, for .SUB_OVERFLOW, it has to be the first argument, not the
5541 : second one. */
5542 340 : for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
5543 349 : for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
5544 : {
5545 288 : tree arg = gimple_call_arg (ovf, i);
5546 288 : if (TREE_CODE (arg) != SSA_NAME)
5547 2 : continue;
5548 286 : re1 = SSA_NAME_DEF_STMT (arg);
5549 286 : if (uaddc_is_cplxpart (re1, REALPART_EXPR)
5550 379 : && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
5551 93 : == (ovf == ovf1 ? ovf2 : ovf1)))
5552 : {
5553 93 : if (ovf == ovf1)
5554 : {
5555 : /* Make sure ovf2 is the .*_OVERFLOW call with argument
5556 : initialized from __real__ of ovf1. */
5557 20 : std::swap (rhs[0], rhs[1]);
5558 20 : std::swap (im1, im2);
5559 20 : std::swap (ovf1, ovf2);
5560 : }
5561 93 : arg3 = gimple_call_arg (ovf, 1 - i);
5562 93 : i = -1;
5563 93 : break;
5564 : }
5565 : }
5566 93 : if (!arg3)
5567 : return false;
5568 93 : arg1 = gimple_call_arg (ovf1, 0);
5569 93 : arg2 = gimple_call_arg (ovf1, 1);
5570 93 : if (!types_compatible_p (type, TREE_TYPE (arg1)))
5571 : return false;
5572 93 : int kind[2] = { 0, 0 };
5573 93 : tree arg_im[2] = { NULL_TREE, NULL_TREE };
5574 : /* At least one of arg2 and arg3 should have type compatible
5575 : with arg1/rhs[0], and the other one should have value in [0, 1]
5576 : range. If both are in [0, 1] range and type compatible with
5577 : arg1/rhs[0], try harder to find after looking through casts,
5578 : != 0 comparisons which one is initialized to __imag__ of
5579 : .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
5580 279 : for (int i = 0; i < 2; ++i)
5581 : {
5582 186 : tree arg = i == 0 ? arg2 : arg3;
5583 186 : if (types_compatible_p (type, TREE_TYPE (arg)))
5584 161 : kind[i] = 1;
5585 372 : if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
5586 372 : || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
5587 25 : && !TYPE_UNSIGNED (TREE_TYPE (arg))))
5588 0 : continue;
5589 186 : if (tree_zero_one_valued_p (arg))
5590 51 : kind[i] |= 2;
5591 186 : if (TREE_CODE (arg) == SSA_NAME)
5592 : {
5593 184 : gimple *g = SSA_NAME_DEF_STMT (arg);
5594 184 : if (gimple_assign_cast_p (g))
5595 : {
5596 30 : tree op = gimple_assign_rhs1 (g);
5597 30 : if (TREE_CODE (op) == SSA_NAME
5598 30 : && INTEGRAL_TYPE_P (TREE_TYPE (op)))
5599 30 : g = SSA_NAME_DEF_STMT (op);
5600 : }
5601 184 : g = uaddc_ne0 (g);
5602 184 : if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
5603 124 : continue;
5604 60 : arg_im[i] = gimple_assign_lhs (g);
5605 60 : g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
5606 60 : if (!is_gimple_call (g) || !gimple_call_internal_p (g))
5607 0 : continue;
5608 60 : switch (gimple_call_internal_fn (g))
5609 : {
5610 60 : case IFN_ADD_OVERFLOW:
5611 60 : case IFN_SUB_OVERFLOW:
5612 60 : case IFN_UADDC:
5613 60 : case IFN_USUBC:
5614 60 : break;
5615 0 : default:
5616 0 : continue;
5617 : }
5618 60 : kind[i] |= 4;
5619 : }
5620 : }
5621 : /* Make arg2 the one with compatible type and arg3 the one
5622 : with [0, 1] range. If both are true for both operands,
5623 : prefer as arg3 the result of __imag__ of some ifn. */
5624 93 : if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
5625 : {
5626 1 : std::swap (arg2, arg3);
5627 1 : std::swap (kind[0], kind[1]);
5628 1 : std::swap (arg_im[0], arg_im[1]);
5629 : }
5630 93 : if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
5631 : return false;
5632 69 : if (!has_single_use (gimple_assign_lhs (im1))
5633 67 : || !has_single_use (gimple_assign_lhs (im2))
5634 67 : || !has_single_use (gimple_assign_lhs (re1))
5635 136 : || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
5636 : return false;
5637 : /* Check that ovf2's result is used in __real__ and set re2
5638 : to that statement. */
5639 67 : use_operand_p use_p;
5640 67 : imm_use_iterator iter;
5641 67 : tree lhs = gimple_call_lhs (ovf2);
5642 267 : FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5643 : {
5644 133 : gimple *use_stmt = USE_STMT (use_p);
5645 133 : if (is_gimple_debug (use_stmt))
5646 0 : continue;
5647 133 : if (use_stmt == im2)
5648 67 : continue;
5649 66 : if (re2)
5650 : return false;
5651 66 : if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
5652 : return false;
5653 : re2 = use_stmt;
5654 0 : }
5655 : /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5656 67 : gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
5657 67 : gimple *g;
5658 67 : if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
5659 : arg3 = arg_im[1];
5660 67 : if ((kind[1] & 1) == 0)
5661 : {
5662 25 : if (TREE_CODE (arg3) == INTEGER_CST)
5663 0 : arg3 = fold_convert (type, arg3);
5664 : else
5665 : {
5666 25 : g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
5667 25 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5668 25 : arg3 = gimple_assign_lhs (g);
5669 : }
5670 : }
5671 89 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5672 : ? IFN_UADDC : IFN_USUBC,
5673 : 3, arg1, arg2, arg3);
5674 67 : tree nlhs = make_ssa_name (TREE_TYPE (lhs));
5675 67 : gimple_call_set_lhs (g, nlhs);
5676 67 : gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5677 : /* In the case where stmt is | or ^ of two overflow flags
5678 : or addition of those, replace stmt with __imag__ of the above
5679 : added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5680 : arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5681 67 : tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
5682 67 : g = gimple_build_assign (ilhs, IMAGPART_EXPR,
5683 67 : build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
5684 67 : if (rhs[2])
5685 : {
5686 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5687 : /* Remove some further statements which can't be kept in the IL because
5688 : they can use SSA_NAMEs whose setter is going to be removed too. */
5689 75 : for (gimple *g2 : temp_stmts)
5690 : {
5691 30 : gsi2 = gsi_for_stmt (g2);
5692 30 : gsi_remove (&gsi2, true);
5693 30 : release_defs (g2);
5694 : }
5695 : }
5696 : else
5697 52 : gsi_replace (gsi, g, true);
5698 : /* Remove some statements which can't be kept in the IL because they
5699 : use SSA_NAME whose setter is going to be removed too. */
5700 67 : tree rhs1 = rhs[1];
5701 103 : for (int i = 0; i < 2; i++)
5702 85 : if (rhs1 == gimple_assign_lhs (im2))
5703 : break;
5704 : else
5705 : {
5706 36 : g = SSA_NAME_DEF_STMT (rhs1);
5707 36 : rhs1 = gimple_assign_rhs1 (g);
5708 36 : gsi2 = gsi_for_stmt (g);
5709 36 : gsi_remove (&gsi2, true);
5710 36 : release_defs (g);
5711 : }
5712 67 : gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
5713 67 : gsi2 = gsi_for_stmt (im2);
5714 67 : gsi_remove (&gsi2, true);
5715 67 : release_defs (im2);
5716 : /* Replace the re2 statement with __real__ of the newly added
5717 : .UADDC/.USUBC call. */
5718 67 : if (re2)
5719 : {
5720 66 : gsi2 = gsi_for_stmt (re2);
5721 66 : tree rlhs = gimple_assign_lhs (re2);
5722 66 : g = gimple_build_assign (rlhs, REALPART_EXPR,
5723 66 : build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
5724 66 : gsi_replace (&gsi2, g, true);
5725 : }
5726 67 : if (rhs[2])
5727 : {
5728 : /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5729 : arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5730 : replace stmt with __real__ of another .UADDC/.USUBC call which
5731 : handles the most significant limb. Overflow flag from this is
5732 : ignored. */
5733 17 : g = gimple_build_call_internal (code == PLUS_EXPR
5734 : ? IFN_UADDC : IFN_USUBC,
5735 : 3, rhs[3], rhs[2], ilhs);
5736 15 : nlhs = make_ssa_name (TREE_TYPE (lhs));
5737 15 : gimple_call_set_lhs (g, nlhs);
5738 15 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5739 15 : ilhs = gimple_assign_lhs (stmt);
5740 15 : g = gimple_build_assign (ilhs, REALPART_EXPR,
5741 15 : build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
5742 15 : gsi_replace (gsi, g, true);
5743 : }
5744 67 : if (TREE_CODE (arg3) == SSA_NAME)
5745 : {
5746 : /* When pattern recognizing the second least significant limb
5747 : above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5748 : check if the [0, 1] range argument (i.e. carry in) isn't the
5749 : result of another .{ADD,SUB}_OVERFLOW call (one handling the
5750 : least significant limb). Again look through casts and != 0. */
5751 67 : gimple *im3 = SSA_NAME_DEF_STMT (arg3);
5752 92 : for (int i = 0; i < 2; ++i)
5753 : {
5754 92 : gimple *im4 = uaddc_cast (im3);
5755 92 : if (im4 == im3)
5756 : break;
5757 : else
5758 25 : im3 = im4;
5759 : }
5760 67 : im3 = uaddc_ne0 (im3);
5761 67 : if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
5762 : {
5763 60 : gimple *ovf3
5764 60 : = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
5765 60 : if (gimple_call_internal_p (ovf3, ifn))
5766 : {
5767 25 : lhs = gimple_call_lhs (ovf3);
5768 25 : arg1 = gimple_call_arg (ovf3, 0);
5769 25 : arg2 = gimple_call_arg (ovf3, 1);
5770 25 : if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
5771 25 : && types_compatible_p (type, TREE_TYPE (arg1))
5772 50 : && types_compatible_p (type, TREE_TYPE (arg2)))
5773 : {
5774 : /* And if it is initialized from result of __imag__
5775 : of .{ADD,SUB}_OVERFLOW call, replace that
5776 : call with .U{ADD,SUB}C call with the same arguments,
5777 : just 0 added as third argument. This isn't strictly
5778 : necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5779 : produce the same result, but may result in better
5780 : generated code on some targets where the backend can
5781 : better prepare in how the result will be used. */
5782 25 : g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5783 : ? IFN_UADDC : IFN_USUBC,
5784 : 3, arg1, arg2,
5785 : build_zero_cst (type));
5786 25 : gimple_call_set_lhs (g, lhs);
5787 25 : gsi2 = gsi_for_stmt (ovf3);
5788 25 : gsi_replace (&gsi2, g, true);
5789 : }
5790 : }
5791 : }
5792 : }
5793 : return true;
5794 1614203 : }
5795 :
5796 : /* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
5797 : (x ^ (x - 1)) > x - 1 or (x ^ (x - 1)) <= x - 1 if .POPCOUNT
5798 : isn't a direct optab. Also handle `<=`/`>` to be
5799 : `(x & (x - 1)) ==/!= 0`.
     :
     : If x is known to be nonzero (tree_expr_nonzero_p), the ==/!= 1 forms
     : are emitted as (x & (x - 1)) ==/!= 0 instead.
     :
     : If .POPCOUNT is directly supported for the argument type, the call is
     : kept but replaced by a two-argument .POPCOUNT (x, flag) call, where
     : flag is -1 for <=/>, 0 for ==/!= with nonzero x and 1 otherwise.
     :
     : STMT is either a GIMPLE_COND or is otherwise accessed as an assignment
     : whose rhs is the comparison. Nothing is changed unless the first
     : comparison operand is a single-use SSA_NAME set by a popcount call with
     : an integral argument and the second operand is the constant 1. New
     : statements are inserted before *GSI. */
5800 :
5801 : static void
5802 4363177 : match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
5803 : {
5804 4363177 : tree clhs, crhs;
5805 4363177 : enum tree_code code;
5806 4363177 : bool was_le = false;
5807 4363177 : if (gimple_code (stmt) == GIMPLE_COND)
5808 : {
5809 4075183 : clhs = gimple_cond_lhs (stmt);
5810 4075183 : crhs = gimple_cond_rhs (stmt);
5811 4075183 : code = gimple_cond_code (stmt);
5812 : }
5813 : else
5814 : {
5815 287994 : clhs = gimple_assign_rhs1 (stmt);
5816 287994 : crhs = gimple_assign_rhs2 (stmt);
5817 287994 : code = gimple_assign_rhs_code (stmt);
5818 : }
5819 4363177 : if (code != LE_EXPR && code != GT_EXPR
5820 4363177 : && code != EQ_EXPR && code != NE_EXPR)
5821 4363171 : return;
5822 2025164 : if (code == LE_EXPR || code == GT_EXPR)
5823 4104396 : was_le = true;
5824 4104396 : if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
5825 3950518 : return;
5826 153878 : gimple *call = SSA_NAME_DEF_STMT (clhs);
5827 153878 : combined_fn cfn = gimple_call_combined_fn (call);
5828 153878 : switch (cfn)
5829 : {
5830 15 : CASE_CFN_POPCOUNT:
5831 15 : break;
5832 : default:
5833 : return;
5834 : }
5835 15 : if (!has_single_use (clhs))
5836 : return;
5837 14 : tree arg = gimple_call_arg (call, 0);
5838 14 : tree type = TREE_TYPE (arg);
5839 14 : if (!INTEGRAL_TYPE_P (type))
5840 : return;
5841 14 : bool nonzero_arg = tree_expr_nonzero_p (arg);
5842 14 : if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
5843 : {
5844 : /* Tell expand_POPCOUNT the popcount result is only used in a
5845 : comparison with one, so that it can decide based on rtx costs.
     : The second argument is -1 for <=/>, 0 for ==/!= when ARG is
     : known to be nonzero and 1 for ==/!= otherwise. */
5846 16 : gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
5847 : was_le ? integer_minus_one_node
5848 8 : : (nonzero_arg ? integer_zero_node
5849 : : integer_one_node));
5850 8 : gimple_call_set_lhs (g, gimple_call_lhs (call));
5851 8 : gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
5852 8 : gsi_replace (&gsi2, g, true);
5853 8 : return;
5854 : }
5855 6 : tree argm1 = make_ssa_name (type); /* Holds ARG - 1. */
5856 6 : gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
5857 : build_int_cst (type, -1));
5858 6 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5859 6 : g = gimple_build_assign (make_ssa_name (type),
5860 6 : (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR,
5861 : arg, argm1);
5862 6 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
5863 6 : tree_code cmpcode;
5864 6 : if (was_le)
5865 : {
5866 0 : argm1 = build_zero_cst (type); /* <=/> 1 becomes ==/!= 0. */
5867 0 : cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR;
5868 : }
5869 6 : else if (nonzero_arg)
5870 : {
5871 2 : argm1 = build_zero_cst (type); /* ==/!= 1 becomes ==/!= 0. */
5872 2 : cmpcode = code;
5873 : }
5874 : else
5875 4 : cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR; /* Compared against ARG - 1. */
5876 6 : if (gcond *cond = dyn_cast <gcond *> (stmt))
5877 : {
5878 2 : gimple_cond_set_lhs (cond, gimple_assign_lhs (g));
5879 2 : gimple_cond_set_rhs (cond, argm1);
5880 2 : gimple_cond_set_code (cond, cmpcode);
5881 : }
5882 : else
5883 : {
5884 4 : gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (g));
5885 4 : gimple_assign_set_rhs2 (stmt, argm1);
5886 4 : gimple_assign_set_rhs_code (stmt, cmpcode);
5887 : }
5888 6 : update_stmt (stmt);
5889 6 : gimple_stmt_iterator gsi2 = gsi_for_stmt (call); /* Its only use was STMT. */
5890 6 : gsi_remove (&gsi2, true);
5891 6 : release_defs (call);
5892 : }
5893 :
5894 : /* Return true if target has support for divmod in MODE, either through
     : an insn for DIVMOD_OPTAB or through a divmod libfunc. The libfunc is
     : only accepted when no mode visited by FOR_EACH_MODE_FROM starting at
     : MODE has a handler for DIV_OPTAB and targetm.expand_divmod_libfunc is
     : non-NULL. */
5895 :
5896 : static bool
5897 28897 : target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
5898 : {
5899 : /* If target supports hardware divmod insn, use it for divmod. */
5900 28897 : if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
5901 : return true;
5902 :
5903 : /* Check if libfunc for divmod is available. */
5904 2565 : rtx libfunc = optab_libfunc (divmod_optab, mode);
5905 2565 : if (libfunc != NULL_RTX)
5906 : {
5907 : /* If optab_handler exists for div_optab, perhaps in a wider mode,
5908 : we don't want to use the libfunc even if it exists for given mode. */
5909 : machine_mode div_mode;
5910 10693 : FOR_EACH_MODE_FROM (div_mode, mode)
5911 8128 : if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5912 : return false;
5913 :
5914 2565 : return targetm.expand_divmod_libfunc != NULL;
5915 : }
5916 :
5917 : return false;
5918 : }
5919 :
5920 : /* Check if STMT is a candidate for the divmod transform. The type of
     : its lhs selects between the unsigned and signed divmod/div optabs.
     : Return false if the dividend is constant, if a constant divisor is a
     : power of two or the type is narrow enough (see below), if the type has
     : trapping overflow, or if target_supports_divmod_p fails for the mode
     : of the type. */
5921 :
5922 : static bool
5923 47614 : divmod_candidate_p (gassign *stmt)
5924 : {
5925 47614 : tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5926 47614 : machine_mode mode = TYPE_MODE (type);
5927 47614 : optab divmod_optab, div_optab;
5928 :
5929 47614 : if (TYPE_UNSIGNED (type))
5930 : {
5931 : divmod_optab = udivmod_optab;
5932 : div_optab = udiv_optab;
5933 : }
5934 : else
5935 : {
5936 20248 : divmod_optab = sdivmod_optab;
5937 20248 : div_optab = sdiv_optab;
5938 : }
5939 :
5940 47614 : tree op1 = gimple_assign_rhs1 (stmt);
5941 47614 : tree op2 = gimple_assign_rhs2 (stmt);
5942 :
5943 : /* Disable the transform if the dividend is a constant. A constant
5944 : divisor is checked separately below, since division-by-constant
     : may have specialized expansion. */
5945 47614 : if (CONSTANT_CLASS_P (op1))
5946 : return false;
5947 :
5948 43857 : if (CONSTANT_CLASS_P (op2))
5949 : {
5950 17183 : if (integer_pow2p (op2))
5951 : return false;
5952 :
5953 15091 : if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5954 16182 : && element_precision (type) <= BITS_PER_WORD)
5955 : return false;
5956 :
5957 : /* If the divisor is not power of 2 and the precision wider than
5958 : HWI, expand_divmod punts on that, so in that case it is better
5959 : to use divmod optab or libfunc. Similarly if choose_multiplier
5960 : might need pre/post shifts of BITS_PER_WORD or more. */
5961 : }
5962 :
5963 : /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5964 : expand using the [su]divv optabs. */
5965 28897 : if (TYPE_OVERFLOW_TRAPS (type))
5966 : return false;
5967 :
5968 28897 : if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5969 : return false;
5970 :
5971 : return true;
5972 : }
5973 :
5974 : /* This function looks for:
5975 : t1 = a TRUNC_DIV_EXPR b;
5976 : t2 = a TRUNC_MOD_EXPR b;
5977 : and transforms it to the following sequence:
5978 : complex_tmp = DIVMOD (a, b);
5979 : t1 = REALPART_EXPR(complex_tmp);
5980 : t2 = IMAGPART_EXPR(complex_tmp);
5981 : For conditions enabling the transform see divmod_candidate_p().
5982 :
5983 : The pass has three parts:
5984 : 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5985 : other trunc_div_expr and trunc_mod_expr stmts.
5986 : 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5987 : to stmts vector.
5988 : 3) Insert DIVMOD call just before top_stmt and update entries in
5989 : stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
5990 : IMAGPART_EXPR for mod).
     :
     : Return true if the DIVMOD call was inserted. Return false, leaving
     : the IL unchanged, if STMT can throw internally, is not a divmod
     : candidate, or if no TRUNC_DIV_EXPR is among the collected stmts. */
5991 :
5992 : static bool
5993 47633 : convert_to_divmod (gassign *stmt)
5994 : {
5995 47633 : if (stmt_can_throw_internal (cfun, stmt)
5996 47633 : || !divmod_candidate_p (stmt))
5997 18736 : return false;
5998 :
5999 28897 : tree op1 = gimple_assign_rhs1 (stmt);
6000 28897 : tree op2 = gimple_assign_rhs2 (stmt);
6001 :
6002 28897 : imm_use_iterator use_iter;
6003 28897 : gimple *use_stmt;
6004 28897 : auto_vec<gimple *> stmts;
6005 :
6006 28897 : gimple *top_stmt = stmt;
6007 28897 : basic_block top_bb = gimple_bb (stmt);
6008 :
6009 : /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
6010 : at-least stmt and possibly other trunc_div/trunc_mod stmts
6011 : having same operands as stmt. Within one basic block the earlier
     : stmt is chosen by comparing gimple_uid; this presumably relies on
     : the caller having assigned uids in statement order - TODO confirm. */
6012 :
6013 123590 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
6014 : {
6015 94693 : if (is_gimple_assign (use_stmt)
6016 58530 : && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
6017 46763 : || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
6018 48255 : && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
6019 142831 : && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
6020 : {
6021 40557 : if (stmt_can_throw_internal (cfun, use_stmt))
6022 0 : continue;
6023 :
6024 40557 : basic_block bb = gimple_bb (use_stmt);
6025 :
6026 40557 : if (bb == top_bb)
6027 : {
6028 39838 : if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
6029 5155 : top_stmt = use_stmt;
6030 : }
6031 719 : else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
6032 : {
6033 195 : top_bb = bb;
6034 195 : top_stmt = use_stmt;
6035 : }
6036 : }
6037 28897 : }
6038 :
6039 28897 : tree top_op1 = gimple_assign_rhs1 (top_stmt);
6040 28897 : tree top_op2 = gimple_assign_rhs2 (top_stmt);
6041 :
6042 28897 : stmts.safe_push (top_stmt);
6043 28897 : bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
6044 :
6045 : /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
6046 : to stmts vector. The 2nd loop will always add stmt to stmts vector, since
6047 : gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
6048 : 2nd loop ends up adding at-least single trunc_mod_expr stmt. */
6049 :
6050 123590 : FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
6051 : {
6052 94693 : if (is_gimple_assign (use_stmt)
6053 58530 : && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
6054 46763 : || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
6055 48255 : && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
6056 142831 : && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
6057 : {
6058 69538 : if (use_stmt == top_stmt
6059 11660 : || stmt_can_throw_internal (cfun, use_stmt)
6060 52217 : || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
6061 28981 : continue;
6062 :
6063 11576 : stmts.safe_push (use_stmt);
6064 11576 : if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
6065 94693 : div_seen = true;
6066 : }
6067 28897 : }
6068 :
6069 28897 : if (!div_seen)
6070 : return false;
6071 :
6072 : /* Part 3: Create libcall to internal fn DIVMOD:
6073 : divmod_tmp = DIVMOD (op1, op2). */
6074 :
6075 11549 : gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
6076 11549 : tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
6077 : call_stmt, "divmod_tmp");
6078 11549 : gimple_call_set_lhs (call_stmt, res);
6079 : /* We rejected throwing statements above. */
6080 11549 : gimple_call_set_nothrow (call_stmt, true);
6081 :
6082 : /* Insert the call before top_stmt. */
6083 11549 : gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
6084 11549 : gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
6085 :
6086 11549 : widen_mul_stats.divmod_calls_inserted++;
6087 :
6088 : /* Update all statements in stmts vector:
6089 : lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
6090 : lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
6091 :
6092 63569 : for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
6093 : {
6094 23123 : tree new_rhs;
6095 :
6096 23123 : switch (gimple_assign_rhs_code (use_stmt))
6097 : {
6098 11559 : case TRUNC_DIV_EXPR:
6099 11559 : new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
6100 11559 : break;
6101 :
6102 11564 : case TRUNC_MOD_EXPR:
6103 11564 : new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
6104 11564 : break;
6105 :
6106 0 : default:
6107 0 : gcc_unreachable ();
6108 : }
6109 :
6110 23123 : gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
6111 23123 : gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
6112 23123 : update_stmt (use_stmt);
6113 : }
6114 :
6115 : return true;
6116 28897 : }
6117 :
6118 : /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
6119 : its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
6120 : value is true iff we converted the statement.
     :
     : The shifted operand must be a single-use SSA_NAME set, possibly
     : through one single-use NOP_EXPR conversion, by a WIDEN_MULT_EXPR in
     : the same basic block as STMT. The product must be twice as wide as
     : its operands, with the same signedness as both of them and the same
     : precision as the lhs of STMT. With PREC the operand precision, the
     : constant shift count must be in the range [PREC, 2 * PREC); a count
     : above PREC is handled by shifting the high part right by the
     : difference. The target must provide the matching [us]mul_highpart
     : optab for the operand mode.
     :
     : New statements are inserted through GSI and the statement at GSI is
     : replaced (presumably GSI points at STMT - TODO confirm). */
6121 :
6122 : static bool
6123 169355 : convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
6124 : {
6125 169355 : tree lhs = gimple_assign_lhs (stmt);
6126 169355 : tree stype = TREE_TYPE (lhs);
6127 169355 : tree sarg0 = gimple_assign_rhs1 (stmt);
6128 169355 : tree sarg1 = gimple_assign_rhs2 (stmt);
6129 :
6130 169355 : if (TREE_CODE (stype) != INTEGER_TYPE
6131 162452 : || TREE_CODE (sarg1) != INTEGER_CST
6132 146571 : || TREE_CODE (sarg0) != SSA_NAME
6133 146570 : || !tree_fits_uhwi_p (sarg1)
6134 315925 : || !has_single_use (sarg0))
6135 : return false;
6136 :
6137 43757 : gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
6138 40701 : if (!def)
6139 : return false;
6140 :
6141 40701 : enum tree_code mcode = gimple_assign_rhs_code (def);
6142 40701 : if (mcode == NOP_EXPR) /* Look through one conversion. */
6143 : {
6144 8938 : tree tmp = gimple_assign_rhs1 (def);
6145 8938 : if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
6146 : return false;
6147 171415 : def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
6148 2909 : if (!def)
6149 : return false;
6150 2909 : mcode = gimple_assign_rhs_code (def);
6151 : }
6152 :
6153 34672 : if (mcode != WIDEN_MULT_EXPR
6154 34672 : || gimple_bb (def) != gimple_bb (stmt))
6155 : return false;
6156 850 : tree mtype = TREE_TYPE (gimple_assign_lhs (def));
6157 850 : if (TREE_CODE (mtype) != INTEGER_TYPE
6158 850 : || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
6159 : return false;
6160 :
6161 850 : tree mop1 = gimple_assign_rhs1 (def);
6162 850 : tree mop2 = gimple_assign_rhs2 (def);
6163 850 : tree optype = TREE_TYPE (mop1);
6164 850 : bool unsignedp = TYPE_UNSIGNED (optype);
6165 850 : unsigned int prec = TYPE_PRECISION (optype);
6166 :
6167 850 : if (unsignedp != TYPE_UNSIGNED (mtype)
6168 850 : || TYPE_PRECISION (mtype) != 2 * prec)
6169 : return false;
6170 :
6171 850 : unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
6172 850 : if (bits < prec || bits >= 2 * prec)
6173 : return false;
6174 :
6175 : /* For the time being, require operands to have the same sign. */
6176 849 : if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
6177 : return false;
6178 :
6179 849 : machine_mode mode = TYPE_MODE (optype);
6180 849 : optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
6181 849 : if (optab_handler (tab, mode) == CODE_FOR_nothing)
6182 : return false;
6183 :
6184 849 : location_t loc = gimple_location (stmt);
6185 849 : tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
6186 : MULT_HIGHPART_EXPR, mop1, mop2);
6187 849 : tree highpart2 = highpart1;
6188 849 : tree ntype = optype;
6189 :
6190 849 : if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype)) /* Match the signedness of the lhs. */
6191 : {
6192 16 : ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
6193 7 : : signed_type_for (optype);
6194 16 : highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
6195 : }
6196 849 : if (bits > prec) /* Apply the remaining BITS - PREC shift. */
6197 29 : highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
6198 : RSHIFT_EXPR, highpart2,
6199 29 : build_int_cst (ntype, bits - prec));
6200 :
6201 849 : gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
6202 849 : gsi_replace (gsi, new_stmt, true);
6203 :
6204 849 : widen_mul_stats.highpart_mults_inserted++;
6205 849 : return true;
6206 : }
6207 :
6208 : /* If target has spaceship<MODE>3 expander, pattern recognize
6209 : <bb 2> [local count: 1073741824]:
6210 : if (a_2(D) == b_3(D))
6211 : goto <bb 6>; [34.00%]
6212 : else
6213 : goto <bb 3>; [66.00%]
6214 :
6215 : <bb 3> [local count: 708669601]:
6216 : if (a_2(D) < b_3(D))
6217 : goto <bb 6>; [1.04%]
6218 : else
6219 : goto <bb 4>; [98.96%]
6220 :
6221 : <bb 4> [local count: 701299439]:
6222 : if (a_2(D) > b_3(D))
6223 : goto <bb 5>; [48.89%]
6224 : else
6225 : goto <bb 6>; [51.11%]
6226 :
6227 : <bb 5> [local count: 342865295]:
6228 :
6229 : <bb 6> [local count: 1073741824]:
6230 : and turn it into:
6231 : <bb 2> [local count: 1073741824]:
6232 : _1 = .SPACESHIP (a_2(D), b_3(D), 0);
6233 : if (_1 == 0)
6234 : goto <bb 6>; [34.00%]
6235 : else
6236 : goto <bb 3>; [66.00%]
6237 :
6238 : <bb 3> [local count: 708669601]:
6239 : if (_1 == -1)
6240 : goto <bb 6>; [1.04%]
6241 : else
6242 : goto <bb 4>; [98.96%]
6243 :
6244 : <bb 4> [local count: 701299439]:
6245 : if (_1 == 1)
6246 : goto <bb 5>; [48.89%]
6247 : else
6248 : goto <bb 6>; [51.11%]
6249 :
6250 : <bb 5> [local count: 342865295]:
6251 :
6252 : <bb 6> [local count: 1073741824]:
6253 : so that the backend can emit optimal comparison and
6254 : conditional jump sequence. If the
6255 : <bb 6> [local count: 1073741824]:
6256 : above has a single PHI like:
6257 : # _27 = PHI<0(2), -1(3), -128(4), 1(5)>
6258 : then replace it with effectively
6259 : _1 = .SPACESHIP (a_2(D), b_3(D), -128);
6260 : _27 = _1; */
6261 :
6262 : static void
6263 4075183 : optimize_spaceship (gcond *stmt)
6264 : {
6265 4075183 : enum tree_code code = gimple_cond_code (stmt);
6266 4075183 : if (code != EQ_EXPR && code != NE_EXPR)
6267 4074991 : return;
6268 3300658 : tree arg1 = gimple_cond_lhs (stmt);
6269 3300658 : tree arg2 = gimple_cond_rhs (stmt);
6270 3300658 : if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
6271 3190196 : && !INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
6272 2563601 : || optab_handler (spaceship_optab,
6273 2563601 : TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
6274 5823620 : || operand_equal_p (arg1, arg2, 0))
6275 779019 : return;
6276 :
6277 2521639 : basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
6278 2521639 : edge em1 = NULL, e1 = NULL, e2 = NULL;
6279 2521639 : bb1 = EDGE_SUCC (bb0, 1)->dest;
6280 2521639 : if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
6281 1527614 : bb1 = EDGE_SUCC (bb0, 0)->dest;
6282 :
6283 7515429 : gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
6284 1094566 : if (g == NULL
6285 1094566 : || !single_pred_p (bb1)
6286 679952 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6287 572389 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6288 464826 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6289 965 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6290 582262 : || !cond_only_block_p (bb1))
6291 2512465 : return;
6292 :
6293 9174 : enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6294 9174 : ? LT_EXPR : GT_EXPR);
6295 9174 : switch (gimple_cond_code (g))
6296 : {
6297 : case LT_EXPR:
6298 : case LE_EXPR:
6299 : break;
6300 7708 : case GT_EXPR:
6301 7708 : case GE_EXPR:
6302 7708 : ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
6303 : break;
6304 : default:
6305 : return;
6306 : }
6307 :
6308 27423 : for (int i = 0; i < 2; ++i)
6309 : {
6310 : /* With NaNs, </<=/>/>= are false, so we need to look for the
6311 : third comparison on the false edge from whatever non-equality
6312 : comparison the second comparison is. */
6313 18336 : if (HONOR_NANS (TREE_TYPE (arg1))
6314 18336 : && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
6315 199 : continue;
6316 :
6317 18137 : bb2 = EDGE_SUCC (bb1, i)->dest;
6318 54061 : g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
6319 12223 : if (g == NULL
6320 12223 : || !single_pred_p (bb2)
6321 17174 : || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
6322 9680 : ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
6323 2186 : : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
6324 19 : || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
6325 85 : || !cond_only_block_p (bb2)
6326 9765 : || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
6327 18052 : continue;
6328 :
6329 85 : enum tree_code ccode2
6330 85 : = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
6331 85 : switch (gimple_cond_code (g))
6332 : {
6333 : case LT_EXPR:
6334 : case LE_EXPR:
6335 : break;
6336 55 : case GT_EXPR:
6337 55 : case GE_EXPR:
6338 55 : ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
6339 : break;
6340 1 : default:
6341 1 : continue;
6342 : }
6343 84 : if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
6344 0 : continue;
6345 :
6346 168 : if ((ccode == LT_EXPR)
6347 84 : ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
6348 : {
6349 55 : em1 = EDGE_SUCC (bb1, 1 - i);
6350 55 : e1 = EDGE_SUCC (bb2, 0);
6351 55 : e2 = EDGE_SUCC (bb2, 1);
6352 55 : if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
6353 0 : std::swap (e1, e2);
6354 : }
6355 : else
6356 : {
6357 29 : e1 = EDGE_SUCC (bb1, 1 - i);
6358 29 : em1 = EDGE_SUCC (bb2, 0);
6359 29 : e2 = EDGE_SUCC (bb2, 1);
6360 29 : if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
6361 : std::swap (em1, e2);
6362 : }
6363 : break;
6364 : }
6365 :
6366 9142 : if (em1 == NULL)
6367 : {
6368 18174 : if ((ccode == LT_EXPR)
6369 9087 : ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
6370 : {
6371 3144 : em1 = EDGE_SUCC (bb1, 1);
6372 3144 : e1 = EDGE_SUCC (bb1, 0);
6373 3144 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6374 : }
6375 : else
6376 : {
6377 5943 : em1 = EDGE_SUCC (bb1, 0);
6378 5943 : e1 = EDGE_SUCC (bb1, 1);
6379 5943 : e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6380 : }
6381 : }
6382 :
6383 : /* Check if there is a single bb into which all failed conditions
6384 : jump to (perhaps through an empty block) and if it results in
6385 : a single integral PHI which just sets it to -1, 0, 1, X
6386 : (or -1, 0, 1 when NaNs can't happen). In that case use 1 rather
6387 : than 0 as last .SPACESHIP argument to tell backends it might
6388 : consider different code generation and just cast the result
6389 : of .SPACESHIP to the PHI result. X above is some value
6390 : other than -1, 0, 1, for libstdc++ -128, for libc++ -127. */
6391 9171 : tree arg3 = integer_zero_node;
6392 9171 : edge e = EDGE_SUCC (bb0, 0);
6393 9171 : if (e->dest == bb1)
6394 6481 : e = EDGE_SUCC (bb0, 1);
6395 9171 : basic_block bbp = e->dest;
6396 9171 : gphi *phi = NULL;
6397 9171 : for (gphi_iterator psi = gsi_start_phis (bbp);
6398 11106 : !gsi_end_p (psi); gsi_next (&psi))
6399 : {
6400 3500 : gphi *gp = psi.phi ();
6401 3500 : tree res = gimple_phi_result (gp);
6402 :
6403 3500 : if (phi != NULL
6404 3158 : || virtual_operand_p (res)
6405 2223 : || !INTEGRAL_TYPE_P (TREE_TYPE (res))
6406 5578 : || TYPE_PRECISION (TREE_TYPE (res)) < 2)
6407 : {
6408 : phi = NULL;
6409 : break;
6410 : }
6411 1935 : phi = gp;
6412 : }
6413 9171 : if (phi
6414 1593 : && integer_zerop (gimple_phi_arg_def_from_edge (phi, e))
6415 9744 : && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3))
6416 : {
6417 122 : HOST_WIDE_INT argval
6418 122 : = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? -128 : -1;
6419 724 : for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i)
6420 : {
6421 259 : edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2;
6422 259 : if (e3->dest != bbp)
6423 : {
6424 121 : if (!empty_block_p (e3->dest)
6425 112 : || !single_succ_p (e3->dest)
6426 233 : || single_succ (e3->dest) != bbp)
6427 : {
6428 : phi = NULL;
6429 : break;
6430 : }
6431 : e3 = single_succ_edge (e3->dest);
6432 : }
6433 250 : tree a = gimple_phi_arg_def_from_edge (phi, e3);
6434 250 : if (TREE_CODE (a) != INTEGER_CST
6435 250 : || (i == 0 && !integer_onep (a))
6436 494 : || (i == 1 && !integer_all_onesp (a)))
6437 : {
6438 : phi = NULL;
6439 : break;
6440 : }
6441 244 : if (i == 2)
6442 : {
6443 30 : tree minv = TYPE_MIN_VALUE (signed_char_type_node);
6444 30 : tree maxv = TYPE_MAX_VALUE (signed_char_type_node);
6445 30 : widest_int w = widest_int::from (wi::to_wide (a), SIGNED);
6446 41 : if ((w >= -1 && w <= 1)
6447 26 : || w < wi::to_widest (minv)
6448 56 : || w > wi::to_widest (maxv))
6449 : {
6450 4 : phi = NULL;
6451 4 : break;
6452 : }
6453 26 : argval = w.to_shwi ();
6454 26 : }
6455 : }
6456 122 : if (phi)
6457 103 : arg3 = build_int_cst (integer_type_node,
6458 127 : TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval);
6459 : }
6460 :
6461 : /* For integral <=> comparisons only use .SPACESHIP if it is turned
6462 : into an integer (-1, 0, 1). */
6463 9171 : if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) && arg3 == integer_zero_node)
6464 : return;
6465 :
6466 295 : gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 3, arg1, arg2, arg3);
6467 295 : tree lhs = make_ssa_name (integer_type_node);
6468 295 : gimple_call_set_lhs (gc, lhs);
6469 295 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6470 295 : gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
6471 :
6472 487 : wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node));
6473 487 : wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node));
6474 295 : if (HONOR_NANS (TREE_TYPE (arg1)))
6475 : {
6476 199 : if (arg3 == integer_zero_node)
6477 173 : wmin = wi::shwi (-128, TYPE_PRECISION (integer_type_node));
6478 26 : else if (tree_int_cst_sgn (arg3) < 0)
6479 19 : wmin = wi::to_wide (arg3);
6480 : else
6481 7 : wmax = wi::to_wide (arg3);
6482 : }
6483 487 : int_range<1> vr (TREE_TYPE (lhs), wmin, wmax);
6484 295 : set_range_info (lhs, vr);
6485 :
6486 295 : if (arg3 != integer_zero_node)
6487 : {
6488 103 : tree type = TREE_TYPE (gimple_phi_result (phi));
6489 103 : if (!useless_type_conversion_p (type, integer_type_node))
6490 : {
6491 63 : tree tem = make_ssa_name (type);
6492 63 : gimple *gcv = gimple_build_assign (tem, NOP_EXPR, lhs);
6493 63 : gsi_insert_before (&gsi, gcv, GSI_SAME_STMT);
6494 63 : lhs = tem;
6495 : }
6496 103 : SET_PHI_ARG_DEF_ON_EDGE (phi, e, lhs);
6497 103 : gimple_cond_set_lhs (stmt, boolean_false_node);
6498 103 : gimple_cond_set_rhs (stmt, boolean_false_node);
6499 193 : gimple_cond_set_code (stmt, (e->flags & EDGE_TRUE_VALUE)
6500 : ? EQ_EXPR : NE_EXPR);
6501 103 : update_stmt (stmt);
6502 103 : return;
6503 : }
6504 :
6505 192 : gimple_cond_set_lhs (stmt, lhs);
6506 192 : gimple_cond_set_rhs (stmt, integer_zero_node);
6507 192 : update_stmt (stmt);
6508 :
6509 384 : gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
6510 192 : gimple_cond_set_lhs (cond, lhs);
6511 192 : if (em1->src == bb1 && e2 != em1)
6512 : {
6513 112 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6514 118 : gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
6515 : ? EQ_EXPR : NE_EXPR);
6516 : }
6517 : else
6518 : {
6519 80 : gcc_assert (e1->src == bb1 && e2 != e1);
6520 80 : gimple_cond_set_rhs (cond, integer_one_node);
6521 80 : gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
6522 : ? EQ_EXPR : NE_EXPR);
6523 : }
6524 192 : update_stmt (cond);
6525 :
6526 192 : if (e2 != e1 && e2 != em1)
6527 : {
6528 116 : cond = as_a <gcond *> (*gsi_last_bb (bb2));
6529 58 : gimple_cond_set_lhs (cond, lhs);
6530 58 : if (em1->src == bb2)
6531 29 : gimple_cond_set_rhs (cond, integer_minus_one_node);
6532 : else
6533 : {
6534 29 : gcc_assert (e1->src == bb2);
6535 29 : gimple_cond_set_rhs (cond, integer_one_node);
6536 : }
6537 58 : gimple_cond_set_code (cond,
6538 58 : (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
6539 58 : update_stmt (cond);
6540 : }
6541 : }
6542 :
6543 :
6544 : /* Find integer multiplications where the operands are extended from
6545 : smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
6546 : or MULT_HIGHPART_EXPR where appropriate. */
6547 :
6548 : namespace {
6549 :
/* Static descriptor of the "widening_mul" GIMPLE pass.  It is handed to
   the gimple_opt_pass base-class constructor by pass_optimize_widening_mul.
   The trailing comment on each initializer names the pass_data field that
   the value fills in.  */
6550 : const pass_data pass_data_optimize_widening_mul =
6551 : {
6552 : GIMPLE_PASS, /* type */
6553 : "widening_mul", /* name */
6554 : OPTGROUP_NONE, /* optinfo_flags */
6555 : TV_TREE_WIDEN_MUL, /* tv_id */
6556 : PROP_ssa, /* properties_required */
6557 : 0, /* properties_provided */
6558 : 0, /* properties_destroyed */
6559 : 0, /* todo_flags_start */
6560 : TODO_update_ssa, /* todo_flags_finish */
6561 : };
6562 :
/* Pass object for "widening_mul".  It is constructed from
   pass_data_optimize_widening_mul; gate is defined inline below and
   execute is defined out of line.  */
6563 : class pass_optimize_widening_mul : public gimple_opt_pass
6564 : {
6565 : public:
/* Forward the static pass descriptor and the context CTXT to the
   gimple_opt_pass base class; the class adds no state of its own.  */
6566 298828 : pass_optimize_widening_mul (gcc::context *ctxt)
6567 597656 : : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
6568 : {}
6569 :
6570 : /* opt_pass methods: */
/* Gate: the pass runs only when both flag_expensive_optimizations and
   optimize are nonzero.  The function argument is not used.  */
6571 1039819 : bool gate (function *) final override
6572 : {
6573 1039819 : return flag_expensive_optimizations && optimize;
6574 : }
6575 :
/* Run the pass on a function; returns the TODO flags for the pass
   manager.  */
6576 : unsigned int execute (function *) final override;
6577 :
6578 : }; // class pass_optimize_widening_mul
6579 :
6580 : /* Walker class to perform the transformation in reverse dominance order.
   All of the per-block work is done in the after_dom_children hook; the
   walk itself is started by pass_optimize_widening_mul::execute.  */
6581 :
6582 : class math_opts_dom_walker : public dom_walker
6583 : {
6584 : public:
6585 : /* Constructor, CFG_CHANGED_P is a pointer to a boolean flag that will be set
6586 : if walking modifies the CFG.  The walk uses CDI_DOMINATORS and starts with
   an empty m_last_result_set.  The pointer is only stored here; it is
   later passed on to match_arith_overflow.  */
6587 :
6588 961530 : math_opts_dom_walker (bool *cfg_changed_p)
6589 2884590 : : dom_walker (CDI_DOMINATORS), m_last_result_set (),
6590 961530 : m_cfg_changed_p (cfg_changed_p) {}
6591 :
6592 : /* The actual actions performed in the walk, applied to one basic block. */
6593 :
6594 : void after_dom_children (basic_block) final override;
6595 :
6596 : /* Set of results of chains of multiply and add statement combinations that
6597 : were not transformed into FMAs because of active deferring.  Entries are
   added at the end of after_dom_children and the set is consulted through
   last_fma_candidate_feeds_initial_phi.  */
6598 : hash_set<tree> m_last_result_set;
6599 :
6600 : /* Pointer to a flag of the user that needs to be set if CFG has been
6601 : modified.  The flag itself is owned by the caller of the constructor. */
6602 : bool *m_cfg_changed_p;
6603 : };
6604 :
/* Per-block hook of the dominator walk, applied to BB.

   The PHI nodes of BB are offered to the PHI-based matchers first
   (saturating add/sub/trunc/mul and spaceship); a PHI is removed when one
   of them reports success.  Then every statement after BB's labels is
   visited and dispatched on its kind: assignments by RHS code, calls by
   combined function code, and GIMPLE_CONDs.  Finally the FMA deferring
   state collected for this block is either cancelled or its last result
   is recorded in m_last_result_set.  */
6605 : void
6606 10065745 : math_opts_dom_walker::after_dom_children (basic_block bb)
6607 : {
6608 10065745 : gimple_stmt_iterator gsi;
6609 :
/* FMA deferring state is local to this invocation.  NOTE(review): the
   constructor argument presumably enables deferring only when
   param_avoid_fma_max_bits is positive -- confirm against
   fma_deferring_state.  */
6610 10065745 : fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
6611 :
/* Walk the PHIs with a look-ahead iterator: PSI_NEXT is computed before
   the matchers run because the current PHI may be removed below.  */
6612 14156061 : for (gphi_iterator psi_next, psi = gsi_start_phis (bb); !gsi_end_p (psi);
6613 4090316 : psi = psi_next)
6614 : {
6615 4090316 : psi_next = psi;
6616 4090316 : gsi_next (&psi_next);
6617 :
/* This GSI shadows the function-level one; it is positioned after BB's
   labels and is what the PHI matchers receive.  */
6618 4090316 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
6619 4090316 : gphi *phi = psi.phi ();
6620 :
/* The matchers are tried in order and the first success wins.  The PHI
   is then removed without releasing its result (release_lhs_p is false);
   presumably the matcher has given the result a new definition --
   confirm.  */
6621 4090316 : if (match_saturation_add (&gsi, phi)
6622 4090299 : || match_saturation_sub (&gsi, phi)
6623 4090273 : || match_saturation_trunc (&gsi, phi)
6624 4090273 : || match_saturation_mul (&gsi, phi)
6625 8180589 : || match_spaceship (&gsi, phi))
6626 156 : remove_phi_node (&psi, /* release_lhs_p */ false);
6627 : }
6628 :
/* Statement walk.  The loop header has no increment: each iteration
   either reaches the gsi_next at the bottom or uses `continue' after the
   current statement has already been dealt with.  */
6629 89261760 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
6630 : {
6631 79196015 : gimple *stmt = gsi_stmt (gsi);
6632 79196015 : enum tree_code code;
6633 :
6634 79196015 : if (is_gimple_assign (stmt))
6635 : {
6636 20751762 : code = gimple_assign_rhs_code (stmt);
6637 20751762 : switch (code)
6638 : {
/* Multiplication: try widening multiply, then copysign expansion, then
   FMA formation.  Only when convert_mult_to_fma succeeds is STMT removed
   and its definitions released; `continue' then bypasses the gsi_next at
   the bottom of the loop.  Otherwise fall back to the overflow and
   unsigned saturating-subtract matchers.  */
6639 724340 : case MULT_EXPR:
6640 724340 : if (!convert_mult_to_widen (stmt, &gsi)
6641 714265 : && !convert_expand_mult_copysign (stmt, &gsi)
6642 1438562 : && convert_mult_to_fma (stmt,
6643 : gimple_assign_rhs1 (stmt),
6644 : gimple_assign_rhs2 (stmt),
6645 : &fma_state))
6646 : {
6647 16787 : gsi_remove (&gsi, true);
6648 16787 : release_defs (stmt);
6649 16787 : continue;
6650 : }
6651 707553 : match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
6652 707553 : match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
6653 707553 : break;
6654 :
/* Addition gets two extra saturation matchers and then shares the
   handling of subtraction.  */
6655 2250669 : case PLUS_EXPR:
6656 2250669 : match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
6657 2250669 : match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
6658 : /* fall-through */
6659 2546833 : case MINUS_EXPR:
6660 2546833 : if (!convert_plusminus_to_widen (&gsi, stmt, code))
6661 : {
6662 2546833 : match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
/* Only look for an add/sub-with-carry pattern if the iterator still
   points at STMT after match_arith_overflow.  */
6663 2546833 : if (gsi_stmt (gsi) == stmt)
6664 2540316 : match_uaddc_usubc (&gsi, stmt, code);
6665 : }
6666 : break;
6667 :
/* A true result from match_arith_overflow skips the gsi_next at the
   bottom of the loop.  */
6668 36416 : case BIT_NOT_EXPR:
6669 36416 : if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
6670 170 : continue;
6671 : break;
6672 :
6673 47633 : case TRUNC_MOD_EXPR:
6674 47633 : convert_to_divmod (as_a<gassign *> (stmt));
6675 47633 : break;
6676 :
6677 169355 : case RSHIFT_EXPR:
6678 169355 : convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
6679 169355 : break;
6680 :
/* Bitwise OR gets three saturation matchers and then shares the
   add/sub-with-carry matching of bitwise XOR.  */
6681 183045 : case BIT_IOR_EXPR:
6682 183045 : match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
6683 183045 : match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
6684 183045 : match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
6685 : /* fall-through */
6686 213159 : case BIT_XOR_EXPR:
6687 213159 : match_uaddc_usubc (&gsi, stmt, code);
6688 213159 : break;
6689 :
/* Comparisons computed by an assignment; GIMPLE_COND comparisons are
   handled separately further down.  */
6690 287994 : case EQ_EXPR:
6691 287994 : case NE_EXPR:
6692 287994 : case LE_EXPR:
6693 287994 : case GT_EXPR:
6694 287994 : match_single_bit_test (&gsi, stmt);
6695 287994 : break;
6696 :
6697 333245 : case COND_EXPR:
6698 333245 : case BIT_AND_EXPR:
6699 333245 : match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
6700 333245 : break;
6701 :
6702 2368346 : case NOP_EXPR:
6703 2368346 : match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
6704 2368346 : match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
6705 2368346 : match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
6706 2368346 : break;
6707 :
6708 : default:;
6709 : }
6710 : }
6711 58444253 : else if (is_gimple_call (stmt))
6712 : {
6713 4732818 : switch (gimple_call_combined_fn (stmt))
6714 : {
/* Conditional multiply: call arguments 1 and 2 are the multiplication
   operands and argument 0 is passed as the extra (condition) argument.
   On success the call is removed just like a plain MULT_EXPR.  */
6715 129 : case CFN_COND_MUL:
6716 129 : if (convert_mult_to_fma (stmt,
6717 : gimple_call_arg (stmt, 1),
6718 : gimple_call_arg (stmt, 2),
6719 : &fma_state,
6720 : gimple_call_arg (stmt, 0)))
6721 :
6722 : {
6723 84 : gsi_remove (&gsi, true);
6724 84 : release_defs (stmt);
6725 84 : continue;
6726 : }
6727 : break;
6728 :
/* Same as above, additionally forwarding call arguments 4 and 5
   (presumably the length and bias of the COND_LEN form -- confirm).  */
6729 0 : case CFN_COND_LEN_MUL:
6730 0 : if (convert_mult_to_fma (stmt,
6731 : gimple_call_arg (stmt, 1),
6732 : gimple_call_arg (stmt, 2),
6733 : &fma_state,
6734 : gimple_call_arg (stmt, 0),
6735 : gimple_call_arg (stmt, 4),
6736 : gimple_call_arg (stmt, 5)))
6737 :
6738 : {
6739 0 : gsi_remove (&gsi, true);
6740 0 : release_defs (stmt);
6741 0 : continue;
6742 : }
6743 : break;
6744 :
/* CFN_LAST is presumably what gimple_call_combined_fn yields for a call
   it cannot classify as a built-in or internal function -- confirm.
   Such a call cancels FMA deferring.  */
6745 3591416 : case CFN_LAST:
6746 3591416 : cancel_fma_deferring (&fma_state);
6747 3591416 : break;
6748 :
6749 : default:
6750 : break;
6751 : }
6752 : }
6753 53711435 : else if (gimple_code (stmt) == GIMPLE_COND)
6754 : {
6755 4075183 : match_single_bit_test (&gsi, stmt);
6756 4075183 : optimize_spaceship (as_a <gcond *> (stmt));
6757 : }
6758 79178974 : gsi_next (&gsi);
6759 : }
/* If deferring is still active and an initial PHI was recorded, decide
   the fate of the deferred candidates: cancel deferring unless the last
   FMA candidate feeds the initial PHI, in which case remember its result
   in m_last_result_set.  */
6760 10065745 : if (fma_state.m_deferring_p
6761 7375294 : && fma_state.m_initial_phi)
6762 : {
6763 356 : gcc_checking_assert (fma_state.m_last_result);
6764 356 : if (!last_fma_candidate_feeds_initial_phi (&fma_state,
6765 : &m_last_result_set))
6766 260 : cancel_fma_deferring (&fma_state);
6767 : else
6768 96 : m_last_result_set.add (fma_state.m_last_result);
6769 : }
6770 10065745 : }
6771 :
6772 :
/* Main entry point of the pass for function FUN.

   Clears widen_mul_stats, computes dominator information, renumbers the
   statement UIDs, runs math_opts_dom_walker starting at the entry block,
   and reports the collected counters through statistics_counter_event.
   Returns TODO_cleanup_cfg if the walker flagged a CFG change, and 0
   otherwise.

   NOTE(review): renumber_gimple_stmt_uids and ENTRY_BLOCK_PTR_FOR_FN are
   given cfun rather than FUN; presumably both name the same function
   while the pass executes -- confirm.  */
6773 : unsigned int
6774 961530 : pass_optimize_widening_mul::execute (function *fun)
6775 : {
/* Set through the pointer handed to the walker when a transformation
   changes the CFG.  */
6776 961530 : bool cfg_changed = false;
6777 :
6778 961530 : memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
6779 961530 : calculate_dominance_info (CDI_DOMINATORS);
6780 961530 : renumber_gimple_stmt_uids (cfun);
6781 :
/* The walker is a temporary that lives only for this single walk.  */
6782 961530 : math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6783 :
6784 961530 : statistics_counter_event (fun, "widening multiplications inserted",
6785 : widen_mul_stats.widen_mults_inserted);
6786 961530 : statistics_counter_event (fun, "widening maccs inserted",
6787 : widen_mul_stats.maccs_inserted);
6788 961530 : statistics_counter_event (fun, "fused multiply-adds inserted",
6789 : widen_mul_stats.fmas_inserted);
6790 961530 : statistics_counter_event (fun, "divmod calls inserted",
6791 : widen_mul_stats.divmod_calls_inserted);
6792 961530 : statistics_counter_event (fun, "highpart multiplications inserted",
6793 : widen_mul_stats.highpart_mults_inserted);
6794 :
6795 961530 : return cfg_changed ? TODO_cleanup_cfg : 0;
6796 : }
6797 :
6798 : } // anon namespace
6799 :
/* Factory for the "widening_mul" pass: returns a newly allocated
   pass_optimize_widening_mul created in context CTXT.  Ownership of the
   returned object presumably passes to the caller (the pass manager) --
   confirm.  */
6800 : gimple_opt_pass *
6801 298828 : make_pass_optimize_widening_mul (gcc::context *ctxt)
6802 : {
6803 298828 : return new pass_optimize_widening_mul (ctxt);
6804 : }
|