Line data Source code
1 : /* SLP - Pattern matcher on SLP trees
2 : Copyright (C) 2020-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "target.h"
25 : #include "rtl.h"
26 : #include "tree.h"
27 : #include "gimple.h"
28 : #include "tree-pass.h"
29 : #include "ssa.h"
30 : #include "optabs-tree.h"
31 : #include "insn-config.h"
32 : #include "recog.h" /* FIXME: for insn_data */
33 : #include "fold-const.h"
34 : #include "stor-layout.h"
35 : #include "gimple-iterator.h"
36 : #include "cfgloop.h"
37 : #include "tree-vectorizer.h"
38 : #include "langhooks.h"
39 : #include "gimple-walk.h"
40 : #include "dbgcnt.h"
41 : #include "tree-vector-builder.h"
42 : #include "vec-perm-indices.h"
43 : #include "gimple-fold.h"
44 : #include "internal-fn.h"
45 :
46 : /* SLP Pattern matching mechanism.
47 :
48 : This extension to the SLP vectorizer allows one to transform the generated SLP
49 : tree based on any pattern. The difference between this and the normal vect
50 : pattern matcher is that unlike the former, this matcher allows you to match
51 : with instructions that do not belong to the same SSA dominator graph.
52 :
53 : The only requirement that this pattern matcher has is that you are only
54 : only allowed to either match an entire group or none.
55 :
56 : The pattern matcher currently only allows you to perform replacements to
57 : internal functions.
58 :
59 : Once the patterns are matched it is one way, these cannot be undone. It is
60 : currently not supported to match patterns recursively.
61 :
62 : To add a new pattern, implement the vect_pattern class and add the type to
63 : slp_patterns.
64 :
65 : */
66 :
67 : /*******************************************************************************
68 : * vect_pattern class
69 : ******************************************************************************/
70 :
71 : /* Default implementation of recognize that performs matching, validation and
72 : replacement of nodes but that can be overridden if required. */
73 :
74 : static bool
75 4732 : vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
76 : {
77 4732 : tree vectype = SLP_TREE_VECTYPE (node);
78 4732 : if (ifn == IFN_LAST || !vectype)
79 : return false;
80 :
81 4732 : if (dump_enabled_p ())
82 708 : dump_printf_loc (MSG_NOTE, vect_location,
83 : "Found %s pattern in SLP tree\n",
84 : internal_fn_name (ifn));
85 :
86 4732 : if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
87 : {
88 1107 : if (dump_enabled_p ())
89 18 : dump_printf_loc (MSG_NOTE, vect_location,
90 : "Target supports %s vectorization with mode %T\n",
91 : internal_fn_name (ifn), vectype);
92 : }
93 : else
94 : {
95 3625 : if (dump_enabled_p ())
96 : {
97 690 : if (!vectype)
98 : dump_printf_loc (MSG_NOTE, vect_location,
99 : "Target does not support vector type for %G\n",
100 : STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
101 : else
102 690 : dump_printf_loc (MSG_NOTE, vect_location,
103 : "Target does not support %s for vector type "
104 : "%T\n", internal_fn_name (ifn), vectype);
105 : }
106 3625 : return false;
107 : }
108 : return true;
109 : }
110 :
111 : /*******************************************************************************
112 : * General helper types
113 : ******************************************************************************/
114 :
115 : /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 : be matched when looking for expressions that we are interested matching for
117 : complex numbers addition and mla. */
118 :
119 : typedef enum _complex_operation : unsigned {
120 : PLUS_PLUS,
121 : MINUS_PLUS,
122 : PLUS_MINUS,
123 : MULT_MULT,
124 : CMPLX_NONE
125 : } complex_operation_t;
126 :
127 : /*******************************************************************************
128 : * General helper functions
129 : ******************************************************************************/
130 :
131 : /* Helper function of linear_loads_p that checks to see if the load permutation
132 : is sequential and in monotonically increasing order of loads with no gaps.
133 : */
134 :
135 : static inline complex_perm_kinds_t
136 2311 : is_linear_load_p (load_permutation_t loads)
137 : {
138 2385 : if (loads.length() == 0)
139 : return PERM_UNKNOWN;
140 :
141 2311 : unsigned load, i;
142 2311 : complex_perm_kinds_t candidates[4]
143 : = { PERM_ODDODD
144 : , PERM_EVENEVEN
145 : , PERM_EVENODD
146 : , PERM_ODDEVEN
147 : };
148 :
149 2311 : int valid_patterns = 4;
150 8394 : FOR_EACH_VEC_ELT (loads, i, load)
151 : {
152 6157 : unsigned adj_load = load % 2;
153 6157 : if (candidates[0] != PERM_UNKNOWN && adj_load != 1)
154 : {
155 2005 : candidates[0] = PERM_UNKNOWN;
156 2005 : valid_patterns--;
157 : }
158 6157 : if (candidates[1] != PERM_UNKNOWN && adj_load != 0)
159 : {
160 1237 : candidates[1] = PERM_UNKNOWN;
161 1237 : valid_patterns--;
162 : }
163 6157 : if (candidates[2] != PERM_UNKNOWN && load != i)
164 : {
165 2270 : candidates[2] = PERM_UNKNOWN;
166 2270 : valid_patterns--;
167 : }
168 6157 : if (candidates[3] != PERM_UNKNOWN
169 4904 : && load != (i % 2 == 0 ? i + 1 : i - 1))
170 : {
171 1495 : candidates[3] = PERM_UNKNOWN;
172 1495 : valid_patterns--;
173 : }
174 :
175 6157 : if (valid_patterns == 0)
176 : return PERM_UNKNOWN;
177 : }
178 :
179 3604 : for (i = 0; i < sizeof(candidates); i++)
180 5841 : if (candidates[i] != PERM_UNKNOWN)
181 : return candidates[i];
182 :
183 : return PERM_UNKNOWN;
184 : }
185 :
186 : /* Combine complex_perm_kinds A and B into a new permute kind that describes the
187 : resulting operation. */
188 :
189 : static inline complex_perm_kinds_t
190 18338 : vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
191 : {
192 18338 : if (a == b)
193 : return a;
194 :
195 16173 : if (a == PERM_TOP)
196 : return b;
197 :
198 1955 : if (b == PERM_TOP)
199 : return a;
200 :
201 : return PERM_UNKNOWN;
202 : }
203 :
204 : /* Check to see if all loads rooted in ROOT are linear. Linearity is
205 : defined as having no gaps between values loaded. */
206 :
207 : static complex_perm_kinds_t
208 29887 : linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
209 : {
210 29887 : if (!root)
211 : return PERM_UNKNOWN;
212 :
213 29882 : unsigned i;
214 29882 : complex_perm_kinds_t *tmp;
215 :
216 29882 : if ((tmp = perm_cache->get (root)) != NULL)
217 7554 : return *tmp;
218 :
219 22328 : complex_perm_kinds_t retval = PERM_UNKNOWN;
220 22328 : perm_cache->put (root, retval);
221 :
222 : /* If it's a load node, then just read the load permute. */
223 22328 : if (SLP_TREE_DEF_TYPE (root) == vect_internal_def
224 19938 : && !SLP_TREE_PERMUTE_P (root)
225 17122 : && STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))
226 3641 : && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))))
227 : {
228 3641 : if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
229 2311 : retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
230 : else
231 1330 : retval = PERM_EVENODD;
232 3641 : perm_cache->put (root, retval);
233 3641 : return retval;
234 : }
235 18687 : else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
236 : {
237 2390 : retval = PERM_TOP;
238 2390 : perm_cache->put (root, retval);
239 2390 : return retval;
240 : }
241 :
242 : complex_perm_kinds_t kind = PERM_TOP;
243 :
244 : slp_tree child;
245 18865 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
246 : {
247 18338 : complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
248 18338 : kind = vect_merge_perms (kind, res);
249 : /* Unknown and Top are not valid on blends as they produce no permute. */
250 18338 : retval = kind;
251 18338 : if (kind == PERM_UNKNOWN || kind == PERM_TOP)
252 : return retval;
253 : }
254 :
255 527 : retval = kind;
256 :
257 527 : perm_cache->put (root, retval);
258 527 : return retval;
259 : }
260 :
261 :
262 : /* This function attempts to make a node rooted in NODE is linear. If the node
263 : if already linear than the node itself is returned in RESULT.
264 :
265 : If the node is not linear then a new VEC_PERM_EXPR node is created with a
266 : lane permute that when applied will make the node linear. If such a
267 : permute cannot be created then FALSE is returned from the function.
268 :
269 : Here linearity is defined as having a sequential, monotically increasing
270 : load position inside the load permute generated by the loads reachable from
271 : NODE. */
272 :
273 : static slp_tree
274 0 : vect_build_swap_evenodd_node (slp_tree node)
275 : {
276 : /* Attempt to linearise the permute. */
277 0 : lane_permutation_t zipped;
278 0 : zipped.create (SLP_TREE_LANES (node));
279 :
280 0 : for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
281 : {
282 0 : zipped.quick_push (std::make_pair (0, x+1));
283 0 : zipped.quick_push (std::make_pair (0, x));
284 : }
285 :
286 : /* Create the new permute node and store it instead. */
287 0 : slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
288 0 : SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
289 0 : SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
290 0 : SLP_TREE_CHILDREN (vnode).quick_push (node);
291 0 : SLP_TREE_REF_COUNT (vnode) = 1;
292 0 : SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
293 0 : SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
294 0 : SLP_TREE_REF_COUNT (node)++;
295 0 : return vnode;
296 : }
297 :
298 : /* Checks to see of the expression represented by NODE is a gimple assign with
299 : code CODE. */
300 :
301 : static inline bool
302 11844841 : vect_match_expression_p (slp_tree node, code_helper code)
303 : {
304 11844841 : if (!node
305 10962460 : || SLP_TREE_PERMUTE_P (node)
306 10904840 : || !SLP_TREE_REPRESENTATIVE (node))
307 : return false;
308 :
309 8469286 : gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
310 8469286 : if (is_gimple_assign (expr)
311 7216901 : && code.is_tree_code ()
312 15677139 : && gimple_assign_rhs_code (expr) == (tree_code) code)
313 : return true;
314 7945607 : if (is_a <gcall *> (expr)
315 91295 : && !code.is_tree_code ()
316 7945655 : && gimple_call_combined_fn (expr) == (combined_fn) code)
317 : return true;
318 :
319 : return false;
320 : }
321 :
322 : /* Check if the given lane permute in PERMUTES matches an alternating sequence
323 : of {even odd even odd ...}. This to account for unrolled loops. Further
324 : mode there resulting permute must be linear. */
325 :
326 : static inline bool
327 7192 : vect_check_evenodd_blend (lane_permutation_t &permutes,
328 : unsigned even, unsigned odd)
329 : {
330 7530 : if (permutes.length () == 0
331 6951 : || permutes.length () % 2 != 0)
332 : return false;
333 :
334 6929 : unsigned val[2] = {even, odd};
335 6929 : unsigned seed = 0;
336 23847 : for (unsigned i = 0; i < permutes.length (); i++)
337 17234 : if (permutes[i].first != val[i % 2]
338 17234 : || permutes[i].second != seed++)
339 : return false;
340 :
341 : return true;
342 : }
343 :
344 : /* This function will match the two gimple expressions representing NODE1 and
345 : NODE2 in parallel and returns the pair operation that represents the two
346 : expressions in the two statements.
347 :
348 : If match is successful then the corresponding complex_operation is
349 : returned and the arguments to the two matched operations are returned in OPS.
350 :
351 : If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
352 : from the two nodes alternatingly.
353 :
354 : If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
355 :
356 : e.g. the following gimple statements
357 :
358 : stmt 0 _39 = _37 + _12;
359 : stmt 1 _6 = _38 - _36;
360 :
361 : will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
362 : */
363 :
364 : static complex_operation_t
365 1530041 : vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
366 : bool two_operands = true, vec<slp_tree> *ops = NULL)
367 : {
368 1530041 : complex_operation_t result = CMPLX_NONE;
369 :
370 1530041 : if (vect_match_expression_p (node1, MINUS_EXPR)
371 46579 : && vect_match_expression_p (node2, PLUS_EXPR)
372 1533286 : && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
373 : result = MINUS_PLUS;
374 1527129 : else if (vect_match_expression_p (node1, PLUS_EXPR)
375 157203 : && vect_match_expression_p (node2, MINUS_EXPR)
376 1531076 : && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
377 : result = PLUS_MINUS;
378 1523428 : else if (vect_match_expression_p (node1, PLUS_EXPR)
379 1523428 : && vect_match_expression_p (node2, PLUS_EXPR))
380 : result = PLUS_PLUS;
381 1520629 : else if (vect_match_expression_p (node1, MULT_EXPR)
382 1520629 : && vect_match_expression_p (node2, MULT_EXPR))
383 4100 : result = MULT_MULT;
384 :
385 1530041 : if (result != CMPLX_NONE && ops != NULL)
386 : {
387 13477 : if (two_operands)
388 : {
389 13477 : auto l0node = SLP_TREE_CHILDREN (node1);
390 13477 : auto l1node = SLP_TREE_CHILDREN (node2);
391 :
392 : /* Check if the tree is connected as we expect it. */
393 20519 : if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
394 6647 : || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
395 1530041 : return CMPLX_NONE;
396 : }
397 6890 : ops->safe_push (node1);
398 6890 : ops->safe_push (node2);
399 : }
400 : return result;
401 : }
402 :
403 : /* Overload of vect_detect_pair_op that matches against the representative
404 : statements in the children of NODE. It is expected that NODE has exactly
405 : two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
406 :
407 : static complex_operation_t
408 5223772 : vect_detect_pair_op (slp_tree node, bool two_operands = true,
409 : vec<slp_tree> *ops = NULL)
410 : {
411 5223772 : if (!two_operands && SLP_TREE_PERMUTE_P (node))
412 : return CMPLX_NONE;
413 :
414 5223772 : if (SLP_TREE_CHILDREN (node).length () != 2)
415 : return CMPLX_NONE;
416 :
417 1530041 : vec<slp_tree> children = SLP_TREE_CHILDREN (node);
418 1530041 : lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
419 :
420 1530041 : return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
421 1530041 : ops);
422 : }
423 :
424 : /*******************************************************************************
425 : * complex_pattern class
426 : ******************************************************************************/
427 :
428 : /* SLP Complex Numbers pattern matching.
429 :
430 : As an example, the following simple loop:
431 :
432 : double a[restrict N]; double b[restrict N]; double c[restrict N];
433 :
434 : for (int i=0; i < N; i+=2)
435 : {
436 : c[i] = a[i] - b[i+1];
437 : c[i+1] = a[i+1] + b[i];
438 : }
439 :
440 : which represents a complex addition on with a rotation of 90* around the
441 : argand plane. i.e. if `a` and `b` were complex numbers then this would be the
442 : same as `a + (b * I)`.
443 :
444 : Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
445 : both recognized in order for the pattern to work. As an SLP tree this is
446 : represented as
447 :
448 : +--------------------------------+
449 : | stmt 0 *_9 = _10; |
450 : | stmt 1 *_15 = _16; |
451 : +--------------------------------+
452 : |
453 : |
454 : v
455 : +--------------------------------+
456 : | stmt 0 _10 = _4 - _8; |
457 : | stmt 1 _16 = _12 + _14; |
458 : | lane permutation { 0[0] 1[1] } |
459 : +--------------------------------+
460 : | |
461 : | |
462 : | |
463 : +-----+ | | +-----+
464 : | | | | | |
465 : +-----| { } |<-----+ +----->| { } --------+
466 : | | | +------------------| | |
467 : | +-----+ | +-----+ |
468 : | | | |
469 : | | | |
470 : | +------|------------------+ |
471 : | | | |
472 : v v v v
473 : +--------------------------+ +--------------------------------+
474 : | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
475 : | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
476 : | load permutation { 1 0 } | | load permutation { 0 1 } |
477 : +--------------------------+ +--------------------------------+
478 :
479 : The pattern matcher allows you to replace both statements 0 and 1 or none at
480 : all. Because this operation is a two operands operation the actual nodes
481 : being replaced are those in the { } nodes. The actual scalar statements
482 : themselves are not replaced or used during the matching but instead the
483 : SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
484 : replace and match on any number of nodes.
485 :
486 : Because the pattern matcher matches on the representative statement for the
487 : SLP node the case of two_operators it allows you to match the children of the
488 : node. This is done using the method `recognize ()`.
489 :
490 : */
491 :
492 : /* The complex_pattern class contains common code for pattern matchers that work
493 : on complex numbers. These provide functionality to allow de-construction and
494 : validation of sequences depicting/transforming REAL and IMAG pairs. */
495 :
496 : class complex_pattern : public vect_pattern
497 : {
498 : protected:
499 : auto_vec<slp_tree> m_workset;
500 32 : complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
501 64 : : vect_pattern (node, m_ops, ifn)
502 : {
503 32 : this->m_workset.safe_push (*node);
504 32 : }
505 :
506 : public:
507 : void build (vec_info *) override;
508 :
509 : static internal_fn
510 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
511 : vec<slp_tree> *);
512 : };
513 :
514 : /* Create a replacement pattern statement for each node in m_node and inserts
515 : the new statement into m_node as the new representative statement. The old
516 : statement is marked as being in a pattern defined by the new statement. The
517 : statement is created as call to internal function IFN with m_num_args
518 : arguments.
519 :
520 : Furthermore the new pattern is also added to the vectorization information
521 : structure VINFO and the old statement STMT_INFO is marked as unused while
522 : the new statement is marked as used and the number of SLP uses of the new
523 : statement is incremented.
524 :
525 : The newly created SLP nodes are marked as SLP only and will be dissolved
526 : if SLP is aborted.
527 :
528 : The newly created gimple call is returned and the BB remains unchanged.
529 :
530 : This default method is designed to only match against simple operands where
531 : all the input and output types are the same.
532 : */
533 :
534 : void
535 32 : complex_pattern::build (vec_info *vinfo)
536 : {
537 32 : stmt_vec_info stmt_info;
538 :
539 32 : auto_vec<tree> args;
540 32 : args.create (this->m_num_args);
541 32 : args.quick_grow_cleared (this->m_num_args);
542 32 : slp_tree node;
543 32 : unsigned ix;
544 32 : stmt_vec_info call_stmt_info;
545 32 : gcall *call_stmt = NULL;
546 :
547 : /* Now modify the nodes themselves. */
548 96 : FOR_EACH_VEC_ELT (this->m_workset, ix, node)
549 : {
550 : /* Calculate the location of the statement in NODE to replace. */
551 32 : stmt_info = SLP_TREE_REPRESENTATIVE (node);
552 32 : gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
553 32 : tree lhs_old_stmt = gimple_get_lhs (old_stmt);
554 32 : tree type = TREE_TYPE (lhs_old_stmt);
555 :
556 : /* Create the argument set for use by gimple_build_call_internal_vec. */
557 112 : for (unsigned i = 0; i < this->m_num_args; i++)
558 80 : args[i] = lhs_old_stmt;
559 :
560 : /* Create the new pattern statements. */
561 32 : call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
562 32 : tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
563 32 : gimple_call_set_lhs (call_stmt, var);
564 32 : gimple_set_location (call_stmt, gimple_location (old_stmt));
565 32 : gimple_call_set_nothrow (call_stmt, true);
566 :
567 : /* Adjust the book-keeping for the new and old statements for use during
568 : SLP. This is required to get the right VF and statement during SLP
569 : analysis. These changes are created after relevancy has been set for
570 : the nodes as such we need to manually update them. Any changes will be
571 : undone if SLP is cancelled. */
572 32 : call_stmt_info
573 32 : = vinfo->add_pattern_stmt (call_stmt, vect_orig_stmt (stmt_info));
574 :
575 : /* Make sure to mark the representative statement pure_slp and
576 : relevant and transfer reduction info. */
577 32 : STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
578 32 : STMT_SLP_TYPE (call_stmt_info) = pure_slp;
579 :
580 32 : gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
581 32 : STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
582 :
583 : /* Since we are replacing all the statements in the group with the same
584 : thing it doesn't really matter. So just set it every time a new stmt
585 : is created. */
586 32 : SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
587 32 : SLP_TREE_LANE_PERMUTATION (node).release ();
588 32 : SLP_TREE_CODE (node) = ERROR_MARK;
589 : }
590 32 : }
591 :
592 : /*******************************************************************************
593 : * complex_add_pattern class
594 : ******************************************************************************/
595 :
596 : class complex_add_pattern : public complex_pattern
597 : {
598 : protected:
599 0 : complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
600 0 : : complex_pattern (node, m_ops, ifn)
601 : {
602 0 : this->m_num_args = 2;
603 : }
604 :
605 : public:
606 : void build (vec_info *) final override;
607 : static internal_fn
608 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
609 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
610 :
611 : static vect_pattern*
612 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
613 : slp_tree *);
614 :
615 : static vect_pattern*
616 0 : mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
617 : {
618 0 : return new complex_add_pattern (node, m_ops, ifn);
619 : }
620 : };
621 :
622 : /* Perform a replacement of the detected complex add pattern with the new
623 : instruction sequences. */
624 :
625 : void
626 0 : complex_add_pattern::build (vec_info *vinfo)
627 : {
628 0 : SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
629 :
630 0 : slp_tree node = this->m_ops[0];
631 0 : vec<slp_tree> children = SLP_TREE_CHILDREN (node);
632 :
633 : /* First re-arrange the children. */
634 0 : SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
635 0 : SLP_TREE_CHILDREN (*this->m_node)[1] =
636 0 : vect_build_swap_evenodd_node (children[1]);
637 :
638 0 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
639 0 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
640 0 : vect_free_slp_tree (this->m_ops[0]);
641 0 : vect_free_slp_tree (this->m_ops[1]);
642 :
643 0 : complex_pattern::build (vinfo);
644 0 : }
645 :
646 : /* Pattern matcher for trying to match complex addition pattern in SLP tree.
647 :
648 : If no match is found then IFN is set to IFN_LAST.
649 : This function matches the patterns shaped as:
650 :
651 : c[i] = a[i] - b[i+1];
652 : c[i+1] = a[i+1] + b[i];
653 :
654 : If a match occurred then TRUE is returned, else FALSE. The initial match is
655 : expected to be in OP1 and the initial match operands in args0. */
656 :
657 : internal_fn
658 5209386 : complex_add_pattern::matches (complex_operation_t op,
659 : slp_tree_to_load_perm_map_t *perm_cache,
660 : slp_compat_nodes_map_t * /* compat_cache */,
661 : slp_tree *node, vec<slp_tree> *ops)
662 : {
663 5209386 : internal_fn ifn = IFN_LAST;
664 :
665 : /* Find the two components. Rotation in the complex plane will modify
666 : the operations:
667 :
668 : * Rotation 0: + +
669 : * Rotation 90: - +
670 : * Rotation 180: - -
671 : * Rotation 270: + -
672 :
673 : Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
674 : to care about them here. */
675 5209386 : if (op == MINUS_PLUS)
676 : ifn = IFN_COMPLEX_ADD_ROT90;
677 5206508 : else if (op == PLUS_MINUS)
678 : ifn = IFN_COMPLEX_ADD_ROT270;
679 : else
680 : return ifn;
681 :
682 : /* verify that there is a permute, otherwise this isn't a pattern we
683 : we support. */
684 6556 : gcc_assert (ops->length () == 2);
685 :
686 6556 : vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
687 :
688 : /* First node must be unpermuted. */
689 6556 : if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
690 : return IFN_LAST;
691 :
692 : /* Second node must be permuted. */
693 524 : if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
694 : return IFN_LAST;
695 :
696 348 : if (!vect_pattern_validate_optab (ifn, *node))
697 : return IFN_LAST;
698 :
699 : return ifn;
700 : }
701 :
702 : /* Attempt to recognize a complex add pattern. */
703 :
704 : vect_pattern*
705 0 : complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
706 : slp_compat_nodes_map_t *compat_cache,
707 : slp_tree *node)
708 : {
709 0 : auto_vec<slp_tree> ops;
710 0 : complex_operation_t op
711 0 : = vect_detect_pair_op (*node, true, &ops);
712 0 : internal_fn ifn
713 0 : = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
714 0 : if (ifn == IFN_LAST)
715 : return NULL;
716 :
717 0 : return new complex_add_pattern (node, &ops, ifn);
718 0 : }
719 :
720 : /*******************************************************************************
721 : * complex_mul_pattern
722 : ******************************************************************************/
723 :
724 : /* Helper function to check if PERM is KIND or PERM_TOP. */
725 :
726 : static inline bool
727 549 : is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
728 : slp_tree op1, complex_perm_kinds_t kind1,
729 : slp_tree op2, complex_perm_kinds_t kind2)
730 : {
731 549 : complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
732 549 : if (perm1 != kind1 && perm1 != PERM_TOP)
733 : return false;
734 :
735 189 : complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
736 189 : if (perm2 != kind2 && perm2 != PERM_TOP)
737 : return false;
738 :
739 : return true;
740 : }
741 :
742 : enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND };
743 :
744 : static inline bool
745 393 : compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
746 : slp_tree a, int *pa, slp_tree b, int *pb)
747 : {
748 393 : bool *tmp;
749 393 : std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
750 393 : if ((tmp = compat_cache->get (key)) != NULL)
751 27 : return *tmp;
752 :
753 366 : compat_cache->put (key, false);
754 :
755 422 : if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
756 : return false;
757 :
758 364 : if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
759 : return false;
760 :
761 : /* Only internal nodes can be loads, as such we can't check further if they
762 : are externals. */
763 364 : if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
764 : {
765 174 : for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
766 : {
767 122 : tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
768 122 : tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
769 122 : if (!operand_equal_p (op1, op2, 0))
770 : return false;
771 : }
772 :
773 52 : compat_cache->put (key, true);
774 52 : return true;
775 : }
776 :
777 310 : auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
778 310 : auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
779 :
780 310 : if (gimple_code (a_stmt) != gimple_code (b_stmt))
781 : return false;
782 :
783 : /* code, children, type, externals, loads, constants */
784 310 : if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
785 : return false;
786 :
787 : /* At this point, a and b are known to be the same gimple operations. */
788 310 : if (is_gimple_call (a_stmt))
789 : {
790 0 : if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
791 : dyn_cast <gcall *> (b_stmt), false))
792 : return false;
793 : }
794 310 : else if (!is_gimple_assign (a_stmt))
795 : return false;
796 : else
797 : {
798 310 : tree_code acode = gimple_assign_rhs_code (a_stmt);
799 310 : tree_code bcode = gimple_assign_rhs_code (b_stmt);
800 310 : if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
801 205 : && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
802 : return true;
803 :
804 105 : if (acode != bcode)
805 : return false;
806 : }
807 :
808 105 : if (!STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a))
809 78 : || !STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b)))
810 : {
811 80 : for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
812 : {
813 53 : tree t1 = gimple_arg (a_stmt, i);
814 53 : tree t2 = gimple_arg (b_stmt, i);
815 53 : if (TREE_CODE (t1) != TREE_CODE (t2))
816 : return false;
817 :
818 : /* If SSA name then we will need to inspect the children
819 : so we can punt here. */
820 53 : if (TREE_CODE (t1) == SSA_NAME)
821 39 : continue;
822 :
823 14 : if (!operand_equal_p (t1, t2, 0))
824 : return false;
825 : }
826 : }
827 : else
828 : {
829 78 : auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
830 78 : auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
831 : /* Don't check the last dimension as that's checked by the lineary
832 : checks. This check is also much stricter than what we need
833 : because it doesn't consider loading from adjacent elements
834 : in the same struct as loading from the same base object.
835 : But for now, I'll play it safe. */
836 78 : if (!same_data_refs (dr1, dr2, 1))
837 : return false;
838 : }
839 :
840 136 : for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
841 : {
842 53 : if (!compatible_complex_nodes_p (compat_cache,
843 53 : SLP_TREE_CHILDREN (a)[i], pa,
844 53 : SLP_TREE_CHILDREN (b)[i], pb))
845 : return false;
846 : }
847 :
848 83 : compat_cache->put (key, true);
849 83 : return true;
850 : }
851 :
852 :
853 : /* Check to see if the operands to two multiplies, 2 each in LEFT_OP and
854 : RIGHT_OP match a complex multiplication or complex multiply-and-accumulate
855 : or complex multiply-and-subtract pattern. Do this using the permute cache
856 : PERM_CACHE and the combination compatibility list COMPAT_CACHE. If
857 : the operation is successful the matching operands are returned in OPS and
858 : _STATUS indicates if the operation matched includes a conjugate of one of the
859 : operands. If the operation succeeds True is returned, otherwise False and
860 : the values in ops are meaningless. */
861 : static inline bool
862 1612 : vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
863 : slp_compat_nodes_map_t *compat_cache,
864 : const vec<slp_tree> &left_op,
865 : const vec<slp_tree> &right_op,
866 : bool subtract, vec<slp_tree> &ops,
867 : enum _conj_status *_status)
868 : {
869 1612 : enum _conj_status stats = CONJ_NONE;
870 :
871 : /* The complex operations can occur in two layouts and two permute sequences
872 : so declare them and re-use them. */
873 1612 : int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}. */
874 : , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}. */
875 : };
876 :
877 : /* Now for the corresponding permutes that go with these values. */
878 1612 : complex_perm_kinds_t perms[][4]
879 : = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
880 : , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
881 : };
882 :
883 : /* These permutes are used during comparisons of externals on which
884 : we require strict equality. */
885 1612 : int cq[][4][2]
886 : = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
887 : , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
888 : };
889 :
890 : /* Default to style and perm 0, most operations use this one. */
891 1612 : int style = 0;
892 1612 : int perm = subtract ? 1 : 0;
893 :
894 : /* Check if we have a negate operation, if so absorb the node and continue
895 : looking. */
896 1612 : bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
897 1612 : bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
898 :
899 : /* Create the combined inputs after remapping and flattening. */
900 1612 : ops.create (4);
901 1612 : ops.safe_splice (left_op);
902 1612 : ops.safe_splice (right_op);
903 :
904 : /* Determine which style we're looking at. We only have different ones
905 : whenever a conjugate is involved. */
906 1612 : if (neg0 && neg1)
907 : ;
908 1612 : else if (neg0)
909 : {
910 1 : ops[2] = SLP_TREE_CHILDREN (right_op[0])[0];
911 1 : stats = CONJ_FST;
912 1 : if (subtract)
913 0 : perm = 0;
914 : }
915 1611 : else if (neg1)
916 : {
917 21 : ops[3] = SLP_TREE_CHILDREN (right_op[1])[0];
918 21 : stats = CONJ_SND;
919 21 : perm = 1;
920 : }
921 :
922 1612 : *_status = stats;
923 :
924 : /* Extract out the elements to check. */
925 1612 : slp_tree op0 = ops[styles[style][0]];
926 1612 : slp_tree op1 = ops[styles[style][1]];
927 1612 : slp_tree op2 = ops[styles[style][2]];
928 1612 : slp_tree op3 = ops[styles[style][3]];
929 :
930 : /* Do cheapest test first. If failed no need to analyze further. */
931 1612 : if (linear_loads_p (perm_cache, op0) != perms[perm][0]
932 628 : || linear_loads_p (perm_cache, op1) != perms[perm][1]
933 2161 : || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
934 1423 : return false;
935 :
936 189 : return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
937 189 : cq[perm][1])
938 340 : && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
939 151 : cq[perm][3]);
940 : }
941 :
942 : /* This function combines two nodes containing only even and only odd lanes
943 : together into a single node which contains the nodes in even/odd order
944 : by using a lane permute.
945 :
946 : The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
947 : So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
948 :
949 : The tree REPRESENTATION is taken from the supplied REP along with the
950 : vectype which must be the same between all three nodes.
951 : */
952 :
953 : static slp_tree
954 32 : vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
955 : {
956 32 : lane_permutation_t perm;
957 32 : perm.create (SLP_TREE_LANES (rep));
958 :
959 64 : for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
960 : {
961 32 : perm.quick_push (std::make_pair (0, x));
962 32 : perm.quick_push (std::make_pair (1, x+1));
963 : }
964 :
965 32 : slp_tree vnode = vect_create_new_slp_node (2, VEC_PERM_EXPR);
966 32 : SLP_TREE_LANE_PERMUTATION (vnode) = perm;
967 :
968 32 : SLP_TREE_CHILDREN (vnode).create (2);
969 32 : SLP_TREE_CHILDREN (vnode).quick_push (even);
970 32 : SLP_TREE_CHILDREN (vnode).quick_push (odd);
971 32 : SLP_TREE_REF_COUNT (even)++;
972 32 : SLP_TREE_REF_COUNT (odd)++;
973 32 : SLP_TREE_REF_COUNT (vnode) = 1;
974 :
975 32 : SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
976 64 : gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
977 : /* Representation is set to that of the current node as the vectorizer
978 : can't deal with VEC_PERMs with no representation, as would be the
979 : case with invariants. */
980 32 : SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
981 32 : SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
982 32 : return vnode;
983 : }
984 :
985 : class complex_mul_pattern : public complex_pattern
986 : {
987 : protected:
988 32 : complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
989 64 : : complex_pattern (node, m_ops, ifn)
990 : {
991 32 : this->m_num_args = 2;
992 : }
993 :
994 : public:
995 : void build (vec_info *) final override;
996 : static internal_fn
997 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
998 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
999 :
1000 : static vect_pattern*
1001 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1002 : slp_tree *);
1003 :
1004 : static vect_pattern*
1005 32 : mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1006 : {
1007 32 : return new complex_mul_pattern (node, m_ops, ifn);
1008 : }
1009 :
1010 : };
1011 :
1012 : /* Pattern matcher for trying to match complex multiply and complex multiply
1013 : and accumulate pattern in SLP tree. If the operation matches then IFN
1014 : is set to the operation it matched and the arguments to the two
1015 : replacement statements are put in m_ops.
1016 :
1017 : If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1018 :
1019 : This function matches the patterns shaped as:
1020 :
1021 : double ax = (b[i+1] * a[i]);
1022 : double bx = (a[i+1] * b[i]);
1023 :
1024 : c[i] = c[i] - ax;
1025 : c[i+1] = c[i+1] + bx;
1026 :
1027 : If a match occurred then TRUE is returned, else FALSE. The initial match is
1028 : expected to be in OP1 and the initial match operands in args0. */
1029 :
1030 : internal_fn
1031 5209418 : complex_mul_pattern::matches (complex_operation_t op,
1032 : slp_tree_to_load_perm_map_t *perm_cache,
1033 : slp_compat_nodes_map_t *compat_cache,
1034 : slp_tree *node, vec<slp_tree> *ops)
1035 : {
1036 5209418 : internal_fn ifn = IFN_LAST;
1037 :
1038 5209418 : if (op != MINUS_PLUS)
1039 : return IFN_LAST;
1040 :
1041 : /* It's only valid to form FMAs and MUL with -ffp-contract=fast. */
1042 2910 : if (flag_fp_contract_mode != FP_CONTRACT_FAST
1043 2910 : && FLOAT_TYPE_P (SLP_TREE_VECTYPE (*node)))
1044 : return IFN_LAST;
1045 :
1046 2878 : auto childs = *ops;
1047 2878 : auto l0node = SLP_TREE_CHILDREN (childs[0]);
1048 :
1049 2878 : bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
1050 2878 : bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
1051 2878 : if (!mul0 && !mul1)
1052 : return IFN_LAST;
1053 :
1054 : /* Now operand2+4 may lead to another expression. */
1055 2011 : auto_vec<slp_tree> left_op, right_op;
1056 2011 : slp_tree add0 = NULL;
1057 :
1058 : /* Check if we may be a multiply add. */
1059 2011 : if (!mul0
1060 2011 : && vect_match_expression_p (l0node[0], PLUS_EXPR))
1061 : {
1062 1082 : auto vals = SLP_TREE_CHILDREN (l0node[0]);
1063 : /* Check if it's a multiply, otherwise no idea what this is. */
1064 1082 : if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
1065 2011 : return IFN_LAST;
1066 :
1067 : /* Check if the ADD is linear, otherwise it's not valid complex FMA. */
1068 623 : if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
1069 : return IFN_LAST;
1070 :
1071 28 : left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
1072 28 : add0 = vals[0];
1073 : }
1074 : else
1075 929 : left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
1076 :
1077 957 : right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1078 :
1079 957 : if (left_op.length () != 2
1080 845 : || right_op.length () != 2
1081 : || !mul0
1082 844 : || !mul1
1083 1733 : || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
1084 121 : return IFN_LAST;
1085 :
1086 836 : enum _conj_status status;
1087 836 : auto_vec<slp_tree> res_ops;
1088 836 : if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1089 : right_op, false, res_ops, &status))
1090 : {
1091 : /* Try swapping the order and re-trying since multiplication is
1092 : commutative. */
1093 742 : std::swap (left_op[0], left_op[1]);
1094 742 : std::swap (right_op[0], right_op[1]);
1095 742 : if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1096 : right_op, false, res_ops, &status))
1097 : return IFN_LAST;
1098 : }
1099 :
1100 142 : if (status == CONJ_NONE)
1101 : {
1102 122 : if (add0)
1103 : ifn = IFN_COMPLEX_FMA;
1104 : else
1105 112 : ifn = IFN_COMPLEX_MUL;
1106 : }
1107 : else
1108 : {
1109 20 : if(add0)
1110 : ifn = IFN_COMPLEX_FMA_CONJ;
1111 : else
1112 10 : ifn = IFN_COMPLEX_MUL_CONJ;
1113 : }
1114 :
1115 142 : if (!vect_pattern_validate_optab (ifn, *node))
1116 : return IFN_LAST;
1117 :
1118 32 : ops->truncate (0);
1119 48 : ops->create (add0 ? 4 : 3);
1120 :
1121 32 : if (add0)
1122 16 : ops->quick_push (add0);
1123 :
1124 32 : complex_perm_kinds_t kind = linear_loads_p (perm_cache, res_ops[0]);
1125 32 : if (kind == PERM_EVENODD || kind == PERM_TOP)
1126 : {
1127 16 : ops->quick_push (res_ops[1]);
1128 16 : ops->quick_push (res_ops[3]);
1129 16 : ops->quick_push (res_ops[0]);
1130 : }
1131 16 : else if (kind == PERM_EVENEVEN && status != CONJ_SND)
1132 : {
1133 16 : ops->quick_push (res_ops[0]);
1134 16 : ops->quick_push (res_ops[2]);
1135 16 : ops->quick_push (res_ops[1]);
1136 : }
1137 : else
1138 : {
1139 0 : ops->quick_push (res_ops[0]);
1140 0 : ops->quick_push (res_ops[3]);
1141 0 : ops->quick_push (res_ops[1]);
1142 : }
1143 :
1144 : return ifn;
1145 2847 : }
1146 :
1147 : /* Attempt to recognize a complex mul pattern. */
1148 :
1149 : vect_pattern*
1150 0 : complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1151 : slp_compat_nodes_map_t *compat_cache,
1152 : slp_tree *node)
1153 : {
1154 0 : auto_vec<slp_tree> ops;
1155 0 : complex_operation_t op
1156 0 : = vect_detect_pair_op (*node, true, &ops);
1157 0 : internal_fn ifn
1158 0 : = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1159 0 : if (ifn == IFN_LAST)
1160 : return NULL;
1161 :
1162 0 : return new complex_mul_pattern (node, &ops, ifn);
1163 0 : }
1164 :
1165 : /* Perform a replacement of the detected complex mul pattern with the new
1166 : instruction sequences. */
1167 :
1168 : void
1169 32 : complex_mul_pattern::build (vec_info *vinfo)
1170 : {
1171 32 : slp_tree node;
1172 32 : unsigned i;
1173 32 : switch (this->m_ifn)
1174 : {
1175 16 : case IFN_COMPLEX_MUL:
1176 16 : case IFN_COMPLEX_MUL_CONJ:
1177 16 : {
1178 16 : slp_tree newnode
1179 16 : = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
1180 16 : *this->m_node);
1181 16 : SLP_TREE_REF_COUNT (this->m_ops[2])++;
1182 :
1183 48 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1184 32 : vect_free_slp_tree (node);
1185 :
1186 : /* First re-arrange the children. */
1187 16 : SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1188 16 : SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1189 16 : SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1190 16 : break;
1191 : }
1192 16 : case IFN_COMPLEX_FMA:
1193 16 : case IFN_COMPLEX_FMA_CONJ:
1194 16 : {
1195 16 : SLP_TREE_REF_COUNT (this->m_ops[0])++;
1196 16 : slp_tree newnode
1197 16 : = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
1198 16 : *this->m_node);
1199 16 : SLP_TREE_REF_COUNT (this->m_ops[3])++;
1200 :
1201 48 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1202 32 : vect_free_slp_tree (node);
1203 :
1204 : /* First re-arrange the children. */
1205 16 : SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
1206 16 : SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
1207 16 : SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1208 16 : SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
1209 :
1210 : /* Tell the builder to expect an extra argument. */
1211 16 : this->m_num_args++;
1212 16 : break;
1213 : }
1214 0 : default:
1215 0 : gcc_unreachable ();
1216 : }
1217 :
1218 : /* And then rewrite the node itself. */
1219 32 : complex_pattern::build (vinfo);
1220 32 : }
1221 :
1222 : /*******************************************************************************
1223 : * complex_fms_pattern class
1224 : ******************************************************************************/
1225 :
1226 : class complex_fms_pattern : public complex_pattern
1227 : {
1228 : protected:
1229 0 : complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1230 0 : : complex_pattern (node, m_ops, ifn)
1231 : {
1232 0 : this->m_num_args = 3;
1233 : }
1234 :
1235 : public:
1236 : void build (vec_info *) final override;
1237 : static internal_fn
1238 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1239 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1240 :
1241 : static vect_pattern*
1242 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1243 : slp_tree *);
1244 :
1245 : static vect_pattern*
1246 0 : mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1247 : {
1248 0 : return new complex_fms_pattern (node, m_ops, ifn);
1249 : }
1250 : };
1251 :
1252 :
1253 : /* Pattern matcher for trying to match complex multiply and subtract pattern
1254 : in SLP tree. If the operation matches then IFN is set to the operation
1255 : it matched and the arguments to the two replacement statements are put in
1256 : m_ops.
1257 :
1258 : If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1259 :
1260 : This function matches the patterns shaped as:
1261 :
1262 : double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1263 : double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1264 :
1265 : c[i] = c[i] - ax;
1266 : c[i+1] = c[i+1] + bx;
1267 :
1268 : If a match occurred then TRUE is returned, else FALSE. The initial match is
1269 : expected to be in OP1 and the initial match operands in args0. */
1270 :
1271 : internal_fn
1272 5209418 : complex_fms_pattern::matches (complex_operation_t op,
1273 : slp_tree_to_load_perm_map_t *perm_cache,
1274 : slp_compat_nodes_map_t *compat_cache,
1275 : slp_tree * ref_node, vec<slp_tree> *ops)
1276 : {
1277 5209418 : internal_fn ifn = IFN_LAST;
1278 :
1279 : /* We need to ignore the two_operands nodes that may also match,
1280 : for that we can check if they have any scalar statements and also
1281 : check that it's not a permute node as we're looking for a normal
1282 : MINUS_EXPR operation. */
1283 5209418 : if (op != CMPLX_NONE)
1284 : return IFN_LAST;
1285 :
1286 5202528 : slp_tree root = *ref_node;
1287 5202528 : if (!vect_match_expression_p (root, MINUS_EXPR))
1288 : return IFN_LAST;
1289 :
1290 : /* It's only valid to form FMSs with -ffp-contract=fast. */
1291 68029 : if (flag_fp_contract_mode != FP_CONTRACT_FAST
1292 68029 : && FLOAT_TYPE_P (SLP_TREE_VECTYPE (*ref_node)))
1293 : return IFN_LAST;
1294 :
1295 : /* TODO: Support invariants here, with the new layout CADD now
1296 : can match before we get a chance to try CFMS. */
1297 67970 : auto nodes = SLP_TREE_CHILDREN (root);
1298 67970 : if (!vect_match_expression_p (nodes[1], MULT_EXPR)
1299 82324 : || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
1300 67947 : return IFN_LAST;
1301 :
1302 23 : auto childs = SLP_TREE_CHILDREN (nodes[0]);
1303 23 : auto l0node = SLP_TREE_CHILDREN (childs[0]);
1304 :
1305 : /* Now operand2+4 may lead to another expression. */
1306 23 : auto_vec<slp_tree> left_op, right_op;
1307 23 : left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1308 23 : right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
1309 :
1310 : /* If these nodes don't have any children then they're
1311 : not ones we're interested in. */
1312 23 : if (left_op.length () != 2
1313 17 : || right_op.length () != 2
1314 34 : || !vect_match_expression_p (l0node[1], MULT_EXPR))
1315 6 : return IFN_LAST;
1316 :
1317 17 : enum _conj_status status;
1318 17 : auto_vec<slp_tree> res_ops;
1319 17 : if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1320 : left_op, true, res_ops, &status))
1321 : {
1322 : /* Try swapping the order and re-trying since multiplication is
1323 : commutative. */
1324 17 : std::swap (left_op[0], left_op[1]);
1325 17 : std::swap (right_op[0], right_op[1]);
1326 17 : auto_vec<slp_tree> res_ops;
1327 17 : if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1328 : left_op, true, res_ops, &status))
1329 17 : return IFN_LAST;
1330 17 : }
1331 :
1332 0 : if (status == CONJ_NONE)
1333 : ifn = IFN_COMPLEX_FMS;
1334 : else
1335 0 : ifn = IFN_COMPLEX_FMS_CONJ;
1336 :
1337 0 : if (!vect_pattern_validate_optab (ifn, *ref_node))
1338 : return IFN_LAST;
1339 :
1340 0 : ops->truncate (0);
1341 0 : ops->create (4);
1342 :
1343 0 : complex_perm_kinds_t kind = linear_loads_p (perm_cache, res_ops[2]);
1344 0 : if (kind == PERM_EVENODD)
1345 : {
1346 0 : ops->quick_push (l0node[0]);
1347 0 : ops->quick_push (res_ops[2]);
1348 0 : ops->quick_push (res_ops[3]);
1349 0 : ops->quick_push (res_ops[1]);
1350 : }
1351 : else
1352 : {
1353 0 : ops->quick_push (l0node[0]);
1354 0 : ops->quick_push (res_ops[3]);
1355 0 : ops->quick_push (res_ops[2]);
1356 0 : ops->quick_push (res_ops[0]);
1357 : }
1358 :
1359 : return ifn;
1360 40 : }
1361 :
1362 : /* Attempt to recognize a complex mul pattern. */
1363 :
1364 : vect_pattern*
1365 0 : complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1366 : slp_compat_nodes_map_t *compat_cache,
1367 : slp_tree *node)
1368 : {
1369 0 : auto_vec<slp_tree> ops;
1370 0 : complex_operation_t op
1371 0 : = vect_detect_pair_op (*node, true, &ops);
1372 0 : internal_fn ifn
1373 0 : = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1374 0 : if (ifn == IFN_LAST)
1375 : return NULL;
1376 :
1377 0 : return new complex_fms_pattern (node, &ops, ifn);
1378 0 : }
1379 :
1380 : /* Perform a replacement of the detected complex mul pattern with the new
1381 : instruction sequences. */
1382 :
1383 : void
1384 0 : complex_fms_pattern::build (vec_info *vinfo)
1385 : {
1386 0 : slp_tree node;
1387 0 : unsigned i;
1388 0 : slp_tree newnode =
1389 0 : vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1390 0 : SLP_TREE_REF_COUNT (this->m_ops[0])++;
1391 0 : SLP_TREE_REF_COUNT (this->m_ops[1])++;
1392 :
1393 0 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1394 0 : vect_free_slp_tree (node);
1395 :
1396 0 : SLP_TREE_CHILDREN (*this->m_node).release ();
1397 0 : SLP_TREE_CHILDREN (*this->m_node).create (3);
1398 :
1399 : /* First re-arrange the children. */
1400 0 : SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1401 0 : SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1402 0 : SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1403 :
1404 : /* And then rewrite the node itself. */
1405 0 : complex_pattern::build (vinfo);
1406 0 : }
1407 :
1408 : /*******************************************************************************
1409 : * complex_operations_pattern class
1410 : ******************************************************************************/
1411 :
1412 : /* This function combines all the existing pattern matchers above into one class
1413 : that shares the functionality between them. The initial match is shared
1414 : between all complex operations. */
1415 :
1416 : class complex_operations_pattern : public complex_pattern
1417 : {
1418 : protected:
1419 : complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1420 : internal_fn ifn)
1421 : : complex_pattern (node, m_ops, ifn)
1422 : {
1423 : this->m_num_args = 0;
1424 : }
1425 :
1426 : public:
1427 : void build (vec_info *) final override;
1428 : static internal_fn
1429 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1430 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1431 :
1432 : static vect_pattern*
1433 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1434 : slp_tree *);
1435 : };
1436 :
1437 : /* Dummy matches implementation for proxy object. */
1438 :
1439 : internal_fn
1440 0 : complex_operations_pattern::
1441 : matches (complex_operation_t /* op */,
1442 : slp_tree_to_load_perm_map_t * /* perm_cache */,
1443 : slp_compat_nodes_map_t * /* compat_cache */,
1444 : slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1445 : {
1446 0 : return IFN_LAST;
1447 : }
1448 :
1449 : /* Attempt to recognize a complex mul pattern. */
1450 :
1451 : vect_pattern*
1452 5209418 : complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1453 : slp_compat_nodes_map_t *ccache,
1454 : slp_tree *node)
1455 : {
1456 5209418 : auto_vec<slp_tree> ops;
1457 5209418 : complex_operation_t op
1458 5209418 : = vect_detect_pair_op (*node, true, &ops);
1459 5209418 : internal_fn ifn = IFN_LAST;
1460 :
1461 5209418 : ifn = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
1462 5209418 : if (ifn != IFN_LAST)
1463 0 : return complex_fms_pattern::mkInstance (node, &ops, ifn);
1464 :
1465 5209418 : ifn = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
1466 5209418 : if (ifn != IFN_LAST)
1467 32 : return complex_mul_pattern::mkInstance (node, &ops, ifn);
1468 :
1469 5209386 : ifn = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
1470 5209386 : if (ifn != IFN_LAST)
1471 0 : return complex_add_pattern::mkInstance (node, &ops, ifn);
1472 :
1473 : return NULL;
1474 5209418 : }
1475 :
1476 : /* Dummy implementation of build. */
1477 :
1478 : void
1479 0 : complex_operations_pattern::build (vec_info * /* vinfo */)
1480 : {
1481 0 : gcc_unreachable ();
1482 : }
1483 :
1484 :
1485 : /* The addsub_pattern. */
1486 :
1487 : class addsub_pattern : public vect_pattern
1488 : {
1489 : public:
1490 1075 : addsub_pattern (slp_tree *node, internal_fn ifn)
1491 1075 : : vect_pattern (node, NULL, ifn) {};
1492 :
1493 : void build (vec_info *) final override;
1494 :
1495 : static vect_pattern*
1496 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1497 : slp_tree *);
1498 : };
1499 :
1500 : vect_pattern *
1501 5209418 : addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
1502 : slp_compat_nodes_map_t *, slp_tree *node_)
1503 : {
1504 5209418 : slp_tree node = *node_;
1505 5209418 : if (!SLP_TREE_PERMUTE_P (node)
1506 24185 : || SLP_TREE_CHILDREN (node).length () != 2
1507 5231517 : || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
1508 : return NULL;
1509 :
1510 : /* Match a blend of a plus and a minus op with the same number of plus and
1511 : minus lanes on the same operands. */
1512 17152 : unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1513 17152 : unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1514 17152 : if (l0 == l1)
1515 : return NULL;
1516 14263 : bool fma_p = false;
1517 14263 : bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1518 14263 : PLUS_EXPR);
1519 14263 : if (!l0add_p
1520 14263 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1521 : {
1522 6422 : l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMA);
1523 6422 : if (!l0add_p
1524 6422 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMS))
1525 6420 : return NULL;
1526 : fma_p = true;
1527 : }
1528 7843 : bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1529 7843 : PLUS_EXPR);
1530 7843 : if (l1add_p && fma_p)
1531 : return NULL;
1532 7843 : if (!l1add_p
1533 7843 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
1534 : {
1535 720 : if (!fma_p)
1536 : return NULL;
1537 2 : l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMA);
1538 2 : if (!l1add_p
1539 2 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMS))
1540 0 : return NULL;
1541 : }
1542 7123 : else if (!l1add_p && fma_p)
1543 : return NULL;
1544 :
1545 7125 : slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1546 7125 : slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1547 7125 : if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1548 6719 : && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1549 426 : || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1550 0 : && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
1551 : return NULL;
1552 :
1553 23328 : for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1554 : {
1555 16770 : std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
1556 : /* It has to be alternating -, +, -,
1557 : While we could permute the .ADDSUB inputs and the .ADDSUB output
1558 : that's only profitable over the add + sub + blend if at least
1559 : one of the permute is optimized which we can't determine here. */
1560 25199 : if (perm.first != ((i & 1) ? l1 : l0)
1561 16678 : || perm.second != i)
1562 5208343 : return NULL;
1563 : }
1564 :
1565 : /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1566 : (l0add_p), see whether we have FMA variants. We can only form FMAs
1567 : if allowed via -ffp-contract=fast or if they were FMA before. */
1568 6558 : if (!fma_p
1569 6556 : && flag_fp_contract_mode != FP_CONTRACT_FAST
1570 6591 : && FLOAT_TYPE_P (SLP_TREE_VECTYPE (l0node)))
1571 : ;
1572 6525 : else if (!l0add_p
1573 6525 : && (fma_p
1574 2846 : || vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0],
1575 2846 : MULT_EXPR)))
1576 : {
1577 : /* (c * d) -+ a */
1578 847 : if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1579 37 : return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1580 : }
1581 5678 : else if (l0add_p
1582 5678 : && (fma_p
1583 5676 : || vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0],
1584 3677 : MULT_EXPR)))
1585 : {
1586 : /* (c * d) +- a */
1587 554 : if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1588 23 : return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1589 : }
1590 :
1591 6498 : if (!fma_p && !l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1592 1015 : return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1593 :
1594 : return NULL;
1595 : }
1596 :
1597 : void
1598 1075 : addsub_pattern::build (vec_info *vinfo)
1599 : {
1600 1075 : slp_tree node = *m_node;
1601 :
1602 1075 : unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1603 1075 : unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1604 :
1605 1075 : switch (m_ifn)
1606 : {
1607 1015 : case IFN_VEC_ADDSUB:
1608 1015 : {
1609 1015 : slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1610 1015 : slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1611 :
1612 : /* Modify the blend node in-place. */
1613 1015 : SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1614 1015 : SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1615 1015 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1616 1015 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1617 :
1618 : /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1619 1015 : stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1620 1015 : gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1621 : gimple_assign_rhs1 (rep->stmt),
1622 1015 : gimple_assign_rhs2 (rep->stmt));
1623 1015 : gimple_call_set_lhs (call, make_ssa_name
1624 1015 : (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1625 1015 : gimple_call_set_nothrow (call, true);
1626 1015 : gimple_set_bb (call, gimple_bb (rep->stmt));
1627 1015 : stmt_vec_info new_rep
1628 1015 : = vinfo->add_pattern_stmt (call, vect_orig_stmt (rep));
1629 1015 : SLP_TREE_REPRESENTATIVE (node) = new_rep;
1630 1015 : STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1631 1015 : STMT_SLP_TYPE (new_rep) = pure_slp;
1632 1015 : STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1633 1015 : SLP_TREE_CODE (node) = ERROR_MARK;
1634 1015 : SLP_TREE_LANE_PERMUTATION (node).release ();
1635 :
1636 1015 : vect_free_slp_tree (sub);
1637 1015 : vect_free_slp_tree (add);
1638 1015 : break;
1639 : }
1640 60 : case IFN_VEC_FMADDSUB:
1641 60 : case IFN_VEC_FMSUBADD:
1642 60 : {
1643 60 : slp_tree sub, add;
1644 60 : if (m_ifn == IFN_VEC_FMADDSUB)
1645 : {
1646 37 : sub = SLP_TREE_CHILDREN (node)[l0];
1647 37 : add = SLP_TREE_CHILDREN (node)[l1];
1648 : }
1649 : else /* m_ifn == IFN_VEC_FMSUBADD */
1650 : {
1651 23 : sub = SLP_TREE_CHILDREN (node)[l1];
1652 23 : add = SLP_TREE_CHILDREN (node)[l0];
1653 : }
1654 : /* Modify the blend node in-place. */
1655 60 : SLP_TREE_CHILDREN (node).safe_grow (3, true);
1656 60 : gcall *call;
1657 60 : stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1658 60 : if (vect_match_expression_p (add, CFN_FMA))
1659 : {
1660 2 : SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (add)[0];
1661 2 : SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (add)[1];
1662 2 : SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (add)[2];
1663 : /* Build IFN_VEC_FMADDSUB from the fms representative
1664 : operands. */
1665 2 : call = gimple_build_call_internal (m_ifn, 3,
1666 : gimple_call_arg (srep->stmt, 0),
1667 : gimple_call_arg (srep->stmt, 1),
1668 2 : gimple_call_arg (srep->stmt, 2));
1669 : }
1670 : else
1671 : {
1672 58 : slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1673 58 : SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1674 58 : SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1675 58 : SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1676 : /* Build IFN_VEC_FMADDSUB from the mul/sub representative
1677 : operands. */
1678 58 : stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1679 58 : call = gimple_build_call_internal (m_ifn, 3,
1680 : gimple_assign_rhs1 (mrep->stmt),
1681 58 : gimple_assign_rhs2 (mrep->stmt),
1682 58 : gimple_assign_rhs2 (srep->stmt));
1683 : }
1684 60 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1685 60 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1686 60 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1687 :
1688 60 : gimple_call_set_lhs (call, make_ssa_name
1689 60 : (TREE_TYPE (gimple_get_lhs (srep->stmt))));
1690 60 : gimple_call_set_nothrow (call, true);
1691 60 : gimple_set_bb (call, gimple_bb (srep->stmt));
1692 60 : stmt_vec_info new_rep
1693 60 : = vinfo->add_pattern_stmt (call, vect_orig_stmt (srep));
1694 60 : SLP_TREE_REPRESENTATIVE (node) = new_rep;
1695 60 : STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1696 60 : STMT_SLP_TYPE (new_rep) = pure_slp;
1697 60 : STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1698 60 : SLP_TREE_CODE (node) = ERROR_MARK;
1699 60 : SLP_TREE_LANE_PERMUTATION (node).release ();
1700 :
1701 60 : vect_free_slp_tree (sub);
1702 60 : vect_free_slp_tree (add);
1703 60 : break;
1704 : }
1705 1075 : default:;
1706 : }
1707 1075 : }
1708 :
1709 : /*******************************************************************************
1710 : * Pattern matching definitions
1711 : ******************************************************************************/
1712 :
1713 : #define SLP_PATTERN(x) &x::recognize
1714 : vect_pattern_decl_t slp_patterns[]
1715 : {
1716 : /* For least amount of back-tracking and more efficient matching
1717 : order patterns from the largest to the smallest. Especially if they
1718 : overlap in what they can detect. */
1719 :
1720 : SLP_PATTERN (complex_operations_pattern),
1721 : SLP_PATTERN (addsub_pattern)
1722 : };
1723 : #undef SLP_PATTERN
1724 :
1725 : /* Set the number of SLP pattern matchers available. */
1726 : size_t num__slp_patterns = ARRAY_SIZE (slp_patterns);
|