Line data Source code
1 : /* SLP - Pattern matcher on SLP trees
2 : Copyright (C) 2020-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "target.h"
25 : #include "rtl.h"
26 : #include "tree.h"
27 : #include "gimple.h"
28 : #include "tree-pass.h"
29 : #include "ssa.h"
30 : #include "optabs-tree.h"
31 : #include "insn-config.h"
32 : #include "recog.h" /* FIXME: for insn_data */
33 : #include "fold-const.h"
34 : #include "stor-layout.h"
35 : #include "gimple-iterator.h"
36 : #include "cfgloop.h"
37 : #include "tree-vectorizer.h"
38 : #include "langhooks.h"
39 : #include "gimple-walk.h"
40 : #include "dbgcnt.h"
41 : #include "tree-vector-builder.h"
42 : #include "vec-perm-indices.h"
43 : #include "gimple-fold.h"
44 : #include "internal-fn.h"
45 :
46 : /* SLP Pattern matching mechanism.
47 :
48 : This extension to the SLP vectorizer allows one to transform the generated SLP
49 : tree based on any pattern. The difference between this and the normal vect
50 : pattern matcher is that unlike the former, this matcher allows you to match
51 : with instructions that do not belong to the same SSA dominator graph.
52 :
53 : The only requirement that this pattern matcher has is that you are only
54 : only allowed to either match an entire group or none.
55 :
56 : The pattern matcher currently only allows you to perform replacements to
57 : internal functions.
58 :
59 : Once the patterns are matched it is one way, these cannot be undone. It is
60 : currently not supported to match patterns recursively.
61 :
62 : To add a new pattern, implement the vect_pattern class and add the type to
63 : slp_patterns.
64 :
65 : */
66 :
67 : /*******************************************************************************
68 : * vect_pattern class
69 : ******************************************************************************/
70 :
71 : /* Default implementation of recognize that performs matching, validation and
72 : replacement of nodes but that can be overriden if required. */
73 :
74 : static bool
75 4654 : vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
76 : {
77 4654 : tree vectype = SLP_TREE_VECTYPE (node);
78 4654 : if (ifn == IFN_LAST || !vectype)
79 : return false;
80 :
81 4654 : if (dump_enabled_p ())
82 695 : dump_printf_loc (MSG_NOTE, vect_location,
83 : "Found %s pattern in SLP tree\n",
84 : internal_fn_name (ifn));
85 :
86 4654 : if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
87 : {
88 1081 : if (dump_enabled_p ())
89 14 : dump_printf_loc (MSG_NOTE, vect_location,
90 : "Target supports %s vectorization with mode %T\n",
91 : internal_fn_name (ifn), vectype);
92 : }
93 : else
94 : {
95 3573 : if (dump_enabled_p ())
96 : {
97 681 : if (!vectype)
98 : dump_printf_loc (MSG_NOTE, vect_location,
99 : "Target does not support vector type for %G\n",
100 : STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
101 : else
102 681 : dump_printf_loc (MSG_NOTE, vect_location,
103 : "Target does not support %s for vector type "
104 : "%T\n", internal_fn_name (ifn), vectype);
105 : }
106 3573 : return false;
107 : }
108 : return true;
109 : }
110 :
111 : /*******************************************************************************
112 : * General helper types
113 : ******************************************************************************/
114 :
115 : /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 : be matched when looking for expressions that we are interested matching for
117 : complex numbers addition and mla. */
118 :
119 : typedef enum _complex_operation : unsigned {
120 : PLUS_PLUS,
121 : MINUS_PLUS,
122 : PLUS_MINUS,
123 : MULT_MULT,
124 : CMPLX_NONE
125 : } complex_operation_t;
126 :
127 : /*******************************************************************************
128 : * General helper functions
129 : ******************************************************************************/
130 :
131 : /* Helper function of linear_loads_p that checks to see if the load permutation
132 : is sequential and in monotonically increasing order of loads with no gaps.
133 : */
134 :
135 : static inline complex_perm_kinds_t
136 2030 : is_linear_load_p (load_permutation_t loads)
137 : {
138 2093 : if (loads.length() == 0)
139 : return PERM_UNKNOWN;
140 :
141 2030 : unsigned load, i;
142 2030 : complex_perm_kinds_t candidates[4]
143 : = { PERM_ODDODD
144 : , PERM_EVENEVEN
145 : , PERM_EVENODD
146 : , PERM_ODDEVEN
147 : };
148 :
149 2030 : int valid_patterns = 4;
150 7549 : FOR_EACH_VEC_ELT (loads, i, load)
151 : {
152 5582 : unsigned adj_load = load % 2;
153 5582 : if (candidates[0] != PERM_UNKNOWN && adj_load != 1)
154 : {
155 1744 : candidates[0] = PERM_UNKNOWN;
156 1744 : valid_patterns--;
157 : }
158 5582 : if (candidates[1] != PERM_UNKNOWN && adj_load != 0)
159 : {
160 1095 : candidates[1] = PERM_UNKNOWN;
161 1095 : valid_patterns--;
162 : }
163 5582 : if (candidates[2] != PERM_UNKNOWN && load != i)
164 : {
165 1995 : candidates[2] = PERM_UNKNOWN;
166 1995 : valid_patterns--;
167 : }
168 5582 : if (candidates[3] != PERM_UNKNOWN
169 4502 : && load != (i % 2 == 0 ? i + 1 : i - 1))
170 : {
171 1319 : candidates[3] = PERM_UNKNOWN;
172 1319 : valid_patterns--;
173 : }
174 :
175 5582 : if (valid_patterns == 0)
176 : return PERM_UNKNOWN;
177 : }
178 :
179 3138 : for (i = 0; i < sizeof(candidates); i++)
180 5105 : if (candidates[i] != PERM_UNKNOWN)
181 : return candidates[i];
182 :
183 : return PERM_UNKNOWN;
184 : }
185 :
186 : /* Combine complex_perm_kinds A and B into a new permute kind that describes the
187 : resulting operation. */
188 :
189 : static inline complex_perm_kinds_t
190 15684 : vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
191 : {
192 15684 : if (a == b)
193 : return a;
194 :
195 13290 : if (a == PERM_TOP)
196 : return b;
197 :
198 1803 : if (b == PERM_TOP)
199 : return a;
200 :
201 : return PERM_UNKNOWN;
202 : }
203 :
204 : /* Check to see if all loads rooted in ROOT are linear. Linearity is
205 : defined as having no gaps between values loaded. */
206 :
207 : static complex_perm_kinds_t
208 26168 : linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
209 : {
210 26168 : if (!root)
211 : return PERM_UNKNOWN;
212 :
213 26163 : unsigned i;
214 26163 : complex_perm_kinds_t *tmp;
215 :
216 26163 : if ((tmp = perm_cache->get (root)) != NULL)
217 6820 : return *tmp;
218 :
219 19343 : complex_perm_kinds_t retval = PERM_UNKNOWN;
220 19343 : perm_cache->put (root, retval);
221 :
222 : /* If it's a load node, then just read the load permute. */
223 19343 : if (SLP_TREE_DEF_TYPE (root) == vect_internal_def
224 17033 : && !SLP_TREE_PERMUTE_P (root)
225 14948 : && STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))
226 3222 : && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))))
227 : {
228 3222 : if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
229 2030 : retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
230 : else
231 1192 : retval = PERM_EVENODD;
232 3222 : perm_cache->put (root, retval);
233 3222 : return retval;
234 : }
235 16121 : else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
236 : {
237 2310 : retval = PERM_TOP;
238 2310 : perm_cache->put (root, retval);
239 2310 : return retval;
240 : }
241 :
242 : complex_perm_kinds_t kind = PERM_TOP;
243 :
244 : slp_tree child;
245 16161 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
246 : {
247 15684 : complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
248 15684 : kind = vect_merge_perms (kind, res);
249 : /* Unknown and Top are not valid on blends as they produce no permute. */
250 15684 : retval = kind;
251 15684 : if (kind == PERM_UNKNOWN || kind == PERM_TOP)
252 : return retval;
253 : }
254 :
255 477 : retval = kind;
256 :
257 477 : perm_cache->put (root, retval);
258 477 : return retval;
259 : }
260 :
261 :
262 : /* This function attempts to make a node rooted in NODE is linear. If the node
263 : if already linear than the node itself is returned in RESULT.
264 :
265 : If the node is not linear then a new VEC_PERM_EXPR node is created with a
266 : lane permute that when applied will make the node linear. If such a
267 : permute cannot be created then FALSE is returned from the function.
268 :
269 : Here linearity is defined as having a sequential, monotically increasing
270 : load position inside the load permute generated by the loads reachable from
271 : NODE. */
272 :
273 : static slp_tree
274 0 : vect_build_swap_evenodd_node (slp_tree node)
275 : {
276 : /* Attempt to linearise the permute. */
277 0 : vec<std::pair<unsigned, unsigned> > zipped;
278 0 : zipped.create (SLP_TREE_LANES (node));
279 :
280 0 : for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
281 : {
282 0 : zipped.quick_push (std::make_pair (0, x+1));
283 0 : zipped.quick_push (std::make_pair (0, x));
284 : }
285 :
286 : /* Create the new permute node and store it instead. */
287 0 : slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
288 0 : SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
289 0 : SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
290 0 : SLP_TREE_CHILDREN (vnode).quick_push (node);
291 0 : SLP_TREE_REF_COUNT (vnode) = 1;
292 0 : SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
293 0 : SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
294 0 : SLP_TREE_REF_COUNT (node)++;
295 0 : return vnode;
296 : }
297 :
298 : /* Checks to see of the expression represented by NODE is a gimple assign with
299 : code CODE. */
300 :
301 : static inline bool
302 10583926 : vect_match_expression_p (slp_tree node, code_helper code)
303 : {
304 10583926 : if (!node
305 9798063 : || !SLP_TREE_REPRESENTATIVE (node))
306 : return false;
307 :
308 7452928 : gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
309 7452928 : if (is_gimple_assign (expr)
310 6385538 : && code.is_tree_code ()
311 13832528 : && gimple_assign_rhs_code (expr) == (tree_code) code)
312 : return true;
313 6991448 : if (is_a <gcall *> (expr)
314 62716 : && !code.is_tree_code ()
315 6991496 : && gimple_call_combined_fn (expr) == (combined_fn) code)
316 : return true;
317 :
318 : return false;
319 : }
320 :
321 : /* Check if the given lane permute in PERMUTES matches an alternating sequence
322 : of {even odd even odd ...}. This to account for unrolled loops. Further
323 : mode there resulting permute must be linear. */
324 :
325 : static inline bool
326 6735 : vect_check_evenodd_blend (lane_permutation_t &permutes,
327 : unsigned even, unsigned odd)
328 : {
329 7116 : if (permutes.length () == 0
330 6192 : || permutes.length () % 2 != 0)
331 : return false;
332 :
333 6152 : unsigned val[2] = {even, odd};
334 6152 : unsigned seed = 0;
335 21421 : for (unsigned i = 0; i < permutes.length (); i++)
336 15610 : if (permutes[i].first != val[i % 2]
337 15610 : || permutes[i].second != seed++)
338 : return false;
339 :
340 : return true;
341 : }
342 :
343 : /* This function will match the two gimple expressions representing NODE1 and
344 : NODE2 in parallel and returns the pair operation that represents the two
345 : expressions in the two statements.
346 :
347 : If match is successful then the corresponding complex_operation is
348 : returned and the arguments to the two matched operations are returned in OPS.
349 :
350 : If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
351 : from the two nodes alternatingly.
352 :
353 : If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
354 :
355 : e.g. the following gimple statements
356 :
357 : stmt 0 _39 = _37 + _12;
358 : stmt 1 _6 = _38 - _36;
359 :
360 : will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
361 : */
362 :
363 : static complex_operation_t
364 1350572 : vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
365 : bool two_operands = true, vec<slp_tree> *ops = NULL)
366 : {
367 1350572 : complex_operation_t result = CMPLX_NONE;
368 :
369 1350572 : if (vect_match_expression_p (node1, MINUS_EXPR)
370 43200 : && vect_match_expression_p (node2, PLUS_EXPR)
371 1353813 : && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
372 : result = MINUS_PLUS;
373 1347652 : else if (vect_match_expression_p (node1, PLUS_EXPR)
374 139058 : && vect_match_expression_p (node2, MINUS_EXPR)
375 1351146 : && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
376 : result = PLUS_MINUS;
377 1344761 : else if (vect_match_expression_p (node1, PLUS_EXPR)
378 1344761 : && vect_match_expression_p (node2, PLUS_EXPR))
379 : result = PLUS_PLUS;
380 1340531 : else if (vect_match_expression_p (node1, MULT_EXPR)
381 1340531 : && vect_match_expression_p (node2, MULT_EXPR))
382 3686 : result = MULT_MULT;
383 :
384 1350572 : if (result != CMPLX_NONE && ops != NULL)
385 : {
386 13698 : if (two_operands)
387 : {
388 13698 : auto l0node = SLP_TREE_CHILDREN (node1);
389 13698 : auto l1node = SLP_TREE_CHILDREN (node2);
390 :
391 : /* Check if the tree is connected as we expect it. */
392 19834 : if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
393 7688 : || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
394 1350572 : return CMPLX_NONE;
395 : }
396 6034 : ops->safe_push (node1);
397 6034 : ops->safe_push (node2);
398 : }
399 : return result;
400 : }
401 :
402 : /* Overload of vect_detect_pair_op that matches against the representative
403 : statements in the children of NODE. It is expected that NODE has exactly
404 : two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
405 :
406 : static complex_operation_t
407 4745332 : vect_detect_pair_op (slp_tree node, bool two_operands = true,
408 : vec<slp_tree> *ops = NULL)
409 : {
410 4745332 : if (!two_operands && SLP_TREE_PERMUTE_P (node))
411 : return CMPLX_NONE;
412 :
413 4745332 : if (SLP_TREE_CHILDREN (node).length () != 2)
414 : return CMPLX_NONE;
415 :
416 1350572 : vec<slp_tree> children = SLP_TREE_CHILDREN (node);
417 1350572 : lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
418 :
419 1350572 : return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
420 1350572 : ops);
421 : }
422 :
423 : /*******************************************************************************
424 : * complex_pattern class
425 : ******************************************************************************/
426 :
427 : /* SLP Complex Numbers pattern matching.
428 :
429 : As an example, the following simple loop:
430 :
431 : double a[restrict N]; double b[restrict N]; double c[restrict N];
432 :
433 : for (int i=0; i < N; i+=2)
434 : {
435 : c[i] = a[i] - b[i+1];
436 : c[i+1] = a[i+1] + b[i];
437 : }
438 :
439 : which represents a complex addition on with a rotation of 90* around the
440 : argand plane. i.e. if `a` and `b` were complex numbers then this would be the
441 : same as `a + (b * I)`.
442 :
443 : Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
444 : both recognized in order for the pattern to work. As an SLP tree this is
445 : represented as
446 :
447 : +--------------------------------+
448 : | stmt 0 *_9 = _10; |
449 : | stmt 1 *_15 = _16; |
450 : +--------------------------------+
451 : |
452 : |
453 : v
454 : +--------------------------------+
455 : | stmt 0 _10 = _4 - _8; |
456 : | stmt 1 _16 = _12 + _14; |
457 : | lane permutation { 0[0] 1[1] } |
458 : +--------------------------------+
459 : | |
460 : | |
461 : | |
462 : +-----+ | | +-----+
463 : | | | | | |
464 : +-----| { } |<-----+ +----->| { } --------+
465 : | | | +------------------| | |
466 : | +-----+ | +-----+ |
467 : | | | |
468 : | | | |
469 : | +------|------------------+ |
470 : | | | |
471 : v v v v
472 : +--------------------------+ +--------------------------------+
473 : | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
474 : | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
475 : | load permutation { 1 0 } | | load permutation { 0 1 } |
476 : +--------------------------+ +--------------------------------+
477 :
478 : The pattern matcher allows you to replace both statements 0 and 1 or none at
479 : all. Because this operation is a two operands operation the actual nodes
480 : being replaced are those in the { } nodes. The actual scalar statements
481 : themselves are not replaced or used during the matching but instead the
482 : SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
483 : replace and match on any number of nodes.
484 :
485 : Because the pattern matcher matches on the representative statement for the
486 : SLP node the case of two_operators it allows you to match the children of the
487 : node. This is done using the method `recognize ()`.
488 :
489 : */
490 :
491 : /* The complex_pattern class contains common code for pattern matchers that work
492 : on complex numbers. These provide functionality to allow de-construction and
493 : validation of sequences depicting/transforming REAL and IMAG pairs. */
494 :
495 : class complex_pattern : public vect_pattern
496 : {
497 : protected:
498 : auto_vec<slp_tree> m_workset;
499 20 : complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
500 40 : : vect_pattern (node, m_ops, ifn)
501 : {
502 20 : this->m_workset.safe_push (*node);
503 20 : }
504 :
505 : public:
506 : void build (vec_info *) override;
507 :
508 : static internal_fn
509 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
510 : vec<slp_tree> *);
511 : };
512 :
513 : /* Create a replacement pattern statement for each node in m_node and inserts
514 : the new statement into m_node as the new representative statement. The old
515 : statement is marked as being in a pattern defined by the new statement. The
516 : statement is created as call to internal function IFN with m_num_args
517 : arguments.
518 :
519 : Futhermore the new pattern is also added to the vectorization information
520 : structure VINFO and the old statement STMT_INFO is marked as unused while
521 : the new statement is marked as used and the number of SLP uses of the new
522 : statement is incremented.
523 :
524 : The newly created SLP nodes are marked as SLP only and will be dissolved
525 : if SLP is aborted.
526 :
527 : The newly created gimple call is returned and the BB remains unchanged.
528 :
529 : This default method is designed to only match against simple operands where
530 : all the input and output types are the same.
531 : */
532 :
533 : void
534 20 : complex_pattern::build (vec_info *vinfo)
535 : {
536 20 : stmt_vec_info stmt_info;
537 :
538 20 : auto_vec<tree> args;
539 20 : args.create (this->m_num_args);
540 20 : args.quick_grow_cleared (this->m_num_args);
541 20 : slp_tree node;
542 20 : unsigned ix;
543 20 : stmt_vec_info call_stmt_info;
544 20 : gcall *call_stmt = NULL;
545 :
546 : /* Now modify the nodes themselves. */
547 60 : FOR_EACH_VEC_ELT (this->m_workset, ix, node)
548 : {
549 : /* Calculate the location of the statement in NODE to replace. */
550 20 : stmt_info = SLP_TREE_REPRESENTATIVE (node);
551 20 : stmt_vec_info reduc_def
552 20 : = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
553 20 : gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
554 20 : tree lhs_old_stmt = gimple_get_lhs (old_stmt);
555 20 : tree type = TREE_TYPE (lhs_old_stmt);
556 :
557 : /* Create the argument set for use by gimple_build_call_internal_vec. */
558 70 : for (unsigned i = 0; i < this->m_num_args; i++)
559 50 : args[i] = lhs_old_stmt;
560 :
561 : /* Create the new pattern statements. */
562 20 : call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
563 20 : tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
564 20 : gimple_call_set_lhs (call_stmt, var);
565 20 : gimple_set_location (call_stmt, gimple_location (old_stmt));
566 20 : gimple_call_set_nothrow (call_stmt, true);
567 :
568 : /* Adjust the book-keeping for the new and old statements for use during
569 : SLP. This is required to get the right VF and statement during SLP
570 : analysis. These changes are created after relevancy has been set for
571 : the nodes as such we need to manually update them. Any changes will be
572 : undone if SLP is cancelled. */
573 20 : call_stmt_info
574 20 : = vinfo->add_pattern_stmt (call_stmt, stmt_info);
575 :
576 : /* Make sure to mark the representative statement pure_slp and
577 : relevant and transfer reduction info. */
578 20 : STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
579 20 : STMT_SLP_TYPE (call_stmt_info) = pure_slp;
580 20 : STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
581 :
582 20 : gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
583 20 : STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
584 20 : STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
585 :
586 : /* Since we are replacing all the statements in the group with the same
587 : thing it doesn't really matter. So just set it every time a new stmt
588 : is created. */
589 20 : SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
590 20 : SLP_TREE_LANE_PERMUTATION (node).release ();
591 20 : SLP_TREE_CODE (node) = CALL_EXPR;
592 : }
593 20 : }
594 :
595 : /*******************************************************************************
596 : * complex_add_pattern class
597 : ******************************************************************************/
598 :
599 : class complex_add_pattern : public complex_pattern
600 : {
601 : protected:
602 0 : complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
603 0 : : complex_pattern (node, m_ops, ifn)
604 : {
605 0 : this->m_num_args = 2;
606 : }
607 :
608 : public:
609 : void build (vec_info *) final override;
610 : static internal_fn
611 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
612 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
613 :
614 : static vect_pattern*
615 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
616 : slp_tree *);
617 :
618 : static vect_pattern*
619 0 : mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
620 : {
621 0 : return new complex_add_pattern (node, m_ops, ifn);
622 : }
623 : };
624 :
625 : /* Perform a replacement of the detected complex add pattern with the new
626 : instruction sequences. */
627 :
628 : void
629 0 : complex_add_pattern::build (vec_info *vinfo)
630 : {
631 0 : SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
632 :
633 0 : slp_tree node = this->m_ops[0];
634 0 : vec<slp_tree> children = SLP_TREE_CHILDREN (node);
635 :
636 : /* First re-arrange the children. */
637 0 : SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
638 0 : SLP_TREE_CHILDREN (*this->m_node)[1] =
639 0 : vect_build_swap_evenodd_node (children[1]);
640 :
641 0 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
642 0 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
643 0 : vect_free_slp_tree (this->m_ops[0]);
644 0 : vect_free_slp_tree (this->m_ops[1]);
645 :
646 0 : complex_pattern::build (vinfo);
647 0 : }
648 :
649 : /* Pattern matcher for trying to match complex addition pattern in SLP tree.
650 :
651 : If no match is found then IFN is set to IFN_LAST.
652 : This function matches the patterns shaped as:
653 :
654 : c[i] = a[i] - b[i+1];
655 : c[i+1] = a[i+1] + b[i];
656 :
657 : If a match occurred then TRUE is returned, else FALSE. The initial match is
658 : expected to be in OP1 and the initial match operands in args0. */
659 :
660 : internal_fn
661 4734606 : complex_add_pattern::matches (complex_operation_t op,
662 : slp_tree_to_load_perm_map_t *perm_cache,
663 : slp_compat_nodes_map_t * /* compat_cache */,
664 : slp_tree *node, vec<slp_tree> *ops)
665 : {
666 4734606 : internal_fn ifn = IFN_LAST;
667 :
668 : /* Find the two components. Rotation in the complex plane will modify
669 : the operations:
670 :
671 : * Rotation 0: + +
672 : * Rotation 90: - +
673 : * Rotation 180: - -
674 : * Rotation 270: + -
675 :
676 : Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
677 : to care about them here. */
678 4734606 : if (op == MINUS_PLUS)
679 : ifn = IFN_COMPLEX_ADD_ROT90;
680 4731708 : else if (op == PLUS_MINUS)
681 : ifn = IFN_COMPLEX_ADD_ROT270;
682 : else
683 : return ifn;
684 :
685 : /* verify that there is a permute, otherwise this isn't a pattern we
686 : we support. */
687 5770 : gcc_assert (ops->length () == 2);
688 :
689 5770 : vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
690 :
691 : /* First node must be unpermuted. */
692 5770 : if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
693 : return IFN_LAST;
694 :
695 : /* Second node must be permuted. */
696 489 : if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
697 : return IFN_LAST;
698 :
699 335 : if (!vect_pattern_validate_optab (ifn, *node))
700 : return IFN_LAST;
701 :
702 : return ifn;
703 : }
704 :
705 : /* Attempt to recognize a complex add pattern. */
706 :
707 : vect_pattern*
708 0 : complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
709 : slp_compat_nodes_map_t *compat_cache,
710 : slp_tree *node)
711 : {
712 0 : auto_vec<slp_tree> ops;
713 0 : complex_operation_t op
714 0 : = vect_detect_pair_op (*node, true, &ops);
715 0 : internal_fn ifn
716 0 : = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
717 0 : if (ifn == IFN_LAST)
718 : return NULL;
719 :
720 0 : return new complex_add_pattern (node, &ops, ifn);
721 0 : }
722 :
723 : /*******************************************************************************
724 : * complex_mul_pattern
725 : ******************************************************************************/
726 :
727 : /* Helper function to check if PERM is KIND or PERM_TOP. */
728 :
729 : static inline bool
730 533 : is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
731 : slp_tree op1, complex_perm_kinds_t kind1,
732 : slp_tree op2, complex_perm_kinds_t kind2)
733 : {
734 533 : complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
735 533 : if (perm1 != kind1 && perm1 != PERM_TOP)
736 : return false;
737 :
738 173 : complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
739 173 : if (perm2 != kind2 && perm2 != PERM_TOP)
740 : return false;
741 :
742 : return true;
743 : }
744 :
745 : enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND };
746 :
747 : static inline bool
748 369 : compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
749 : slp_tree a, int *pa, slp_tree b, int *pb)
750 : {
751 369 : bool *tmp;
752 369 : std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
753 369 : if ((tmp = compat_cache->get (key)) != NULL)
754 27 : return *tmp;
755 :
756 342 : compat_cache->put (key, false);
757 :
758 406 : if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
759 : return false;
760 :
761 340 : if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
762 : return false;
763 :
764 : /* Only internal nodes can be loads, as such we can't check further if they
765 : are externals. */
766 340 : if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
767 : {
768 188 : for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
769 : {
770 130 : tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
771 130 : tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
772 130 : if (!operand_equal_p (op1, op2, 0))
773 : return false;
774 : }
775 :
776 58 : compat_cache->put (key, true);
777 58 : return true;
778 : }
779 :
780 280 : auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
781 280 : auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
782 :
783 280 : if (gimple_code (a_stmt) != gimple_code (b_stmt))
784 : return false;
785 :
786 : /* code, children, type, externals, loads, constants */
787 280 : if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
788 : return false;
789 :
790 : /* At this point, a and b are known to be the same gimple operations. */
791 280 : if (is_gimple_call (a_stmt))
792 : {
793 0 : if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
794 : dyn_cast <gcall *> (b_stmt), false))
795 : return false;
796 : }
797 280 : else if (!is_gimple_assign (a_stmt))
798 : return false;
799 : else
800 : {
801 280 : tree_code acode = gimple_assign_rhs_code (a_stmt);
802 280 : tree_code bcode = gimple_assign_rhs_code (b_stmt);
803 280 : if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
804 171 : && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
805 : return true;
806 :
807 109 : if (acode != bcode)
808 : return false;
809 : }
810 :
811 109 : if (!STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a))
812 78 : || !STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b)))
813 : {
814 92 : for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
815 : {
816 61 : tree t1 = gimple_arg (a_stmt, i);
817 61 : tree t2 = gimple_arg (b_stmt, i);
818 61 : if (TREE_CODE (t1) != TREE_CODE (t2))
819 : return false;
820 :
821 : /* If SSA name then we will need to inspect the children
822 : so we can punt here. */
823 61 : if (TREE_CODE (t1) == SSA_NAME)
824 43 : continue;
825 :
826 18 : if (!operand_equal_p (t1, t2, 0))
827 : return false;
828 : }
829 : }
830 : else
831 : {
832 78 : auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
833 78 : auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
834 : /* Don't check the last dimension as that's checked by the lineary
835 : checks. This check is also much stricter than what we need
836 : because it doesn't consider loading from adjacent elements
837 : in the same struct as loading from the same base object.
838 : But for now, I'll play it safe. */
839 78 : if (!same_data_refs (dr1, dr2, 1))
840 : return false;
841 : }
842 :
843 148 : for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
844 : {
845 61 : if (!compatible_complex_nodes_p (compat_cache,
846 61 : SLP_TREE_CHILDREN (a)[i], pa,
847 61 : SLP_TREE_CHILDREN (b)[i], pb))
848 : return false;
849 : }
850 :
851 87 : compat_cache->put (key, true);
852 87 : return true;
853 : }
854 :
855 :
856 : /* Check to see if the oprands to two multiplies, 2 each in LEFT_OP and
857 : RIGHT_OP match a complex multiplication or complex multiply-and-accumulate
858 : or complex multiply-and-subtract pattern. Do this using the permute cache
859 : PERM_CACHE and the combination compatibility list COMPAT_CACHE. If
860 : the operation is successful the macthing operands are returned in OPS and
861 : _STATUS indicates if the operation matched includes a conjugate of one of the
862 : operands. If the operation succeeds True is returned, otherwise False and
863 : the values in ops are meaningless. */
864 : static inline bool
865 1500 : vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
866 : slp_compat_nodes_map_t *compat_cache,
867 : const vec<slp_tree> &left_op,
868 : const vec<slp_tree> &right_op,
869 : bool subtract, vec<slp_tree> &ops,
870 : enum _conj_status *_status)
871 : {
872 1500 : enum _conj_status stats = CONJ_NONE;
873 :
874 : /* The complex operations can occur in two layouts and two permute sequences
875 : so declare them and re-use them. */
876 1500 : int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}. */
877 : , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}. */
878 : };
879 :
880 : /* Now for the corresponding permutes that go with these values. */
881 1500 : complex_perm_kinds_t perms[][4]
882 : = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
883 : , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
884 : };
885 :
886 : /* These permutes are used during comparisons of externals on which
887 : we require strict equality. */
888 1500 : int cq[][4][2]
889 : = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
890 : , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
891 : };
892 :
893 : /* Default to style and perm 0, most operations use this one. */
894 1500 : int style = 0;
895 1500 : int perm = subtract ? 1 : 0;
896 :
897 : /* Check if we have a negate operation, if so absorb the node and continue
898 : looking. */
899 1500 : bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
900 1500 : bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
901 :
902 : /* Create the combined inputs after remapping and flattening. */
903 1500 : ops.create (4);
904 1500 : ops.safe_splice (left_op);
905 1500 : ops.safe_splice (right_op);
906 :
907 : /* Determine which style we're looking at. We only have different ones
908 : whenever a conjugate is involved. */
909 1500 : if (neg0 && neg1)
910 : ;
911 1500 : else if (neg0)
912 : {
913 0 : ops[2] = SLP_TREE_CHILDREN (right_op[0])[0];
914 0 : stats = CONJ_FST;
915 0 : if (subtract)
916 0 : perm = 0;
917 : }
918 1500 : else if (neg1)
919 : {
920 10 : ops[3] = SLP_TREE_CHILDREN (right_op[1])[0];
921 10 : stats = CONJ_SND;
922 10 : perm = 1;
923 : }
924 :
925 1500 : *_status = stats;
926 :
927 : /* Extract out the elements to check. */
928 1500 : slp_tree op0 = ops[styles[style][0]];
929 1500 : slp_tree op1 = ops[styles[style][1]];
930 1500 : slp_tree op2 = ops[styles[style][2]];
931 1500 : slp_tree op3 = ops[styles[style][3]];
932 :
933 : /* Do cheapest test first. If failed no need to analyze further. */
934 1500 : if (linear_loads_p (perm_cache, op0) != perms[perm][0]
935 589 : || linear_loads_p (perm_cache, op1) != perms[perm][1]
936 2033 : || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
937 1327 : return false;
938 :
939 173 : return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
940 173 : cq[perm][1])
941 308 : && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
942 135 : cq[perm][3]);
943 : }
944 :
945 : /* This function combines two nodes containing only even and only odd lanes
946 : together into a single node which contains the nodes in even/odd order
947 : by using a lane permute.
948 :
949 : The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
950 : So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
951 :
952 : The tree REPRESENTATION is taken from the supplied REP along with the
953 : vectype which must be the same between all three nodes.
954 : */
955 :
956 : static slp_tree
957 20 : vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
958 : {
959 20 : vec<std::pair<unsigned, unsigned> > perm;
960 20 : perm.create (SLP_TREE_LANES (rep));
961 :
962 40 : for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
963 : {
964 20 : perm.quick_push (std::make_pair (0, x));
965 20 : perm.quick_push (std::make_pair (1, x+1));
966 : }
967 :
968 20 : slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
969 20 : SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
970 20 : SLP_TREE_LANE_PERMUTATION (vnode) = perm;
971 :
972 20 : SLP_TREE_CHILDREN (vnode).create (2);
973 20 : SLP_TREE_CHILDREN (vnode).quick_push (even);
974 20 : SLP_TREE_CHILDREN (vnode).quick_push (odd);
975 20 : SLP_TREE_REF_COUNT (even)++;
976 20 : SLP_TREE_REF_COUNT (odd)++;
977 20 : SLP_TREE_REF_COUNT (vnode) = 1;
978 :
979 20 : SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
980 40 : gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
981 : /* Representation is set to that of the current node as the vectorizer
982 : can't deal with VEC_PERMs with no representation, as would be the
983 : case with invariants. */
984 20 : SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
985 20 : SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
986 20 : return vnode;
987 : }
988 :
989 : class complex_mul_pattern : public complex_pattern
990 : {
991 : protected:
992 20 : complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
993 40 : : complex_pattern (node, m_ops, ifn)
994 : {
995 20 : this->m_num_args = 2;
996 : }
997 :
998 : public:
999 : void build (vec_info *) final override;
1000 : static internal_fn
1001 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1002 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1003 :
1004 : static vect_pattern*
1005 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1006 : slp_tree *);
1007 :
1008 : static vect_pattern*
1009 20 : mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1010 : {
1011 20 : return new complex_mul_pattern (node, m_ops, ifn);
1012 : }
1013 :
1014 : };
1015 :
1016 : /* Pattern matcher for trying to match complex multiply and complex multiply
1017 : and accumulate pattern in SLP tree. If the operation matches then IFN
1018 : is set to the operation it matched and the arguments to the two
1019 : replacement statements are put in m_ops.
1020 :
1021 : If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1022 :
1023 : This function matches the patterns shaped as:
1024 :
1025 : double ax = (b[i+1] * a[i]);
1026 : double bx = (a[i+1] * b[i]);
1027 :
1028 : c[i] = c[i] - ax;
1029 : c[i+1] = c[i+1] + bx;
1030 :
1031 : If a match occurred then TRUE is returned, else FALSE. The initial match is
1032 : expected to be in OP1 and the initial match operands in args0. */
1033 :
1034 : internal_fn
1035 4734626 : complex_mul_pattern::matches (complex_operation_t op,
1036 : slp_tree_to_load_perm_map_t *perm_cache,
1037 : slp_compat_nodes_map_t *compat_cache,
1038 : slp_tree *node, vec<slp_tree> *ops)
1039 : {
1040 4734626 : internal_fn ifn = IFN_LAST;
1041 :
1042 4734626 : if (op != MINUS_PLUS)
1043 : return IFN_LAST;
1044 :
1045 2918 : auto childs = *ops;
1046 2918 : auto l0node = SLP_TREE_CHILDREN (childs[0]);
1047 :
1048 2918 : bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
1049 2918 : bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
1050 2918 : if (!mul0 && !mul1)
1051 : return IFN_LAST;
1052 :
1053 : /* Now operand2+4 may lead to another expression. */
1054 2016 : auto_vec<slp_tree> left_op, right_op;
1055 2016 : slp_tree add0 = NULL;
1056 :
1057 : /* Check if we may be a multiply add. It's only valid to form FMAs
1058 : with -ffp-contract=fast. */
1059 2016 : if (!mul0
1060 1204 : && (flag_fp_contract_mode == FP_CONTRACT_FAST
1061 3 : || !FLOAT_TYPE_P (SLP_TREE_VECTYPE (*node)))
1062 3217 : && vect_match_expression_p (l0node[0], PLUS_EXPR))
1063 : {
1064 1150 : auto vals = SLP_TREE_CHILDREN (l0node[0]);
1065 : /* Check if it's a multiply, otherwise no idea what this is. */
1066 1150 : if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
1067 2016 : return IFN_LAST;
1068 :
1069 : /* Check if the ADD is linear, otherwise it's not valid complex FMA. */
1070 633 : if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
1071 : return IFN_LAST;
1072 :
1073 18 : left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
1074 18 : add0 = vals[0];
1075 : }
1076 : else
1077 866 : left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
1078 :
1079 884 : right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1080 :
1081 884 : if (left_op.length () != 2
1082 780 : || right_op.length () != 2
1083 : || !mul0
1084 779 : || !mul1
1085 1607 : || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
1086 107 : return IFN_LAST;
1087 :
1088 777 : enum _conj_status status;
1089 777 : auto_vec<slp_tree> res_ops;
1090 777 : if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1091 : right_op, false, res_ops, &status))
1092 : {
1093 : /* Try swapping the order and re-trying since multiplication is
1094 : commutative. */
1095 697 : std::swap (left_op[0], left_op[1]);
1096 697 : std::swap (right_op[0], right_op[1]);
1097 697 : if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1098 : right_op, false, res_ops, &status))
1099 : return IFN_LAST;
1100 : }
1101 :
1102 126 : if (status == CONJ_NONE)
1103 : {
1104 116 : if (add0)
1105 : ifn = IFN_COMPLEX_FMA;
1106 : else
1107 111 : ifn = IFN_COMPLEX_MUL;
1108 : }
1109 : else
1110 : {
1111 10 : if(add0)
1112 : ifn = IFN_COMPLEX_FMA_CONJ;
1113 : else
1114 5 : ifn = IFN_COMPLEX_MUL_CONJ;
1115 : }
1116 :
1117 126 : if (!vect_pattern_validate_optab (ifn, *node))
1118 : return IFN_LAST;
1119 :
1120 20 : ops->truncate (0);
1121 30 : ops->create (add0 ? 4 : 3);
1122 :
1123 20 : if (add0)
1124 10 : ops->quick_push (add0);
1125 :
1126 20 : complex_perm_kinds_t kind = linear_loads_p (perm_cache, res_ops[0]);
1127 20 : if (kind == PERM_EVENODD || kind == PERM_TOP)
1128 : {
1129 10 : ops->quick_push (res_ops[1]);
1130 10 : ops->quick_push (res_ops[3]);
1131 10 : ops->quick_push (res_ops[0]);
1132 : }
1133 10 : else if (kind == PERM_EVENEVEN && status != CONJ_SND)
1134 : {
1135 10 : ops->quick_push (res_ops[0]);
1136 10 : ops->quick_push (res_ops[2]);
1137 10 : ops->quick_push (res_ops[1]);
1138 : }
1139 : else
1140 : {
1141 0 : ops->quick_push (res_ops[0]);
1142 0 : ops->quick_push (res_ops[3]);
1143 0 : ops->quick_push (res_ops[1]);
1144 : }
1145 :
1146 : return ifn;
1147 2793 : }
1148 :
1149 : /* Attempt to recognize a complex mul pattern. */
1150 :
1151 : vect_pattern*
1152 0 : complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1153 : slp_compat_nodes_map_t *compat_cache,
1154 : slp_tree *node)
1155 : {
1156 0 : auto_vec<slp_tree> ops;
1157 0 : complex_operation_t op
1158 0 : = vect_detect_pair_op (*node, true, &ops);
1159 0 : internal_fn ifn
1160 0 : = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1161 0 : if (ifn == IFN_LAST)
1162 : return NULL;
1163 :
1164 0 : return new complex_mul_pattern (node, &ops, ifn);
1165 0 : }
1166 :
1167 : /* Perform a replacement of the detected complex mul pattern with the new
1168 : instruction sequences. */
1169 :
1170 : void
1171 20 : complex_mul_pattern::build (vec_info *vinfo)
1172 : {
1173 20 : slp_tree node;
1174 20 : unsigned i;
1175 20 : switch (this->m_ifn)
1176 : {
1177 10 : case IFN_COMPLEX_MUL:
1178 10 : case IFN_COMPLEX_MUL_CONJ:
1179 10 : {
1180 10 : slp_tree newnode
1181 10 : = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
1182 10 : *this->m_node);
1183 10 : SLP_TREE_REF_COUNT (this->m_ops[2])++;
1184 :
1185 30 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1186 20 : vect_free_slp_tree (node);
1187 :
1188 : /* First re-arrange the children. */
1189 10 : SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1190 10 : SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1191 10 : SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1192 10 : break;
1193 : }
1194 10 : case IFN_COMPLEX_FMA:
1195 10 : case IFN_COMPLEX_FMA_CONJ:
1196 10 : {
1197 10 : SLP_TREE_REF_COUNT (this->m_ops[0])++;
1198 10 : slp_tree newnode
1199 10 : = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
1200 10 : *this->m_node);
1201 10 : SLP_TREE_REF_COUNT (this->m_ops[3])++;
1202 :
1203 30 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1204 20 : vect_free_slp_tree (node);
1205 :
1206 : /* First re-arrange the children. */
1207 10 : SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
1208 10 : SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
1209 10 : SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1210 10 : SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
1211 :
1212 : /* Tell the builder to expect an extra argument. */
1213 10 : this->m_num_args++;
1214 10 : break;
1215 : }
1216 0 : default:
1217 0 : gcc_unreachable ();
1218 : }
1219 :
1220 : /* And then rewrite the node itself. */
1221 20 : complex_pattern::build (vinfo);
1222 20 : }
1223 :
1224 : /*******************************************************************************
1225 : * complex_fms_pattern class
1226 : ******************************************************************************/
1227 :
1228 : class complex_fms_pattern : public complex_pattern
1229 : {
1230 : protected:
1231 0 : complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1232 0 : : complex_pattern (node, m_ops, ifn)
1233 : {
1234 0 : this->m_num_args = 3;
1235 : }
1236 :
1237 : public:
1238 : void build (vec_info *) final override;
1239 : static internal_fn
1240 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1241 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1242 :
1243 : static vect_pattern*
1244 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1245 : slp_tree *);
1246 :
1247 : static vect_pattern*
1248 0 : mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1249 : {
1250 0 : return new complex_fms_pattern (node, m_ops, ifn);
1251 : }
1252 : };
1253 :
1254 :
1255 : /* Pattern matcher for trying to match complex multiply and subtract pattern
1256 : in SLP tree. If the operation matches then IFN is set to the operation
1257 : it matched and the arguments to the two replacement statements are put in
1258 : m_ops.
1259 :
1260 : If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1261 :
1262 : This function matches the patterns shaped as:
1263 :
1264 : double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1265 : double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1266 :
1267 : c[i] = c[i] - ax;
1268 : c[i+1] = c[i+1] + bx;
1269 :
1270 : If a match occurred then TRUE is returned, else FALSE. The initial match is
1271 : expected to be in OP1 and the initial match operands in args0. */
1272 :
1273 : internal_fn
1274 4734626 : complex_fms_pattern::matches (complex_operation_t op,
1275 : slp_tree_to_load_perm_map_t *perm_cache,
1276 : slp_compat_nodes_map_t *compat_cache,
1277 : slp_tree * ref_node, vec<slp_tree> *ops)
1278 : {
1279 4734626 : internal_fn ifn = IFN_LAST;
1280 :
1281 : /* We need to ignore the two_operands nodes that may also match,
1282 : for that we can check if they have any scalar statements and also
1283 : check that it's not a permute node as we're looking for a normal
1284 : MINUS_EXPR operation. */
1285 4734626 : if (op != CMPLX_NONE)
1286 : return IFN_LAST;
1287 :
1288 4728592 : slp_tree root = *ref_node;
1289 4728592 : if (!vect_match_expression_p (root, MINUS_EXPR))
1290 : return IFN_LAST;
1291 :
1292 : /* TODO: Support invariants here, with the new layout CADD now
1293 : can match before we get a chance to try CFMS. */
1294 59441 : auto nodes = SLP_TREE_CHILDREN (root);
1295 59441 : if (!vect_match_expression_p (nodes[1], MULT_EXPR)
1296 70147 : || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
1297 59422 : return IFN_LAST;
1298 :
1299 19 : auto childs = SLP_TREE_CHILDREN (nodes[0]);
1300 19 : auto l0node = SLP_TREE_CHILDREN (childs[0]);
1301 :
1302 : /* Now operand2+4 may lead to another expression. */
1303 19 : auto_vec<slp_tree> left_op, right_op;
1304 19 : left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1305 19 : right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
1306 :
1307 : /* If these nodes don't have any children then they're
1308 : not ones we're interested in. */
1309 19 : if (left_op.length () != 2
1310 13 : || right_op.length () != 2
1311 26 : || !vect_match_expression_p (l0node[1], MULT_EXPR))
1312 6 : return IFN_LAST;
1313 :
1314 13 : enum _conj_status status;
1315 13 : auto_vec<slp_tree> res_ops;
1316 13 : if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1317 : left_op, true, res_ops, &status))
1318 : {
1319 : /* Try swapping the order and re-trying since multiplication is
1320 : commutative. */
1321 13 : std::swap (left_op[0], left_op[1]);
1322 13 : std::swap (right_op[0], right_op[1]);
1323 13 : auto_vec<slp_tree> res_ops;
1324 13 : if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1325 : left_op, true, res_ops, &status))
1326 13 : return IFN_LAST;
1327 13 : }
1328 :
1329 0 : if (status == CONJ_NONE)
1330 : ifn = IFN_COMPLEX_FMS;
1331 : else
1332 0 : ifn = IFN_COMPLEX_FMS_CONJ;
1333 :
1334 0 : if (!vect_pattern_validate_optab (ifn, *ref_node))
1335 : return IFN_LAST;
1336 :
1337 0 : ops->truncate (0);
1338 0 : ops->create (4);
1339 :
1340 0 : complex_perm_kinds_t kind = linear_loads_p (perm_cache, res_ops[2]);
1341 0 : if (kind == PERM_EVENODD)
1342 : {
1343 0 : ops->quick_push (l0node[0]);
1344 0 : ops->quick_push (res_ops[2]);
1345 0 : ops->quick_push (res_ops[3]);
1346 0 : ops->quick_push (res_ops[1]);
1347 : }
1348 : else
1349 : {
1350 0 : ops->quick_push (l0node[0]);
1351 0 : ops->quick_push (res_ops[3]);
1352 0 : ops->quick_push (res_ops[2]);
1353 0 : ops->quick_push (res_ops[0]);
1354 : }
1355 :
1356 : return ifn;
1357 32 : }
1358 :
1359 : /* Attempt to recognize a complex mul pattern. */
1360 :
1361 : vect_pattern*
1362 0 : complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1363 : slp_compat_nodes_map_t *compat_cache,
1364 : slp_tree *node)
1365 : {
1366 0 : auto_vec<slp_tree> ops;
1367 0 : complex_operation_t op
1368 0 : = vect_detect_pair_op (*node, true, &ops);
1369 0 : internal_fn ifn
1370 0 : = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1371 0 : if (ifn == IFN_LAST)
1372 : return NULL;
1373 :
1374 0 : return new complex_fms_pattern (node, &ops, ifn);
1375 0 : }
1376 :
1377 : /* Perform a replacement of the detected complex mul pattern with the new
1378 : instruction sequences. */
1379 :
1380 : void
1381 0 : complex_fms_pattern::build (vec_info *vinfo)
1382 : {
1383 0 : slp_tree node;
1384 0 : unsigned i;
1385 0 : slp_tree newnode =
1386 0 : vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1387 0 : SLP_TREE_REF_COUNT (this->m_ops[0])++;
1388 0 : SLP_TREE_REF_COUNT (this->m_ops[1])++;
1389 :
1390 0 : FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1391 0 : vect_free_slp_tree (node);
1392 :
1393 0 : SLP_TREE_CHILDREN (*this->m_node).release ();
1394 0 : SLP_TREE_CHILDREN (*this->m_node).create (3);
1395 :
1396 : /* First re-arrange the children. */
1397 0 : SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1398 0 : SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1399 0 : SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1400 :
1401 : /* And then rewrite the node itself. */
1402 0 : complex_pattern::build (vinfo);
1403 0 : }
1404 :
1405 : /*******************************************************************************
1406 : * complex_operations_pattern class
1407 : ******************************************************************************/
1408 :
1409 : /* This function combines all the existing pattern matchers above into one class
1410 : that shares the functionality between them. The initial match is shared
1411 : between all complex operations. */
1412 :
1413 : class complex_operations_pattern : public complex_pattern
1414 : {
1415 : protected:
1416 : complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1417 : internal_fn ifn)
1418 : : complex_pattern (node, m_ops, ifn)
1419 : {
1420 : this->m_num_args = 0;
1421 : }
1422 :
1423 : public:
1424 : void build (vec_info *) final override;
1425 : static internal_fn
1426 : matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1427 : slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1428 :
1429 : static vect_pattern*
1430 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1431 : slp_tree *);
1432 : };
1433 :
1434 : /* Dummy matches implementation for proxy object. */
1435 :
1436 : internal_fn
1437 0 : complex_operations_pattern::
1438 : matches (complex_operation_t /* op */,
1439 : slp_tree_to_load_perm_map_t * /* perm_cache */,
1440 : slp_compat_nodes_map_t * /* compat_cache */,
1441 : slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1442 : {
1443 0 : return IFN_LAST;
1444 : }
1445 :
1446 : /* Attempt to recognize a complex mul pattern. */
1447 :
1448 : vect_pattern*
1449 4734626 : complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1450 : slp_compat_nodes_map_t *ccache,
1451 : slp_tree *node)
1452 : {
1453 4734626 : auto_vec<slp_tree> ops;
1454 4734626 : complex_operation_t op
1455 4734626 : = vect_detect_pair_op (*node, true, &ops);
1456 4734626 : internal_fn ifn = IFN_LAST;
1457 :
1458 4734626 : ifn = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
1459 4734626 : if (ifn != IFN_LAST)
1460 0 : return complex_fms_pattern::mkInstance (node, &ops, ifn);
1461 :
1462 4734626 : ifn = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
1463 4734626 : if (ifn != IFN_LAST)
1464 20 : return complex_mul_pattern::mkInstance (node, &ops, ifn);
1465 :
1466 4734606 : ifn = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
1467 4734606 : if (ifn != IFN_LAST)
1468 0 : return complex_add_pattern::mkInstance (node, &ops, ifn);
1469 :
1470 : return NULL;
1471 4734626 : }
1472 :
1473 : /* Dummy implementation of build. */
1474 :
1475 : void
1476 0 : complex_operations_pattern::build (vec_info * /* vinfo */)
1477 : {
1478 0 : gcc_unreachable ();
1479 : }
1480 :
1481 :
1482 : /* The addsub_pattern. */
1483 :
1484 : class addsub_pattern : public vect_pattern
1485 : {
1486 : public:
1487 1061 : addsub_pattern (slp_tree *node, internal_fn ifn)
1488 1061 : : vect_pattern (node, NULL, ifn) {};
1489 :
1490 : void build (vec_info *) final override;
1491 :
1492 : static vect_pattern*
1493 : recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1494 : slp_tree *);
1495 : };
1496 :
1497 : vect_pattern *
1498 4734626 : addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
1499 : slp_compat_nodes_map_t *, slp_tree *node_)
1500 : {
1501 4734626 : slp_tree node = *node_;
1502 4734626 : if (!SLP_TREE_PERMUTE_P (node)
1503 20391 : || SLP_TREE_CHILDREN (node).length () != 2
1504 4752728 : || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
1505 : return NULL;
1506 :
1507 : /* Match a blend of a plus and a minus op with the same number of plus and
1508 : minus lanes on the same operands. */
1509 13350 : unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1510 13350 : unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1511 13350 : if (l0 == l1)
1512 : return NULL;
1513 11250 : bool fma_p = false;
1514 11250 : bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1515 11250 : PLUS_EXPR);
1516 11250 : if (!l0add_p
1517 11250 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1518 : {
1519 4253 : l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMA);
1520 4253 : if (!l0add_p
1521 4253 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMS))
1522 4251 : return NULL;
1523 : fma_p = true;
1524 : }
1525 6999 : bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1526 6999 : PLUS_EXPR);
1527 6999 : if (l1add_p && fma_p)
1528 : return NULL;
1529 6999 : if (!l1add_p
1530 6999 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
1531 : {
1532 718 : if (!fma_p)
1533 : return NULL;
1534 2 : l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMA);
1535 2 : if (!l1add_p
1536 2 : && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMS))
1537 0 : return NULL;
1538 : }
1539 6281 : else if (!l1add_p && fma_p)
1540 : return NULL;
1541 :
1542 6283 : slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1543 6283 : slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1544 6283 : if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1545 5932 : && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1546 365 : || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1547 0 : && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
1548 : return NULL;
1549 :
1550 20910 : for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1551 : {
1552 15138 : std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
1553 : /* It has to be alternating -, +, -,
1554 : While we could permute the .ADDSUB inputs and the .ADDSUB output
1555 : that's only profitable over the add + sub + blend if at least
1556 : one of the permute is optimized which we can't determine here. */
1557 22755 : if (perm.first != ((i & 1) ? l1 : l0)
1558 15038 : || perm.second != i)
1559 4733565 : return NULL;
1560 : }
1561 :
1562 : /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1563 : (l0add_p), see whether we have FMA variants. We can only form FMAs
1564 : if allowed via -ffp-contract=fast or if they were FMA before. */
1565 5772 : if (!fma_p
1566 5770 : && flag_fp_contract_mode != FP_CONTRACT_FAST
1567 5803 : && FLOAT_TYPE_P (SLP_TREE_VECTYPE (l0node)))
1568 : ;
1569 5741 : else if (!l0add_p
1570 5741 : && (fma_p
1571 2868 : || vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0],
1572 2868 : MULT_EXPR)))
1573 : {
1574 : /* (c * d) -+ a */
1575 777 : if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1576 20 : return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1577 : }
1578 4964 : else if (l0add_p
1579 4964 : && (fma_p
1580 4962 : || vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0],
1581 2871 : MULT_EXPR)))
1582 : {
1583 : /* (c * d) +- a */
1584 538 : if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1585 18 : return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1586 : }
1587 :
1588 5734 : if (!fma_p && !l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1589 1023 : return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1590 :
1591 : return NULL;
1592 : }
1593 :
1594 : void
1595 1061 : addsub_pattern::build (vec_info *vinfo)
1596 : {
1597 1061 : slp_tree node = *m_node;
1598 :
1599 1061 : unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1600 1061 : unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1601 :
1602 1061 : switch (m_ifn)
1603 : {
1604 1023 : case IFN_VEC_ADDSUB:
1605 1023 : {
1606 1023 : slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1607 1023 : slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1608 :
1609 : /* Modify the blend node in-place. */
1610 1023 : SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1611 1023 : SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1612 1023 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1613 1023 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1614 :
1615 : /* Build IFN_VEC_ADDSUB from the sub representative operands. */
1616 1023 : stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1617 1023 : gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1618 : gimple_assign_rhs1 (rep->stmt),
1619 1023 : gimple_assign_rhs2 (rep->stmt));
1620 1023 : gimple_call_set_lhs (call, make_ssa_name
1621 1023 : (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1622 1023 : gimple_call_set_nothrow (call, true);
1623 1023 : gimple_set_bb (call, gimple_bb (rep->stmt));
1624 1023 : stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1625 1023 : SLP_TREE_REPRESENTATIVE (node) = new_rep;
1626 1023 : STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1627 1023 : STMT_SLP_TYPE (new_rep) = pure_slp;
1628 1023 : STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1629 1023 : STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1630 1023 : STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1631 1023 : SLP_TREE_CODE (node) = ERROR_MARK;
1632 1023 : SLP_TREE_LANE_PERMUTATION (node).release ();
1633 :
1634 1023 : vect_free_slp_tree (sub);
1635 1023 : vect_free_slp_tree (add);
1636 1023 : break;
1637 : }
1638 38 : case IFN_VEC_FMADDSUB:
1639 38 : case IFN_VEC_FMSUBADD:
1640 38 : {
1641 38 : slp_tree sub, add;
1642 38 : if (m_ifn == IFN_VEC_FMADDSUB)
1643 : {
1644 20 : sub = SLP_TREE_CHILDREN (node)[l0];
1645 20 : add = SLP_TREE_CHILDREN (node)[l1];
1646 : }
1647 : else /* m_ifn == IFN_VEC_FMSUBADD */
1648 : {
1649 18 : sub = SLP_TREE_CHILDREN (node)[l1];
1650 18 : add = SLP_TREE_CHILDREN (node)[l0];
1651 : }
1652 : /* Modify the blend node in-place. */
1653 38 : SLP_TREE_CHILDREN (node).safe_grow (3, true);
1654 38 : gcall *call;
1655 38 : stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1656 38 : if (vect_match_expression_p (add, CFN_FMA))
1657 : {
1658 2 : SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (add)[0];
1659 2 : SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (add)[1];
1660 2 : SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (add)[2];
1661 : /* Build IFN_VEC_FMADDSUB from the fms representative
1662 : operands. */
1663 2 : call = gimple_build_call_internal (m_ifn, 3,
1664 : gimple_call_arg (srep->stmt, 0),
1665 : gimple_call_arg (srep->stmt, 1),
1666 2 : gimple_call_arg (srep->stmt, 2));
1667 : }
1668 : else
1669 : {
1670 36 : slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1671 36 : SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1672 36 : SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1673 36 : SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1674 : /* Build IFN_VEC_FMADDSUB from the mul/sub representative
1675 : operands. */
1676 36 : stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1677 36 : call = gimple_build_call_internal (m_ifn, 3,
1678 : gimple_assign_rhs1 (mrep->stmt),
1679 36 : gimple_assign_rhs2 (mrep->stmt),
1680 36 : gimple_assign_rhs2 (srep->stmt));
1681 : }
1682 38 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1683 38 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1684 38 : SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1685 :
1686 38 : gimple_call_set_lhs (call, make_ssa_name
1687 38 : (TREE_TYPE (gimple_get_lhs (srep->stmt))));
1688 38 : gimple_call_set_nothrow (call, true);
1689 38 : gimple_set_bb (call, gimple_bb (srep->stmt));
1690 38 : stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1691 38 : SLP_TREE_REPRESENTATIVE (node) = new_rep;
1692 38 : STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1693 38 : STMT_SLP_TYPE (new_rep) = pure_slp;
1694 38 : STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1695 38 : STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1696 38 : STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1697 38 : SLP_TREE_CODE (node) = ERROR_MARK;
1698 38 : SLP_TREE_LANE_PERMUTATION (node).release ();
1699 :
1700 38 : vect_free_slp_tree (sub);
1701 38 : vect_free_slp_tree (add);
1702 38 : break;
1703 : }
1704 1061 : default:;
1705 : }
1706 1061 : }
1707 :
1708 : /*******************************************************************************
1709 : * Pattern matching definitions
1710 : ******************************************************************************/
1711 :
1712 : #define SLP_PATTERN(x) &x::recognize
1713 : vect_pattern_decl_t slp_patterns[]
1714 : {
1715 : /* For least amount of back-tracking and more efficient matching
1716 : order patterns from the largest to the smallest. Especially if they
1717 : overlap in what they can detect. */
1718 :
1719 : SLP_PATTERN (complex_operations_pattern),
1720 : SLP_PATTERN (addsub_pattern)
1721 : };
1722 : #undef SLP_PATTERN
1723 :
1724 : /* Set the number of SLP pattern matchers available. */
1725 : size_t num__slp_patterns = ARRAY_SIZE (slp_patterns);
|