LCOV - code coverage report
Current view: top level - gcc - tree-vect-slp-patterns.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 82.4 % 647 533
Test Date: 2026-04-20 14:57:17 Functions: 67.7 % 31 21
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* SLP - Pattern matcher on SLP trees
       2              :    Copyright (C) 2020-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "target.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "tree-pass.h"
      29              : #include "ssa.h"
      30              : #include "optabs-tree.h"
      31              : #include "insn-config.h"
      32              : #include "recog.h"            /* FIXME: for insn_data */
      33              : #include "fold-const.h"
      34              : #include "stor-layout.h"
      35              : #include "gimple-iterator.h"
      36              : #include "cfgloop.h"
      37              : #include "tree-vectorizer.h"
      38              : #include "langhooks.h"
      39              : #include "gimple-walk.h"
      40              : #include "dbgcnt.h"
      41              : #include "tree-vector-builder.h"
      42              : #include "vec-perm-indices.h"
      43              : #include "gimple-fold.h"
      44              : #include "internal-fn.h"
      45              : 
      46              : /* SLP Pattern matching mechanism.
      47              : 
      48              :   This extension to the SLP vectorizer allows one to transform the generated SLP
      49              :   tree based on any pattern.  The difference between this and the normal vect
      50              :   pattern matcher is that unlike the former, this matcher allows you to match
      51              :   with instructions that do not belong to the same SSA dominator graph.
      52              : 
      53              :   The only requirement that this pattern matcher has is that you are
      54              :   only allowed to either match an entire group or none.
      55              : 
      56              :   The pattern matcher currently only allows you to perform replacements to
      57              :   internal functions.
      58              : 
      59              :   Once the patterns are matched it is one way, these cannot be undone.  It is
      60              :   currently not supported to match patterns recursively.
      61              : 
      62              :   To add a new pattern, implement the vect_pattern class and add the type to
      63              :   slp_patterns.
      64              : 
      65              : */
      66              : 
      67              : /*******************************************************************************
      68              :  * vect_pattern class
      69              :  ******************************************************************************/
      70              : 
      71              : /* Return true if IFN is not IFN_LAST, NODE has a vector type and the target
      72              :    directly supports IFN for that vector type; dump the outcome if enabled.  */
      73              : 
      74              : static bool
      75         4674 : vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
      76              : {
      77         4674 :   tree vectype = SLP_TREE_VECTYPE (node);
      78         4674 :   if (ifn == IFN_LAST || !vectype)
      79              :     return false;
      80              : 
      81         4674 :   if (dump_enabled_p ())
      82          701 :     dump_printf_loc (MSG_NOTE, vect_location,
      83              :                      "Found %s pattern in SLP tree\n",
      84              :                      internal_fn_name (ifn));
      85              : 
      86         4674 :   if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
      87              :     {
      88         1096 :       if (dump_enabled_p ())
      89           17 :         dump_printf_loc (MSG_NOTE, vect_location,
      90              :                          "Target supports %s vectorization with mode %T\n",
      91              :                          internal_fn_name (ifn), vectype);
      92              :     }
      93              :   else
      94              :     {
      95         3578 :       if (dump_enabled_p ())
      96              :         {
      97          684 :           if (!vectype)  /* Cannot hold: a null VECTYPE already returned above.  */
      98              :             dump_printf_loc (MSG_NOTE, vect_location,
      99              :                              "Target does not support vector type for %G\n",
     100              :                              STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
     101              :           else
     102          684 :             dump_printf_loc (MSG_NOTE, vect_location,
     103              :                              "Target does not support %s for vector type "
     104              :                              "%T\n", internal_fn_name (ifn), vectype);
     105              :         }
     106         3578 :       return false;
     107              :     }
     108              :   return true;
     109              : }
     110              : 
     111              : /*******************************************************************************
     112              :  * General helper types
     113              :  ******************************************************************************/
     114              : 
     115              : /* The COMPLEX_OPERATION enum denotes the possible pairs of operations that can
     116              :    be matched when looking for the expressions we are interested in for complex
     117              :    number addition and mla.  CMPLX_NONE denotes that no pair was matched.  */
     118              : 
     119              : typedef enum _complex_operation : unsigned {
     120              :   PLUS_PLUS,
     121              :   MINUS_PLUS,
     122              :   PLUS_MINUS,
     123              :   MULT_MULT,
     124              :   CMPLX_NONE
     125              : } complex_operation_t;
     126              : 
     127              : /*******************************************************************************
     128              :  * General helper functions
     129              :  ******************************************************************************/
     130              : 
     131              : /* Helper function of linear_loads_p that classifies the load permutation LOADS
     132              :    as PERM_ODDODD, PERM_EVENEVEN, PERM_EVENODD or PERM_ODDEVEN.  PERM_UNKNOWN is
     133              :    returned if LOADS is empty or matches none of these kinds.  */
     134              : 
     135              : static inline complex_perm_kinds_t
     136         2036 : is_linear_load_p (load_permutation_t loads)
     137              : {
     138         2102 :   if (loads.length() == 0)
     139              :     return PERM_UNKNOWN;
     140              : 
     141         2036 :   unsigned load, i;
     142         2036 :   complex_perm_kinds_t candidates[4]
     143              :     = { PERM_ODDODD
     144              :       , PERM_EVENEVEN
     145              :       , PERM_EVENODD
     146              :       , PERM_ODDEVEN
     147              :       };
     148              : 
     149         2036 :   int valid_patterns = 4;
     150         7567 :   FOR_EACH_VEC_ELT (loads, i, load)
     151              :     {
     152         5597 :       unsigned adj_load = load % 2;
     153         5597 :       if (candidates[0] != PERM_UNKNOWN && adj_load != 1)  /* Not all odd.  */
     154              :         {
     155         1750 :           candidates[0] = PERM_UNKNOWN;
     156         1750 :           valid_patterns--;
     157              :         }
     158         5597 :       if (candidates[1] != PERM_UNKNOWN && adj_load != 0)  /* Not all even.  */
     159              :         {
     160         1101 :           candidates[1] = PERM_UNKNOWN;
     161         1101 :           valid_patterns--;
     162              :         }
     163         5597 :       if (candidates[2] != PERM_UNKNOWN && load != i)  /* Not the identity.  */
     164              :         {
     165         2001 :           candidates[2] = PERM_UNKNOWN;
     166         2001 :           valid_patterns--;
     167              :         }
     168         5597 :       if (candidates[3] != PERM_UNKNOWN
     169         4511 :           && load != (i % 2 == 0 ? i + 1 : i - 1))  /* Not pairwise swapped.  */
     170              :         {
     171         1322 :           candidates[3] = PERM_UNKNOWN;
     172         1322 :           valid_patterns--;
     173              :         }
     174              : 
     175         5597 :       if (valid_patterns == 0)
     176              :         return PERM_UNKNOWN;
     177              :     }
     178              :   /* Return the first surviving kind; bound the scan by the element count.  */
     179         3147 :   for (i = 0; i < sizeof (candidates) / sizeof (candidates[0]); i++)
     180         5117 :     if (candidates[i] != PERM_UNKNOWN)
     181              :       return candidates[i];
     182              : 
     183              :   return PERM_UNKNOWN;
     184              : }
     185              : 
     186              : /* Merge permute kinds A and B.  Equal kinds yield that kind, PERM_TOP yields
     187              :    the other kind and any other combination yields PERM_UNKNOWN.  */
     188              : 
     189              : static inline complex_perm_kinds_t
     190        15722 : vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
     191              : {
     192        15722 :   if (a == b)
     193              :     return a;
     194              : 
     195        13326 :   if (a == PERM_TOP)
     196              :     return b;
     197              : 
     198         1809 :   if (b == PERM_TOP)
     199              :     return a;
     200              : 
     201              :   return PERM_UNKNOWN;
     202              : }
     203              : 
     204              : /* Return the merged permute kind of the loads reachable from ROOT, caching
     205              :    per-node results in PERM_CACHE.  A null ROOT yields PERM_UNKNOWN.  */
     206              : 
     207              : static complex_perm_kinds_t
     208        26235 : linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
     209              : {
     210        26235 :   if (!root)
     211              :     return PERM_UNKNOWN;
     212              : 
     213        26230 :   unsigned i;
     214        26230 :   complex_perm_kinds_t *tmp;
     215              : 
     216        26230 :   if ((tmp = perm_cache->get (root)) != NULL)
     217         6838 :     return *tmp;
     218              : 
     219        19392 :   complex_perm_kinds_t retval = PERM_UNKNOWN;
     220        19392 :   perm_cache->put (root, retval);  /* Seeded before recursing; presumably guards against cycles.  */
     221              : 
     222              :   /* If it's a load node, then just read the load permute.  */
     223        19392 :   if (SLP_TREE_DEF_TYPE (root) == vect_internal_def
     224        17074 :       && !SLP_TREE_PERMUTE_P (root)
     225        14977 :       && STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))
     226         3231 :       && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))))
     227              :     {
     228         3231 :       if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
     229         2036 :         retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
     230              :       else
     231         1195 :         retval = PERM_EVENODD;
     232         3231 :       perm_cache->put (root, retval);
     233         3231 :       return retval;
     234              :     }
     235        16161 :   else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
     236              :     {
     237         2318 :       retval = PERM_TOP;
     238         2318 :       perm_cache->put (root, retval);
     239         2318 :       return retval;
     240              :     }
     241              : 
     242              :   complex_perm_kinds_t kind = PERM_TOP;
     243              : 
     244              :   slp_tree child;
     245        16205 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
     246              :     {
     247        15722 :       complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
     248        15722 :       kind = vect_merge_perms (kind, res);
     249              :       /* Unknown and Top are not valid on blends as they produce no permute.  */
     250        15722 :       retval = kind;
     251        15722 :       if (kind == PERM_UNKNOWN || kind == PERM_TOP)
     252              :         return retval;  /* NOTE(review): the cache still holds PERM_UNKNOWN for ROOT here, even for PERM_TOP.  */
     253              :     }
     254              : 
     255          483 :   retval = kind;
     256              : 
     257          483 :   perm_cache->put (root, retval);
     258          483 :   return retval;
     259              : }
     260              : 
     261              : 
     262              : /* Build a new VEC_PERM_EXPR SLP node that has NODE as its only child and
     263              :    whose lane permutation swaps every even/odd pair of lanes of NODE.
     264              : 
     265              :    For each even lane X the permutation selects lane X + 1 and then lane X
     266              :    of child 0.  The new node copies the vector type, number of lanes and
     267              :    representative statement from NODE and starts with a reference count of
     268              :    one; the reference count of NODE is incremented.
     269              : 
     270              :    The new node is returned.  NOTE(review): the loop pushes two entries per
     271              :    iteration, so an even number of lanes in NODE is presumably required.  */
     272              : 
     273              : static slp_tree
     274            0 : vect_build_swap_evenodd_node (slp_tree node)
     275              : {
     276              :   /* Attempt to linearise the permute.  */
     277            0 :   vec<std::pair<unsigned, unsigned> > zipped;
     278            0 :   zipped.create (SLP_TREE_LANES (node));
     279              : 
     280            0 :   for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
     281              :     {
     282            0 :       zipped.quick_push (std::make_pair (0, x+1));
     283            0 :       zipped.quick_push (std::make_pair (0, x));
     284              :     }
     285              : 
     286              :   /* Create the new permute node and store it instead.  */
     287            0 :   slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
     288            0 :   SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
     289            0 :   SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
     290            0 :   SLP_TREE_CHILDREN (vnode).quick_push (node);
     291            0 :   SLP_TREE_REF_COUNT (vnode) = 1;
     292            0 :   SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
     293            0 :   SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
     294            0 :   SLP_TREE_REF_COUNT (node)++;
     295            0 :   return vnode;
     296              : }
     297              : 
     298              : /* Check whether the representative statement of NODE is a gimple assign with
     299              :    tree code CODE or a call whose combined function is CODE.  */
     300              : 
     301              : static inline bool
     302     10535691 : vect_match_expression_p (slp_tree node, code_helper code)
     303              : {
     304     10535691 :   if (!node
     305      9764446 :       || SLP_TREE_PERMUTE_P (node)
     306      9715370 :       || !SLP_TREE_REPRESENTATIVE (node))
     307              :     return false;
     308              : 
     309      7388462 :   gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
     310      7388462 :   if (is_gimple_assign (expr)
     311      6325466 :       && code.is_tree_code ()
     312     13707917 :       && gimple_assign_rhs_code (expr) == (tree_code) code)
     313              :     return true;
     314      6935521 :   if (is_a <gcall *> (expr)
     315        62451 :       && !code.is_tree_code ()
     316      6935569 :       && gimple_call_combined_fn (expr) == (combined_fn) code)
     317              :     return true;
     318              : 
     319              :   return false;
     320              : }
     321              : 
     322              : /* Check if the given lane permute in PERMUTES matches an alternating sequence
     323              :    of {even odd even odd ...}.  This is to account for unrolled loops.
     324              :    Furthermore the resulting permute must be linear.  */
     325              : 
     326              : static inline bool
     327         6338 : vect_check_evenodd_blend (lane_permutation_t &permutes,
     328              :                          unsigned even, unsigned odd)
     329              : {
     330         6659 :   if (permutes.length () == 0
     331         6149 :       || permutes.length () % 2 != 0)
     332              :     return false;
     333              : 
     334         6127 :   unsigned val[2] = {even, odd};
     335         6127 :   unsigned seed = 0;
     336        21412 :   for (unsigned i = 0; i < permutes.length (); i++)
     337        15584 :     if (permutes[i].first != val[i % 2]
     338        15584 :         || permutes[i].second != seed++)  /* Lanes must be 0, 1, 2, ...  */
     339              :       return false;
     340              : 
     341              :   return true;
     342              : }
     343              : 
     344              : /* This function will match the two gimple expressions representing NODE1 and
     345              :    NODE2 in parallel and returns the pair operation that represents the two
     346              :    expressions in the two statements.
     347              : 
     348              :    If the match is successful then the corresponding complex_operation is
     349              :    returned and, when OPS is not NULL, NODE1 and NODE2 are pushed onto OPS.
     350              : 
     351              :    If TWO_OPERANDS then for MINUS_PLUS and PLUS_MINUS the LANES of the parent
     352              :    VEC_PERM must select from the two nodes alternately, and when OPS is given
     353              :    both nodes must have the same first two children, in either order.
     354              : 
     355              :    If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
     356              : 
     357              :    e.g. the following gimple statements
     358              : 
     359              :    stmt 0 _39 = _37 + _12;
     360              :    stmt 1 _6 = _38 - _36;
     361              : 
     362              :    will return PLUS_MINUS along with OPS containing {NODE1, NODE2}.  */
     363              : 
     364              : static complex_operation_t
     365      1343055 : vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
     366              :                      bool two_operands = true, vec<slp_tree> *ops = NULL)
     367              : {
     368      1343055 :   complex_operation_t result = CMPLX_NONE;
     369              : 
     370      1343055 :   if (vect_match_expression_p (node1, MINUS_EXPR)
     371        42306 :       && vect_match_expression_p (node2, PLUS_EXPR)
     372      1346274 :       && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
     373              :     result = MINUS_PLUS;
     374      1340118 :   else if (vect_match_expression_p (node1, PLUS_EXPR)
     375       135975 :            && vect_match_expression_p (node2, MINUS_EXPR)
     376      1343237 :            && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
     377              :     result = PLUS_MINUS;
     378      1337227 :   else if (vect_match_expression_p (node1, PLUS_EXPR)
     379      1337227 :            && vect_match_expression_p (node2, PLUS_EXPR))
     380              :     result = PLUS_PLUS;
     381      1333789 :   else if (vect_match_expression_p (node1, MULT_EXPR)
     382      1333789 :            && vect_match_expression_p (node2, MULT_EXPR))
     383         3658 :     result = MULT_MULT;
     384              : 
     385      1343055 :   if (result != CMPLX_NONE && ops != NULL)
     386              :     {
     387        12895 :       if (two_operands)
     388              :         {
     389        12895 :           auto l0node = SLP_TREE_CHILDREN (node1);
     390        12895 :           auto l1node = SLP_TREE_CHILDREN (node2);
     391              : 
     392              :           /* Check if the tree is connected as we expect it.  */
     393        19042 :           if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
     394         6850 :               || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
     395      1343055 :             return CMPLX_NONE;
     396              :         }
     397         6069 :       ops->safe_push (node1);
     398         6069 :       ops->safe_push (node2);
     399              :     }
     400              :   return result;
     401              : }
     402              : 
     403              : /* Overload of vect_detect_pair_op that matches the two children of NODE.
     404              :    Returns CMPLX_NONE if NODE does not have exactly two children or if NODE
     405              :    is a permute node while TWO_OPERANDS is false.  */
     406              : 
     407              : static complex_operation_t
     408      4733471 : vect_detect_pair_op (slp_tree node, bool two_operands = true,
     409              :                      vec<slp_tree> *ops = NULL)
     410              : {
     411      4733471 :   if (!two_operands && SLP_TREE_PERMUTE_P (node))
     412              :     return CMPLX_NONE;
     413              : 
     414      4733471 :   if (SLP_TREE_CHILDREN (node).length () != 2)
     415              :     return CMPLX_NONE;
     416              : 
     417      1343055 :   vec<slp_tree> children = SLP_TREE_CHILDREN (node);
     418      1343055 :   lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
     419              : 
     420      1343055 :   return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
     421      1343055 :                               ops);
     422              : }
     423              : 
     424              : /*******************************************************************************
     425              :  * complex_pattern class
     426              :  ******************************************************************************/
     427              : 
     428              : /* SLP Complex Numbers pattern matching.
     429              : 
     430              :   As an example, the following simple loop:
     431              : 
     432              :     double a[restrict N]; double b[restrict N]; double c[restrict N];
     433              : 
     434              :     for (int i=0; i < N; i+=2)
     435              :     {
     436              :       c[i] = a[i] - b[i+1];
     437              :       c[i+1] = a[i+1] + b[i];
     438              :     }
     439              : 
     440              :   which represents a complex addition with a rotation of 90 degrees in the
     441              :   Argand plane, i.e. if `a` and `b` were complex numbers then this would be the
     442              :   same as `a + (b * I)`.
     443              : 
     444              :   Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
     445              :   both recognized in order for the pattern to work.  As an SLP tree this is
     446              :   represented as
     447              : 
     448              :                 +--------------------------------+
     449              :                 |       stmt 0 *_9 = _10;        |
     450              :                 |       stmt 1 *_15 = _16;       |
     451              :                 +--------------------------------+
     452              :                                 |
     453              :                                 |
     454              :                                 v
     455              :                 +--------------------------------+
     456              :                 |     stmt 0 _10 = _4 - _8;      |
     457              :                 |    stmt 1 _16 = _12 + _14;     |
     458              :                 | lane permutation { 0[0] 1[1] } |
     459              :                 +--------------------------------+
     460              :                             |        |
     461              :                             |        |
     462              :                             |        |
     463              :                +-----+      |        |      +-----+
     464              :                |     |      |        |      |     |
     465              :          +-----| { } |<-----+        +----->| { } --------+
     466              :          |     |     |   +------------------|     |       |
     467              :          |     +-----+   |                  +-----+       |
     468              :          |        |      |                                |
     469              :          |        |      |                                |
     470              :          |        +------|------------------+             |
     471              :          |               |                  |             |
     472              :          v               v                  v             v
     473              :      +--------------------------+     +--------------------------------+
     474              :      |     stmt 0 _8 = *_7;     |     |        stmt 0 _4 = *_3;        |
     475              :      |    stmt 1 _14 = *_13;    |     |       stmt 1 _12 = *_11;       |
     476              :      | load permutation { 1 0 } |     |    load permutation { 0 1 }    |
     477              :      +--------------------------+     +--------------------------------+
     478              : 
     479              :   The pattern matcher allows you to replace both statements 0 and 1 or none at
     480              :   all.  Because this operation is a two operands operation the actual nodes
     481              :   being replaced are those in the { } nodes.  The actual scalar statements
     482              :   themselves are not replaced or used during the matching but instead the
     483              :   SLP_TREE_REPRESENTATIVE statements are inspected.  You are also allowed to
     484              :   replace and match on any number of nodes.
     485              : 
     486              :   Because the pattern matcher matches on the representative statement for the
     487              :   SLP node, in the case of two_operators it allows you to match the children of
     488              :   the node.  This is done using the method `recognize ()`.
     489              : 
     490              : */
     491              : 
     492              : /* The complex_pattern class contains common code for pattern matchers that work
     493              :    on complex numbers.  It provides functionality to allow de-construction and
     494              :    validation of sequences depicting/transforming REAL and IMAG pairs.  */
     495              : 
     496              : class complex_pattern : public vect_pattern
     497              : {
     498              :   protected:
     499              :     auto_vec<slp_tree> m_workset;  /* Nodes processed by build (); seeded with *NODE below.  */
     500           20 :     complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
     501           40 :       : vect_pattern (node, m_ops, ifn)
     502              :     {
     503           20 :       this->m_workset.safe_push (*node);
     504           20 :     }
     505              : 
     506              :   public:
     507              :     void build (vec_info *) override;
     508              : 
     509              :     static internal_fn
     510              :     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
     511              :              vec<slp_tree> *);
     512              : };
     513              : 
     514              : /* Create a replacement pattern statement for each node in m_node and inserts
     515              :    the new statement into m_node as the new representative statement.  The old
     516              :    statement is marked as being in a pattern defined by the new statement.  The
     517              :    statement is created as call to internal function IFN with m_num_args
     518              :    arguments.
     519              : 
     520              :    Furthermore the new pattern is also added to the vectorization information
     521              :    structure VINFO and the old statement STMT_INFO is marked as unused while
     522              :    the new statement is marked as used and the number of SLP uses of the new
     523              :    statement is incremented.
     524              : 
     525              :    The newly created SLP nodes are marked as SLP only and will be dissolved
     526              :    if SLP is aborted.
     527              : 
     528              :    The newly created gimple call is returned and the BB remains unchanged.
     529              : 
     530              :    This default method is designed to only match against simple operands where
     531              :    all the input and output types are the same.
     532              : */
     533              : 
     534              : void
     535           20 : complex_pattern::build (vec_info *vinfo)
     536              : {
     537           20 :   stmt_vec_info stmt_info;
     538              : 
     539           20 :   auto_vec<tree> args;
     540           20 :   args.create (this->m_num_args);
     541           20 :   args.quick_grow_cleared (this->m_num_args);
     542           20 :   slp_tree node;
     543           20 :   unsigned ix;
     544           20 :   stmt_vec_info call_stmt_info;
     545           20 :   gcall *call_stmt = NULL;
     546              : 
     547              :   /* Now modify the nodes themselves.  */
     548           60 :   FOR_EACH_VEC_ELT (this->m_workset, ix, node)
     549              :     {
     550              :       /* Calculate the location of the statement in NODE to replace.  */
     551           20 :       stmt_info = SLP_TREE_REPRESENTATIVE (node);
     552           20 :       stmt_vec_info reduc_def
     553           20 :         = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
     554           20 :       gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
     555           20 :       tree lhs_old_stmt = gimple_get_lhs (old_stmt);
     556           20 :       tree type = TREE_TYPE (lhs_old_stmt);
     557              : 
     558              :       /* Create the argument set for use by gimple_build_call_internal_vec.  */
     559           70 :       for (unsigned i = 0; i < this->m_num_args; i++)
     560           50 :         args[i] = lhs_old_stmt;
     561              : 
     562              :       /* Create the new pattern statements.  */
     563           20 :       call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
     564           20 :       tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
     565           20 :       gimple_call_set_lhs (call_stmt, var);
     566           20 :       gimple_set_location (call_stmt, gimple_location (old_stmt));
     567           20 :       gimple_call_set_nothrow (call_stmt, true);
     568              : 
     569              :       /* Adjust the book-keeping for the new and old statements for use during
     570              :          SLP.  This is required to get the right VF and statement during SLP
     571              :          analysis.  These changes are created after relevancy has been set for
     572              :          the nodes as such we need to manually update them.  Any changes will be
     573              :          undone if SLP is cancelled.  */
     574           20 :       call_stmt_info
     575           20 :         = vinfo->add_pattern_stmt (call_stmt, stmt_info);
     576              : 
     577              :       /* Make sure to mark the representative statement pure_slp and
     578              :          relevant and transfer reduction info. */
     579           20 :       STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
     580           20 :       STMT_SLP_TYPE (call_stmt_info) = pure_slp;
     581           20 :       STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
     582              : 
     583           20 :       gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
     584           20 :       STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
     585           20 :       STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
     586              : 
     587              :       /* Since we are replacing all the statements in the group with the same
     588              :          thing it doesn't really matter.  So just set it every time a new stmt
     589              :          is created.  */
     590           20 :       SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
     591           20 :       SLP_TREE_LANE_PERMUTATION (node).release ();
     592           20 :       SLP_TREE_CODE (node) = CALL_EXPR;
     593              :     }
     594           20 : }
     595              : 
     596              : /*******************************************************************************
     597              :  * complex_add_pattern class
     598              :  ******************************************************************************/
     599              : 
     600              : class complex_add_pattern : public complex_pattern
     601              : {
     602              :   protected:
     603            0 :     complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
     604            0 :       : complex_pattern (node, m_ops, ifn)
     605              :     {
     606            0 :       this->m_num_args = 2;
     607              :     }
     608              : 
     609              :   public:
     610              :     void build (vec_info *) final override;
     611              :     static internal_fn
     612              :     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
     613              :              slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
     614              : 
     615              :     static vect_pattern*
     616              :     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
     617              :                slp_tree *);
     618              : 
     619              :     static vect_pattern*
     620            0 :     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
     621              :     {
     622            0 :       return new complex_add_pattern (node, m_ops, ifn);
     623              :     }
     624              : };
     625              : 
     626              : /* Perform a replacement of the detected complex add pattern with the new
     627              :    instruction sequences.  */
     628              : 
     629              : void
     630            0 : complex_add_pattern::build (vec_info *vinfo)
     631              : {
     632            0 :   SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
     633              : 
     634            0 :   slp_tree node = this->m_ops[0];
     635            0 :   vec<slp_tree> children = SLP_TREE_CHILDREN (node);
     636              : 
     637              :   /* First re-arrange the children.  */
     638            0 :   SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
     639            0 :   SLP_TREE_CHILDREN (*this->m_node)[1] =
     640            0 :     vect_build_swap_evenodd_node (children[1]);
     641              : 
     642            0 :   SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
     643            0 :   SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
     644            0 :   vect_free_slp_tree (this->m_ops[0]);
     645            0 :   vect_free_slp_tree (this->m_ops[1]);
     646              : 
     647            0 :   complex_pattern::build (vinfo);
     648            0 : }
     649              : 
     650              : /* Pattern matcher for trying to match complex addition pattern in SLP tree.
     651              : 
     652              :    If no match is found then IFN is set to IFN_LAST.
     653              :    This function matches the patterns shaped as:
     654              : 
     655              :    c[i] = a[i] - b[i+1];
     656              :    c[i+1] = a[i+1] + b[i];
     657              : 
     658              :    If a match occurred then the matching IFN is returned, else IFN_LAST.  The
     659              :    initial match is expected to be in OP and its operands in OPS.  */
     660              : 
     661              : internal_fn
     662      4722760 : complex_add_pattern::matches (complex_operation_t op,
     663              :                               slp_tree_to_load_perm_map_t *perm_cache,
     664              :                               slp_compat_nodes_map_t * /* compat_cache */,
     665              :                               slp_tree *node, vec<slp_tree> *ops)
     666              : {
     667      4722760 :   internal_fn ifn = IFN_LAST;
     668              : 
     669              :   /* Find the two components.  Rotation in the complex plane will modify
     670              :      the operations:
     671              : 
     672              :       * Rotation  0: + +
     673              :       * Rotation 90: - +
     674              :       * Rotation 180: - -
     675              :       * Rotation 270: + -
     676              : 
     677              :       Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
     678              :       to care about them here.  */
     679      4722760 :   if (op == MINUS_PLUS)
     680              :     ifn = IFN_COMPLEX_ADD_ROT90;
     681      4719845 :   else if (op == PLUS_MINUS)
     682              :     ifn = IFN_COMPLEX_ADD_ROT270;
     683              :   else
     684              :     return ifn;
     685              : 
     686              :   /* Verify that there is a permute, otherwise this isn't a pattern we
     687              :      support.  */
     688         5787 :   gcc_assert (ops->length () == 2);
     689              : 
     690         5787 :   vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
     691              : 
     692              :   /* First node must be unpermuted.  */
     693         5787 :   if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
     694              :     return IFN_LAST;
     695              : 
     696              :   /* Second node must be permuted.  */
     697          492 :   if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
     698              :     return IFN_LAST;
     699              : 
     700          338 :   if (!vect_pattern_validate_optab (ifn, *node))
     701              :     return IFN_LAST;
     702              : 
     703              :   return ifn;
     704              : }
     705              : 
     706              : /* Attempt to recognize a complex add pattern.  */
     707              : 
     708              : vect_pattern*
     709            0 : complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
     710              :                                 slp_compat_nodes_map_t *compat_cache,
     711              :                                 slp_tree *node)
     712              : {
     713            0 :   auto_vec<slp_tree> ops;
     714            0 :   complex_operation_t op
     715            0 :     = vect_detect_pair_op (*node, true, &ops);
     716            0 :   internal_fn ifn
     717            0 :     = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
     718            0 :   if (ifn == IFN_LAST)
     719              :     return NULL;
     720              : 
     721            0 :   return new complex_add_pattern (node, &ops, ifn);
     722            0 : }
     723              : 
     724              : /*******************************************************************************
     725              :  * complex_mul_pattern
     726              :  ******************************************************************************/
     727              : 
     728              : /* Check that the permutes of OP1 and OP2 are KIND1 and KIND2, or PERM_TOP.  */
     729              : 
     730              : static inline bool
     731          533 : is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
     732              :               slp_tree op1, complex_perm_kinds_t kind1,
     733              :               slp_tree op2, complex_perm_kinds_t kind2)
     734              : {
     735          533 :   complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
     736          533 :   if (perm1 != kind1 && perm1 != PERM_TOP)
     737              :     return false;
     738              : 
     739          173 :   complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
     740          173 :   if (perm2 != kind2 && perm2 != PERM_TOP)
     741              :     return false;
     742              : 
     743              :   return true;
     744              : }
     745              : 
     746              : enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND };
     747              : 
     748              : static inline bool
     749          369 : compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
     750              :                             slp_tree a, int *pa, slp_tree b, int *pb)
     751              : {
     752          369 :   bool *tmp;
     753          369 :   std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
     754          369 :   if ((tmp = compat_cache->get (key)) != NULL)
     755           27 :     return *tmp;
     756              : 
     757          342 :    compat_cache->put (key, false);
     758              : 
     759          406 :   if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
     760              :     return false;
     761              : 
     762          340 :   if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
     763              :     return false;
     764              : 
     765              :   /* Only internal nodes can be loads, as such we can't check further if they
     766              :      are externals.  */
     767          340 :   if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
     768              :     {
     769          188 :       for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
     770              :         {
     771          130 :           tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
     772          130 :           tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
     773          130 :           if (!operand_equal_p (op1, op2, 0))
     774              :             return false;
     775              :         }
     776              : 
     777           58 :       compat_cache->put (key, true);
     778           58 :       return true;
     779              :     }
     780              : 
     781          280 :   auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
     782          280 :   auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
     783              : 
     784          280 :   if (gimple_code (a_stmt) != gimple_code (b_stmt))
     785              :     return false;
     786              : 
     787              :   /* code, children, type, externals, loads, constants  */
     788          280 :   if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
     789              :     return false;
     790              : 
     791              :   /* At this point, a and b are known to be the same gimple operations.  */
     792          280 :   if (is_gimple_call (a_stmt))
     793              :     {
     794            0 :         if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
     795              :                                  dyn_cast <gcall *> (b_stmt), false))
     796              :           return false;
     797              :     }
     798          280 :   else if (!is_gimple_assign (a_stmt))
     799              :     return false;
     800              :   else
     801              :     {
     802          280 :       tree_code acode = gimple_assign_rhs_code (a_stmt);
     803          280 :       tree_code bcode = gimple_assign_rhs_code (b_stmt);
     804          280 :       if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
     805          171 :           && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
     806              :         return true;
     807              : 
     808          109 :       if (acode != bcode)
     809              :         return false;
     810              :     }
     811              : 
     812          109 :   if (!STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a))
     813           78 :       || !STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b)))
     814              :     {
     815           92 :       for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
     816              :         {
     817           61 :           tree t1 = gimple_arg (a_stmt, i);
     818           61 :           tree t2 = gimple_arg (b_stmt, i);
     819           61 :           if (TREE_CODE (t1) != TREE_CODE (t2))
     820              :             return false;
     821              : 
     822              :           /* If SSA name then we will need to inspect the children
     823              :              so we can punt here.  */
     824           61 :           if (TREE_CODE (t1) == SSA_NAME)
     825           43 :             continue;
     826              : 
     827           18 :           if (!operand_equal_p (t1, t2, 0))
     828              :             return false;
     829              :         }
     830              :     }
     831              :   else
     832              :     {
     833           78 :       auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
     834           78 :       auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
     835              :       /* Don't check the last dimension as that's checked by the linearity
     836              :          checks.  This check is also much stricter than what we need
     837              :          because it doesn't consider loading from adjacent elements
     838              :          in the same struct as loading from the same base object.
     839              :          But for now, I'll play it safe.  */
     840           78 :       if (!same_data_refs (dr1, dr2, 1))
     841              :         return false;
     842              :     }
     843              : 
     844          148 :   for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
     845              :     {
     846           61 :       if (!compatible_complex_nodes_p (compat_cache,
     847           61 :                                        SLP_TREE_CHILDREN (a)[i], pa,
     848           61 :                                        SLP_TREE_CHILDREN (b)[i], pb))
     849              :         return false;
     850              :     }
     851              : 
     852           87 :   compat_cache->put (key, true);
     853           87 :   return true;
     854              : }
     855              : 
     856              : 
     857              : /* Check to see if the operands to two multiplies, 2 each in LEFT_OP and
     858              :    RIGHT_OP match a complex multiplication or complex multiply-and-accumulate
     859              :    or complex multiply-and-subtract pattern.  Do this using the permute cache
     860              :    PERM_CACHE and the combination compatibility list COMPAT_CACHE.  If
     861              :    the operation is successful the matching operands are returned in OPS and
     862              :    _STATUS indicates if the operation matched includes a conjugate of one of the
     863              :    operands.  If the operation succeeds True is returned, otherwise False and
     864              :    the values in ops are meaningless.  */
     865              : static inline bool
     866         1506 : vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
     867              :                               slp_compat_nodes_map_t *compat_cache,
     868              :                               const vec<slp_tree> &left_op,
     869              :                               const vec<slp_tree> &right_op,
     870              :                               bool subtract, vec<slp_tree> &ops,
     871              :                               enum _conj_status *_status)
     872              : {
     873         1506 :   enum _conj_status stats = CONJ_NONE;
     874              : 
     875              :   /* The complex operations can occur in two layouts and two permute sequences
     876              :      so declare them and re-use them.  */
     877         1506 :   int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}.  */
     878              :                     , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}.  */
     879              :                     };
     880              : 
     881              :   /* Now for the corresponding permutes that go with these values.  */
     882         1506 :   complex_perm_kinds_t perms[][4]
     883              :     = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
     884              :       , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
     885              :       };
     886              : 
     887              :   /* These permutes are used during comparisons of externals on which
     888              :      we require strict equality.  */
     889         1506 :   int cq[][4][2]
     890              :     = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
     891              :       , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
     892              :       };
     893              : 
     894              :   /* Default to style and perm 0, most operations use this one.  */
     895         1506 :   int style = 0;
     896         1506 :   int perm = subtract ? 1 : 0;
     897              : 
     898              :   /* Check if we have a negate operation, if so absorb the node and continue
     899              :      looking.  */
     900         1506 :   bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
     901         1506 :   bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
     902              : 
     903              :   /* Create the combined inputs after remapping and flattening.  */
     904         1506 :   ops.create (4);
     905         1506 :   ops.safe_splice (left_op);
     906         1506 :   ops.safe_splice (right_op);
     907              : 
     908              :   /* Determine which style we're looking at.  We only have different ones
     909              :      whenever a conjugate is involved.  */
     910         1506 :   if (neg0 && neg1)
     911              :     ;
     912         1506 :   else if (neg0)
     913              :     {
     914            0 :       ops[2] = SLP_TREE_CHILDREN (right_op[0])[0];
     915            0 :       stats = CONJ_FST;
     916            0 :       if (subtract)
     917            0 :         perm = 0;
     918              :     }
     919         1506 :   else if (neg1)
     920              :     {
     921           10 :       ops[3] = SLP_TREE_CHILDREN (right_op[1])[0];
     922           10 :       stats = CONJ_SND;
     923           10 :       perm = 1;
     924              :     }
     925              : 
     926         1506 :   *_status = stats;
     927              : 
     928              :   /* Extract out the elements to check.  */
     929         1506 :   slp_tree op0 = ops[styles[style][0]];
     930         1506 :   slp_tree op1 = ops[styles[style][1]];
     931         1506 :   slp_tree op2 = ops[styles[style][2]];
     932         1506 :   slp_tree op3 = ops[styles[style][3]];
     933              : 
     934              :   /* Do cheapest test first.  If failed no need to analyze further.  */
     935         1506 :   if (linear_loads_p (perm_cache, op0) != perms[perm][0]
     936          589 :       || linear_loads_p (perm_cache, op1) != perms[perm][1]
     937         2039 :       || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
     938         1333 :     return false;
     939              : 
     940          173 :   return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
     941          173 :                                      cq[perm][1])
     942          308 :          && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
     943          135 :                                         cq[perm][3]);
     944              : }
     945              : 
     946              : /* This function combines two nodes containing only even and only odd lanes
     947              :    together into a single node which contains the nodes in even/odd order
     948              :    by using a lane permute.
     949              : 
     950              :    The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
     951              :    So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
     952              : 
     953              :    The tree REPRESENTATION is taken from the supplied REP along with the
     954              :    vectype which must be the same between all three nodes.
     955              : */
     956              : 
     957              : static slp_tree
     958           20 : vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
     959              : {
     960           20 :   vec<std::pair<unsigned, unsigned> > perm;
     961           20 :   perm.create (SLP_TREE_LANES (rep));
     962              : 
     963           40 :   for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
     964              :     {
     965           20 :       perm.quick_push (std::make_pair (0, x));
     966           20 :       perm.quick_push (std::make_pair (1, x+1));
     967              :     }
     968              : 
     969           20 :   slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
     970           20 :   SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
     971           20 :   SLP_TREE_LANE_PERMUTATION (vnode) = perm;
     972              : 
     973           20 :   SLP_TREE_CHILDREN (vnode).create (2);
     974           20 :   SLP_TREE_CHILDREN (vnode).quick_push (even);
     975           20 :   SLP_TREE_CHILDREN (vnode).quick_push (odd);
     976           20 :   SLP_TREE_REF_COUNT (even)++;
     977           20 :   SLP_TREE_REF_COUNT (odd)++;
     978           20 :   SLP_TREE_REF_COUNT (vnode) = 1;
     979              : 
     980           20 :   SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
     981           40 :   gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
     982              :   /* Representation is set to that of the current node as the vectorizer
     983              :      can't deal with VEC_PERMs with no representation, as would be the
     984              :      case with invariants.  */
     985           20 :   SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
     986           20 :   SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
     987           20 :   return vnode;
     988              : }
     989              : 
     990              : class complex_mul_pattern : public complex_pattern
     991              : {
     992              :   protected:
     993           20 :     complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
     994           40 :       : complex_pattern (node, m_ops, ifn)
     995              :     {
     996           20 :       this->m_num_args = 2;
     997              :     }
     998              : 
     999              :   public:
    1000              :     void build (vec_info *) final override;
    1001              :     static internal_fn
    1002              :     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
    1003              :              slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
    1004              : 
    1005              :     static vect_pattern*
    1006              :     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
    1007              :                slp_tree *);
    1008              : 
    1009              :     static vect_pattern*
    1010           20 :     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
    1011              :     {
    1012           20 :       return new complex_mul_pattern (node, m_ops, ifn);
    1013              :     }
    1014              : 
    1015              : };
    1016              : 
    1017              : /* Pattern matcher for trying to match complex multiply and complex multiply
    1018              :    and accumulate pattern in SLP tree.  If the operation matches then IFN
    1019              :    is set to the operation it matched and the arguments to the two
    1020              :    replacement statements are put in m_ops.
    1021              : 
    1022              :    If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
    1023              : 
    1024              :    This function matches the patterns shaped as:
    1025              : 
    1026              :    double ax = (b[i+1] * a[i]);
    1027              :    double bx = (a[i+1] * b[i]);
    1028              : 
    1029              :    c[i] = c[i] - ax;
    1030              :    c[i+1] = c[i+1] + bx;
    1031              : 
    1032              :    If a match occurred then TRUE is returned, else FALSE.  The initial match is
    1033              :    expected to be in OP1 and the initial match operands in args0.  */
    1034              : 
    1035              : internal_fn
    1036      4722780 : complex_mul_pattern::matches (complex_operation_t op,
    1037              :                               slp_tree_to_load_perm_map_t *perm_cache,
    1038              :                               slp_compat_nodes_map_t *compat_cache,
    1039              :                               slp_tree *node, vec<slp_tree> *ops)
    1040              : {
    1041      4722780 :   internal_fn ifn = IFN_LAST;
    1042              : 
    1043      4722780 :   if (op != MINUS_PLUS)
    1044              :     return IFN_LAST;
    1045              : 
    1046         2935 :   auto childs = *ops;
    1047         2935 :   auto l0node = SLP_TREE_CHILDREN (childs[0]);
    1048              : 
    1049         2935 :   bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
    1050         2935 :   bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
    1051         2935 :   if (!mul0 && !mul1)
    1052              :     return IFN_LAST;
    1053              : 
    1054              :   /* Now operand2+4 may lead to another expression.  */
    1055         2019 :   auto_vec<slp_tree> left_op, right_op;
    1056         2019 :   slp_tree add0 = NULL;
    1057              : 
    1058              :   /* Check if we may be a multiply add.  It's only valid to form FMAs
    1059              :      with -ffp-contract=fast.  */
    1060         2019 :   if (!mul0
    1061         1204 :       && (flag_fp_contract_mode == FP_CONTRACT_FAST
    1062            3 :           || !FLOAT_TYPE_P (SLP_TREE_VECTYPE (*node)))
    1063         3220 :       && vect_match_expression_p (l0node[0], PLUS_EXPR))
    1064              :     {
    1065         1144 :       auto vals = SLP_TREE_CHILDREN (l0node[0]);
    1066              :       /* Check if it's a multiply, otherwise no idea what this is.  */
    1067         1144 :       if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
    1068         2019 :         return IFN_LAST;
    1069              : 
    1070              :       /* Check if the ADD is linear, otherwise it's not valid complex FMA.  */
    1071          633 :       if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
    1072              :         return IFN_LAST;
    1073              : 
    1074           18 :       left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
    1075           18 :       add0 = vals[0];
    1076              :     }
    1077              :   else
    1078          875 :     left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
    1079              : 
    1080          893 :   right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
    1081              : 
    1082          893 :   if (left_op.length () != 2
    1083          789 :       || right_op.length () != 2
    1084              :       || !mul0
    1085          788 :       || !mul1
    1086         1619 :       || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
    1087          113 :     return IFN_LAST;
    1088              : 
    1089          780 :   enum _conj_status status;
    1090          780 :   auto_vec<slp_tree> res_ops;
    1091          780 :   if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
    1092              :                                      right_op, false, res_ops, &status))
    1093              :     {
    1094              :       /* Try swapping the order and re-trying since multiplication is
    1095              :          commutative.  */
    1096          700 :       std::swap (left_op[0], left_op[1]);
    1097          700 :       std::swap (right_op[0], right_op[1]);
    1098          700 :       if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
    1099              :                                          right_op, false, res_ops, &status))
    1100              :         return IFN_LAST;
    1101              :     }
    1102              : 
    1103          126 :   if (status == CONJ_NONE)
    1104              :     {
    1105          116 :       if (add0)
    1106              :         ifn = IFN_COMPLEX_FMA;
    1107              :       else
    1108          111 :         ifn = IFN_COMPLEX_MUL;
    1109              :     }
    1110              :   else
    1111              :     {
    1112           10 :       if(add0)
    1113              :         ifn = IFN_COMPLEX_FMA_CONJ;
    1114              :       else
    1115            5 :         ifn = IFN_COMPLEX_MUL_CONJ;
    1116              :     }
    1117              : 
    1118          126 :   if (!vect_pattern_validate_optab (ifn, *node))
    1119              :     return IFN_LAST;
    1120              : 
    1121           20 :   ops->truncate (0);
    1122           30 :   ops->create (add0 ? 4 : 3);
    1123              : 
    1124           20 :   if (add0)
    1125           10 :     ops->quick_push (add0);
    1126              : 
    1127           20 :   complex_perm_kinds_t kind = linear_loads_p (perm_cache, res_ops[0]);
    1128           20 :   if (kind == PERM_EVENODD || kind == PERM_TOP)
    1129              :     {
    1130           10 :       ops->quick_push (res_ops[1]);
    1131           10 :       ops->quick_push (res_ops[3]);
    1132           10 :       ops->quick_push (res_ops[0]);
    1133              :     }
    1134           10 :   else if (kind == PERM_EVENEVEN && status != CONJ_SND)
    1135              :     {
    1136           10 :       ops->quick_push (res_ops[0]);
    1137           10 :       ops->quick_push (res_ops[2]);
    1138           10 :       ops->quick_push (res_ops[1]);
    1139              :     }
    1140              :   else
    1141              :     {
    1142            0 :       ops->quick_push (res_ops[0]);
    1143            0 :       ops->quick_push (res_ops[3]);
    1144            0 :       ops->quick_push (res_ops[1]);
    1145              :     }
    1146              : 
    1147              :   return ifn;
    1148         2799 : }
    1149              : 
    1150              : /* Attempt to recognize a complex mul pattern rooted at *NODE.  The pair
                      :    operation returned by vect_detect_pair_op and the OPS vector passed to
                      :    it are handed to complex_mul_pattern::matches together with PERM_CACHE
                      :    and COMPAT_CACHE.  Returns NULL when that reports IFN_LAST, otherwise a
                      :    newly allocated complex_mul_pattern for the matched internal
                      :    function.  */
    1151              : 
    1152              : vect_pattern*
    1153            0 : complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
    1154              :                                 slp_compat_nodes_map_t *compat_cache,
    1155              :                                 slp_tree *node)
    1156              : {
    1157            0 :   auto_vec<slp_tree> ops;
    1158            0 :   complex_operation_t op
    1159            0 :     = vect_detect_pair_op (*node, true, &ops);
    1160            0 :   internal_fn ifn
    1161            0 :     = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
    1162            0 :   if (ifn == IFN_LAST)
    1163              :     return NULL;
    1164              : 
    1165            0 :   return new complex_mul_pattern (node, &ops, ifn);
    1166            0 : }
    1167              : 
    1168              : /* Perform a replacement of the detected complex mul pattern with the new
    1169              :    instruction sequences.
                      : 
                      :    For IFN_COMPLEX_MUL and IFN_COMPLEX_MUL_CONJ the children of the node
                      :    become { m_ops[2], combine (m_ops[0], m_ops[1]) }.  For IFN_COMPLEX_FMA
                      :    and IFN_COMPLEX_FMA_CONJ they become
                      :    { m_ops[3], combine (m_ops[1], m_ops[2]), m_ops[0] } and m_num_args is
                      :    incremented.  Here combine is the node returned by
                      :    vect_build_combine_node.  Any other m_ifn hits gcc_unreachable.  The
                      :    node itself is then rewritten by complex_pattern::build (VINFO).  */
    1170              : 
    1171              : void
    1172           20 : complex_mul_pattern::build (vec_info *vinfo)
    1173              : {
    1174           20 :   slp_tree node;
    1175           20 :   unsigned i;
    1176           20 :   switch (this->m_ifn)
    1177              :   {
    1178           10 :     case IFN_COMPLEX_MUL:
    1179           10 :     case IFN_COMPLEX_MUL_CONJ:
    1180           10 :       {
    1181           10 :         slp_tree newnode
    1182           10 :           = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
    1183           10 :                                      *this->m_node);
    1184           10 :         SLP_TREE_REF_COUNT (this->m_ops[2])++;
    1185              :         /* The kept operand was referenced above; now free the old children.  */
    1186           30 :         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
    1187           20 :           vect_free_slp_tree (node);
    1188              : 
    1189              :         /* First re-arrange the children.
                      :            NOTE(review): slots 0 and 1 are assigned by index below, so this
                      :            appears to rely on the node already having two children;
                      :            confirm reserve_exact is not expected to change the length.  */
    1190           10 :         SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
    1191           10 :         SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
    1192           10 :         SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
    1193           10 :         break;
    1194              :       }
    1195           10 :     case IFN_COMPLEX_FMA:
    1196           10 :     case IFN_COMPLEX_FMA_CONJ:
    1197           10 :       {
    1198           10 :         SLP_TREE_REF_COUNT (this->m_ops[0])++;
    1199           10 :         slp_tree newnode
    1200           10 :           = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
    1201           10 :                                      *this->m_node);
    1202           10 :         SLP_TREE_REF_COUNT (this->m_ops[3])++;
    1203              :         /* Both kept operands were referenced above; now free the old children.  */
    1204           30 :         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
    1205           20 :           vect_free_slp_tree (node);
    1206              : 
    1207              :         /* First re-arrange the children.  The node now takes three of them.  */
    1208           10 :         SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
    1209           10 :         SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
    1210           10 :         SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
    1211           10 :         SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
    1212              : 
    1213              :         /* Tell the builder to expect an extra argument.  */
    1214           10 :         this->m_num_args++;
    1215           10 :         break;
    1216              :       }
    1217            0 :     default:
    1218            0 :       gcc_unreachable ();
    1219              :   }
    1220              : 
    1221              :   /* And then rewrite the node itself.  */
    1222           20 :   complex_pattern::build (vinfo);
    1223           20 : }
    1224              : 
    1225              : /*******************************************************************************
    1226              :  * complex_fms_pattern class
                      :  *
                      :  * The constructor is protected and sets m_num_args to 3.  Instances are
                      :  * handed out by the public static members recognize and mkInstance; the
                      :  * latter simply allocates a new object from its arguments.
    1227              :  ******************************************************************************/
    1228              : 
    1229              : class complex_fms_pattern : public complex_pattern
    1230              : {
    1231              :   protected:
    1232            0 :     complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
    1233            0 :       : complex_pattern (node, m_ops, ifn)
    1234              :     {
    1235            0 :       this->m_num_args = 3;
    1236              :     }
    1237              : 
    1238              :   public:
    1239              :     void build (vec_info *) final override;
    1240              :     static internal_fn
    1241              :     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
    1242              :              slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
    1243              : 
    1244              :     static vect_pattern*
    1245              :     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
    1246              :                slp_tree *);
    1247              : 
    1248              :     static vect_pattern*
    1249            0 :     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
    1250              :     {
    1251            0 :       return new complex_fms_pattern (node, m_ops, ifn);
    1252              :     }
    1253              : };
    1254              : 
    1255              : 
    1256              : /* Pattern matcher for trying to match complex multiply and subtract pattern
    1257              :    in SLP tree.  If the operation matches then the matched internal function
    1258              :    is returned and the arguments to the two replacement statements are put in
    1259              :    OPS.
    1260              : 
    1261              :    If no match is found then IFN_LAST is returned and OPS is unchanged.
    1262              : 
    1263              :    This function matches the patterns shaped as:
    1264              : 
    1265              :    double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
    1266              :    double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
    1267              : 
    1268              :    c[i] = c[i] - ax;
    1269              :    c[i+1] = c[i+1] + bx;
    1270              : 
    1271              :    OP is the pair operation already detected for *REF_NODE; only CMPLX_NONE
    1272              :    is accepted.  PERM_CACHE and COMPAT_CACHE are handed to the helpers.  */
    1273              : 
    1274              : internal_fn
    1275      4722780 : complex_fms_pattern::matches (complex_operation_t op,
    1276              :                               slp_tree_to_load_perm_map_t *perm_cache,
    1277              :                               slp_compat_nodes_map_t *compat_cache,
    1278              :                               slp_tree * ref_node, vec<slp_tree> *ops)
    1279              : {
    1280      4722780 :   internal_fn ifn = IFN_LAST;
    1281              : 
    1282              :   /* We need to ignore the two_operands nodes that may also match,
    1283              :      for that we can check if they have any scalar statements and also
    1284              :      check that it's not a permute node as we're looking for a normal
    1285              :      MINUS_EXPR operation.  */
    1286      4722780 :   if (op != CMPLX_NONE)
    1287              :     return IFN_LAST;
    1288              : 
    1289      4716711 :   slp_tree root = *ref_node;
    1290      4716711 :   if (!vect_match_expression_p (root, MINUS_EXPR))
    1291              :     return IFN_LAST;
    1292              : 
    1293              :   /* TODO: Support invariants here, with the new layout CADD now
    1294              :            can match before we get a chance to try CFMS.  */
    1295        59246 :   auto nodes = SLP_TREE_CHILDREN (root);
    1296        59246 :   if (!vect_match_expression_p (nodes[1], MULT_EXPR)
    1297        69937 :       || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
    1298        59227 :     return IFN_LAST;
    1299              : 
    1300           19 :   auto childs = SLP_TREE_CHILDREN (nodes[0]);
    1301           19 :   auto l0node = SLP_TREE_CHILDREN (childs[0]);
    1302              : 
    1303              :   /* Now operand2+4 may lead to another expression.  */
    1304           19 :   auto_vec<slp_tree> left_op, right_op;
    1305           19 :   left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
    1306           19 :   right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
    1307              : 
    1308              :   /* If these nodes don't have any children then they're
    1309              :      not ones we're interested in.  */
    1310           19 :   if (left_op.length () != 2
    1311           13 :       || right_op.length () != 2
    1312           26 :       || !vect_match_expression_p (l0node[1], MULT_EXPR))
    1313            6 :     return IFN_LAST;
    1314              : 
    1315           13 :   enum _conj_status status;
    1316           13 :   auto_vec<slp_tree> res_ops;
    1317           13 :   if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
    1318              :                                      left_op, true, res_ops, &status))
    1319              :     {
    1320              :       /* Try swapping the order and re-trying since multiplication is
    1321              :          commutative.  The retry must fill the RES_OPS that is read below,
                      :          so reuse it rather than a fresh local; clear it first in case
                      :          the failed attempt left entries behind.  */
    1322           13 :       std::swap (left_op[0], left_op[1]);
    1323           13 :       std::swap (right_op[0], right_op[1]);
    1324           13 :       res_ops.truncate (0);
    1325           13 :       if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
    1326              :                                          left_op, true, res_ops, &status))
    1327           13 :         return IFN_LAST;
    1328           13 :     }
    1329              : 
    1330            0 :   if (status == CONJ_NONE)
    1331              :     ifn = IFN_COMPLEX_FMS;
    1332              :   else
    1333            0 :     ifn = IFN_COMPLEX_FMS_CONJ;
    1334              : 
    1335            0 :   if (!vect_pattern_validate_optab (ifn, *ref_node))
    1336              :     return IFN_LAST;
    1337              : 
    1338            0 :   ops->truncate (0);
    1339            0 :   ops->create (4);
    1340              : 
    1341            0 :   complex_perm_kinds_t kind = linear_loads_p (perm_cache, res_ops[2]);
    1342            0 :   if (kind == PERM_EVENODD)
    1343              :     {
    1344            0 :       ops->quick_push (l0node[0]);
    1345            0 :       ops->quick_push (res_ops[2]);
    1346            0 :       ops->quick_push (res_ops[3]);
    1347            0 :       ops->quick_push (res_ops[1]);
    1348              :     }
    1349              :   else
    1350              :     {
    1351            0 :       ops->quick_push (l0node[0]);
    1352            0 :       ops->quick_push (res_ops[3]);
    1353            0 :       ops->quick_push (res_ops[2]);
    1354            0 :       ops->quick_push (res_ops[0]);
    1355              :     }
    1356              : 
    1357              :   return ifn;
    1358           32 : }
    1359              : 
    1360              : /* Attempt to recognize a complex fms pattern rooted at *NODE.  The pair
                      :    operation returned by vect_detect_pair_op and the OPS vector passed to
                      :    it are handed to complex_fms_pattern::matches together with PERM_CACHE
                      :    and COMPAT_CACHE.  Returns NULL when that reports IFN_LAST, otherwise a
                      :    newly allocated complex_fms_pattern for the matched internal
                      :    function.  */
    1361              : 
    1362              : vect_pattern*
    1363            0 : complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
    1364              :                                 slp_compat_nodes_map_t *compat_cache,
    1365              :                                 slp_tree *node)
    1366              : {
    1367            0 :   auto_vec<slp_tree> ops;
    1368            0 :   complex_operation_t op
    1369            0 :     = vect_detect_pair_op (*node, true, &ops);
    1370            0 :   internal_fn ifn
    1371            0 :     = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
    1372            0 :   if (ifn == IFN_LAST)
    1373              :     return NULL;
    1374              : 
    1375            0 :   return new complex_fms_pattern (node, &ops, ifn);
    1376            0 : }
    1377              : 
    1378              : /* Perform a replacement of the detected complex fms pattern with the new
    1379              :    instruction sequences.  The children of the node are replaced by
                      :    { m_ops[1], combine (m_ops[2], m_ops[3]), m_ops[0] }, where combine is
                      :    the node returned by vect_build_combine_node, and the node itself is
                      :    then rewritten by complex_pattern::build (VINFO).  */
    1380              : 
    1381              : void
    1382            0 : complex_fms_pattern::build (vec_info *vinfo)
    1383              : {
    1384            0 :   slp_tree node;
    1385            0 :   unsigned i;
    1386            0 :   slp_tree newnode =
    1387            0 :     vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
    1388            0 :   SLP_TREE_REF_COUNT (this->m_ops[0])++;
    1389            0 :   SLP_TREE_REF_COUNT (this->m_ops[1])++;
    1390              :   /* Both kept operands were referenced above; now free the old children.  */
    1391            0 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
    1392            0 :     vect_free_slp_tree (node);
    1393              :   /* Start over with an empty child vector that has room for three.  */
    1394            0 :   SLP_TREE_CHILDREN (*this->m_node).release ();
    1395            0 :   SLP_TREE_CHILDREN (*this->m_node).create (3);
    1396              : 
    1397              :   /* First re-arrange the children.  */
    1398            0 :   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
    1399            0 :   SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
    1400            0 :   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
    1401              : 
    1402              :   /* And then rewrite the node itself.  */
    1403            0 :   complex_pattern::build (vinfo);
    1404            0 : }
    1405              : 
    1406              : /*******************************************************************************
    1407              :  * complex_operations_pattern class
    1408              :  ******************************************************************************/
    1409              : 
    1410              : /* This class combines all the existing pattern matchers above into one class
    1411              :    that shares the functionality between them.  The initial match is shared
    1412              :    between all complex operations.  The protected constructor sets
                      :    m_num_args to 0.  */
    1413              : 
    1414              : class complex_operations_pattern : public complex_pattern
    1415              : {
    1416              :   protected:
    1417              :     complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
    1418              :                                 internal_fn ifn)
    1419              :       : complex_pattern (node, m_ops, ifn)
    1420              :     {
    1421              :       this->m_num_args = 0;
    1422              :     }
    1423              : 
    1424              :   public:
    1425              :     void build (vec_info *) final override;
    1426              :     static internal_fn
    1427              :     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
    1428              :              slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
    1429              : 
    1430              :     static vect_pattern*
    1431              :     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
    1432              :                slp_tree *);
    1433              : };
    1434              : 
    1435              : /* Dummy matches implementation for proxy object.  All arguments are ignored
                      :    and IFN_LAST is always returned.  */
    1436              : 
    1437              : internal_fn
    1438            0 : complex_operations_pattern::
    1439              : matches (complex_operation_t /* op */,
    1440              :          slp_tree_to_load_perm_map_t * /* perm_cache */,
    1441              :          slp_compat_nodes_map_t * /* compat_cache */,
    1442              :          slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
    1443              : {
    1444            0 :   return IFN_LAST;
    1445              : }
    1446              : 
    1447              : /* Attempt to recognize any of the complex patterns on *NODE.  The pair
                      :    operation is detected once with vect_detect_pair_op and shared by the
                      :    matchers, which are tried in the order FMS, MUL, ADD.  Returns the
                      :    instance made by the first matcher that does not report IFN_LAST, or
                      :    NULL if none of them matches.  */
    1448              : 
    1449              : vect_pattern*
    1450      4722780 : complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
    1451              :                                        slp_compat_nodes_map_t *ccache,
    1452              :                                        slp_tree *node)
    1453              : {
    1454      4722780 :   auto_vec<slp_tree> ops;
    1455      4722780 :   complex_operation_t op
    1456      4722780 :     = vect_detect_pair_op (*node, true, &ops);
    1457      4722780 :   internal_fn ifn = IFN_LAST;
    1458              : 
    1459      4722780 :   ifn  = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
    1460      4722780 :   if (ifn != IFN_LAST)
    1461            0 :     return complex_fms_pattern::mkInstance (node, &ops, ifn);
    1462              : 
    1463      4722780 :   ifn  = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
    1464      4722780 :   if (ifn != IFN_LAST)
    1465           20 :     return complex_mul_pattern::mkInstance (node, &ops, ifn);
    1466              : 
    1467      4722760 :   ifn  = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
    1468      4722760 :   if (ifn != IFN_LAST)
    1469            0 :     return complex_add_pattern::mkInstance (node, &ops, ifn);
    1470              : 
    1471              :   return NULL;
    1472      4722780 : }
    1473              : 
    1474              : /* Dummy implementation of build.  It is not expected to run and hits
                      :    gcc_unreachable if it is ever called; VINFO is ignored.  */
    1475              : 
    1476              : void
    1477            0 : complex_operations_pattern::build (vec_info * /* vinfo */)
    1478              : {
    1479            0 :   gcc_unreachable ();
    1480              : }
    1481              : 
    1482              : 
    1483              : /* The addsub_pattern.  The constructor forwards NODE and IFN to
                      :    vect_pattern and passes NULL as its second argument.  */
    1484              : 
    1485              : class addsub_pattern : public vect_pattern
    1486              : {
    1487              :   public:
    1488         1076 :     addsub_pattern (slp_tree *node, internal_fn ifn)
    1489         1076 :         : vect_pattern (node, NULL, ifn) {};
    1490              : 
    1491              :     void build (vec_info *) final override;
    1492              : 
    1493              :     static vect_pattern*
    1494              :     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
    1495              :                slp_tree *);
    1496              : };
    1497              : /* Recognize an addsub style blend.  *NODE_ has to be a permute node with
                      :    two children and an even-length lane permutation.  The two children
                      :    selected by the first two lanes must be PLUS_EXPR or MINUS_EXPR nodes,
                      :    or CFN_FMA or CFN_FMS calls, whose first two operands are the same
                      :    (possibly swapped), and the lanes must be taken alternately from them.
                      :    Returns a new addsub_pattern for IFN_VEC_FMADDSUB, IFN_VEC_FMSUBADD or
                      :    IFN_VEC_ADDSUB when vect_pattern_validate_optab accepts it and NULL
                      :    otherwise.  The two cache arguments are unused.  */
    1498              : vect_pattern *
    1499      4722780 : addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
    1500              :                            slp_compat_nodes_map_t *, slp_tree *node_)
    1501              : {
    1502      4722780 :   slp_tree node = *node_;
    1503      4722780 :   if (!SLP_TREE_PERMUTE_P (node)
    1504        20476 :       || SLP_TREE_CHILDREN (node).length () != 2
    1505      4740961 :       || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
    1506              :     return NULL;
    1507              : 
    1508              :   /* Match a blend of a plus and a minus op with the same number of plus and
    1509              :      minus lanes on the same operands.  L0 and L1 are the child indexes used
                      :      by lanes 0 and 1; they have to differ.  */
    1510        13429 :   unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
    1511        13429 :   unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
    1512        13429 :   if (l0 == l1)
    1513              :     return NULL;
    1514        11305 :   bool fma_p = false;
    1515        11305 :   bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
    1516        11305 :                                           PLUS_EXPR);
    1517        11305 :   if (!l0add_p
    1518        11305 :       && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
    1519              :     {
    1520         4309 :       l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMA);
    1521         4309 :       if (!l0add_p
    1522         4309 :           && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMS))
    1523         4307 :         return NULL;
    1524              :       fma_p = true;
    1525              :     }
    1526         6998 :   bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
    1527         6998 :                                           PLUS_EXPR);
    1528         6998 :   if (l1add_p && fma_p)
    1529              :     return NULL;
    1530         6998 :   if (!l1add_p
    1531         6998 :       && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
    1532              :     {
    1533          718 :       if (!fma_p)
    1534              :         return NULL;
    1535            2 :       l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMA);
    1536            2 :       if (!l1add_p
    1537            2 :           && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMS))
    1538            0 :         return NULL;
    1539              :     }
    1540         6280 :   else if (!l1add_p && fma_p)
    1541              :     return NULL;
    1542              :   /* The first two operands of both children must agree, in either order.  */
    1543         6282 :   slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
    1544         6282 :   slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
    1545         6282 :   if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
    1546         5949 :          && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
    1547          347 :         || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
    1548            0 :             && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
    1549              :     return NULL;
    1550              : 
    1551        20985 :   for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
    1552              :     {
    1553        15196 :       std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
    1554              :       /* It has to be alternating -, +, -,
    1555              :          While we could permute the .ADDSUB inputs and the .ADDSUB output
    1556              :          that's only profitable over the add + sub + blend if at least
    1557              :          one of the permute is optimized which we can't determine here.
                      :          Even lanes must come from child L0, odd lanes from child L1, and
                      :          lane I must select lane I of that child.  */
    1558        22842 :       if (perm.first != ((i & 1) ? l1 : l0)
    1559        15096 :           || perm.second != i)
    1560      4721704 :         return NULL;
    1561              :     }
    1562              : 
    1563              :   /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
    1564              :      (l0add_p), see whether we have FMA variants.  We can only form FMAs
    1565              :      if allowed via -ffp-contract=fast or if they were FMA before.  */
    1566         5789 :   if (!fma_p
    1567         5787 :       && flag_fp_contract_mode != FP_CONTRACT_FAST
    1568         5820 :       && FLOAT_TYPE_P (SLP_TREE_VECTYPE (l0node)))
    1569              :     ;
    1570         5758 :   else if (!l0add_p
    1571         5758 :            && (fma_p
    1572         2885 :                || vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0],
    1573         2885 :                                            MULT_EXPR)))
    1574              :     {
    1575              :       /* (c * d) -+ a */
    1576          780 :       if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
    1577           23 :         return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
    1578              :     }
    1579         4978 :   else if (l0add_p
    1580         4978 :            && (fma_p
    1581         4976 :                || vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0],
    1582         2871 :                                            MULT_EXPR)))
    1583              :     {
    1584              :       /* (c * d) +- a */
    1585          538 :       if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
    1586           18 :         return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
    1587              :     }
    1588              :   /* Plain .ADDSUB is only tried for the non-FMA { -, +, ... } lane order.  */
    1589         5748 :   if (!fma_p && !l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
    1590         1035 :     return new addsub_pattern (node_, IFN_VEC_ADDSUB);
    1591              : 
    1592              :   return NULL;
    1593              : }
    1594              : 
    1595              : void
    1596         1076 : addsub_pattern::build (vec_info *vinfo)
    1597              : {
    1598         1076 :   slp_tree node = *m_node;
    1599              : 
    1600         1076 :   unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
    1601         1076 :   unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
    1602              : 
    1603         1076 :   switch (m_ifn)
    1604              :     {
    1605         1035 :     case IFN_VEC_ADDSUB:
    1606         1035 :       {
    1607         1035 :         slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
    1608         1035 :         slp_tree add = SLP_TREE_CHILDREN (node)[l1];
    1609              : 
    1610              :         /* Modify the blend node in-place.  */
    1611         1035 :         SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
    1612         1035 :         SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
    1613         1035 :         SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
    1614         1035 :         SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
    1615              : 
    1616              :         /* Build IFN_VEC_ADDSUB from the sub representative operands.  */
    1617         1035 :         stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
    1618         1035 :         gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
    1619              :                                                   gimple_assign_rhs1 (rep->stmt),
    1620         1035 :                                                   gimple_assign_rhs2 (rep->stmt));
    1621         1035 :         gimple_call_set_lhs (call, make_ssa_name
    1622         1035 :                              (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
    1623         1035 :         gimple_call_set_nothrow (call, true);
    1624         1035 :         gimple_set_bb (call, gimple_bb (rep->stmt));
    1625         1035 :         stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
    1626         1035 :         SLP_TREE_REPRESENTATIVE (node) = new_rep;
    1627         1035 :         STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
    1628         1035 :         STMT_SLP_TYPE (new_rep) = pure_slp;
    1629         1035 :         STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
    1630         1035 :         STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
    1631         1035 :         STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
    1632         1035 :         SLP_TREE_CODE (node) = ERROR_MARK;
    1633         1035 :         SLP_TREE_LANE_PERMUTATION (node).release ();
    1634              : 
    1635         1035 :         vect_free_slp_tree (sub);
    1636         1035 :         vect_free_slp_tree (add);
    1637         1035 :         break;
    1638              :       }
    1639           41 :     case IFN_VEC_FMADDSUB:
    1640           41 :     case IFN_VEC_FMSUBADD:
    1641           41 :       {
    1642           41 :         slp_tree sub, add;
    1643           41 :         if (m_ifn == IFN_VEC_FMADDSUB)
    1644              :           {
    1645           23 :             sub = SLP_TREE_CHILDREN (node)[l0];
    1646           23 :             add = SLP_TREE_CHILDREN (node)[l1];
    1647              :           }
    1648              :         else /* m_ifn == IFN_VEC_FMSUBADD */
    1649              :           {
    1650           18 :             sub = SLP_TREE_CHILDREN (node)[l1];
    1651           18 :             add = SLP_TREE_CHILDREN (node)[l0];
    1652              :           }
    1653              :         /* Modify the blend node in-place.  */
    1654           41 :         SLP_TREE_CHILDREN (node).safe_grow (3, true);
    1655           41 :         gcall *call;
    1656           41 :         stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
    1657           41 :         if (vect_match_expression_p (add, CFN_FMA))
    1658              :           {
    1659            2 :             SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (add)[0];
    1660            2 :             SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (add)[1];
    1661            2 :             SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (add)[2];
    1662              :             /* Build IFN_VEC_FMADDSUB from the fms representative
    1663              :                operands.  */
    1664            2 :             call = gimple_build_call_internal (m_ifn, 3,
    1665              :                                                gimple_call_arg (srep->stmt, 0),
    1666              :                                                gimple_call_arg (srep->stmt, 1),
    1667            2 :                                                gimple_call_arg (srep->stmt, 2));
    1668              :           }
    1669              :         else
    1670              :           {
    1671           39 :             slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
    1672           39 :             SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
    1673           39 :             SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
    1674           39 :             SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
    1675              :             /* Build IFN_VEC_FMADDSUB from the mul/sub representative
    1676              :                operands.  */
    1677           39 :             stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
    1678           39 :             call = gimple_build_call_internal (m_ifn, 3,
    1679              :                                                gimple_assign_rhs1 (mrep->stmt),
    1680           39 :                                                gimple_assign_rhs2 (mrep->stmt),
    1681           39 :                                                gimple_assign_rhs2 (srep->stmt));
    1682              :           }
    1683           41 :         SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
    1684           41 :         SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
    1685           41 :         SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
    1686              : 
    1687           41 :         gimple_call_set_lhs (call, make_ssa_name
    1688           41 :                              (TREE_TYPE (gimple_get_lhs (srep->stmt))));
    1689           41 :         gimple_call_set_nothrow (call, true);
    1690           41 :         gimple_set_bb (call, gimple_bb (srep->stmt));
    1691           41 :         stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
    1692           41 :         SLP_TREE_REPRESENTATIVE (node) = new_rep;
    1693           41 :         STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
    1694           41 :         STMT_SLP_TYPE (new_rep) = pure_slp;
    1695           41 :         STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
    1696           41 :         STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
    1697           41 :         STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
    1698           41 :         SLP_TREE_CODE (node) = ERROR_MARK;
    1699           41 :         SLP_TREE_LANE_PERMUTATION (node).release ();
    1700              : 
    1701           41 :         vect_free_slp_tree (sub);
    1702           41 :         vect_free_slp_tree (add);
    1703           41 :         break;
    1704              :       }
    1705         1076 :     default:;
    1706              :     }
    1707         1076 : }
    1708              : 
    1709              : /*******************************************************************************
    1710              :  * Pattern matching definitions
    1711              :  ******************************************************************************/
    1712              : 
    1713              : #define SLP_PATTERN(x) &x::recognize
    1714              : vect_pattern_decl_t slp_patterns[]
    1715              : {
    1716              :   /* Entries are &<pattern>::recognize.  To minimize back-tracking and match
    1717              :      efficiently, order patterns from the largest to the smallest, especially
    1718              :      when they overlap in what they can detect.  */
    1719              : 
    1720              :   SLP_PATTERN (complex_operations_pattern),
    1721              :   SLP_PATTERN (addsub_pattern)
    1722              : };
    1723              : #undef SLP_PATTERN
    1724              : 
    1725              : /* Number of SLP pattern matchers available (entries in slp_patterns).  */
    1726              : size_t num__slp_patterns = ARRAY_SIZE (slp_patterns);
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.