LCOV - code coverage report
Current view: top level - gcc - vec-perm-indices.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 92.2 % 153 141
Test Date: 2026-02-28 14:20:25 Functions: 90.9 % 11 10
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* A representation of vector permutation indices.
       2              :    Copyright (C) 2017-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "vec-perm-indices.h"
      24              : #include "tree.h"
      25              : #include "fold-const.h"
      26              : #include "tree-vector-builder.h"
      27              : #include "backend.h"
      28              : #include "rtl.h"
      29              : #include "memmodel.h"
      30              : #include "emit-rtl.h"
      31              : #include "selftest.h"
      32              : #include "rtx-vector-builder.h"
      33              : 
      34              : /* Switch to a new permutation vector that selects between NINPUTS vector
      35              :    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
      36              :    new permutation vector from ELEMENTS, clamping each one to be in range.  */
      37              : 
      38              : void
      39      3842280 : vec_perm_indices::new_vector (const vec_perm_builder &elements,
      40              :                               unsigned int ninputs,
      41              :                               poly_uint64 nelts_per_input)
      42              : {
      43      3842280 :   m_ninputs = ninputs;
      44      3842280 :   m_nelts_per_input = nelts_per_input;
      45              :   /* If the vector has a constant number of elements, expand the
      46              :      encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
      47              :      wrap halfway if there is only one vector input, and we want
      48              :      the wrapped form to be the canonical one.
      49              : 
      50              :      If the vector has a variable number of elements, just copy
      51              :      the encoding.  In that case the unwrapped form is canonical
      52              :      and there is no way of representing the wrapped form.  */
      53      3842280 :   poly_uint64 full_nelts = elements.full_nelts ();
      54      3842280 :   unsigned HOST_WIDE_INT copy_nelts;
      55      3842280 :   if (full_nelts.is_constant (&copy_nelts))
      56      3842280 :     m_encoding.new_vector (full_nelts, copy_nelts, 1);
      57              :   else
      58              :     {
      59              :       copy_nelts = elements.encoded_nelts ();
      60              :       m_encoding.new_vector (full_nelts, elements.npatterns (),
      61              :                              elements.nelts_per_pattern ());
      62              :     }
      63      3842280 :   unsigned int npatterns = m_encoding.npatterns ();
      64     30853924 :   for (unsigned int i = 0; i < npatterns; ++i)
      65     27011883 :     m_encoding.quick_push (clamp (elements.elt (i)));
      66              :   /* Use the fact that:
      67              : 
      68              :         (a + b) % c == ((a % c) + (b % c)) % c
      69              : 
      70              :      to simplify the clamping of variable-length vectors.  */
      71      3842280 :   for (unsigned int i = npatterns; i < copy_nelts; ++i)
      72              :     {
      73            0 :       element_type step = clamp (elements.elt (i)
      74            0 :                                  - elements.elt (i - npatterns));
      75            0 :       m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
      76              :     }
      77      3842280 :   m_encoding.finalize ();
      78      3842280 : }
      79              : 
      80              : /* Switch to a new permutation vector that selects the same input elements
      81              :    as ORIG, but with each element split into FACTOR pieces.  For example,
      82              :    if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
      83              :    { 2, 3, 4, 5, 0, 1, 6, 7 }.  */
      84              : 
      85              : void
      86       677835 : vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
      87              :                                        unsigned int factor)
      88              : {
      89       677835 :   m_ninputs = orig.m_ninputs;
      90       677835 :   m_nelts_per_input = orig.m_nelts_per_input * factor;
      91       677835 :   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
      92       677835 :                          orig.m_encoding.npatterns () * factor,
      93              :                          orig.m_encoding.nelts_per_pattern ());
      94       677835 :   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
      95      2240356 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
      96              :     {
      97      1562521 :       element_type base = orig.m_encoding[i] * factor;
      98     10392989 :       for (unsigned int j = 0; j < factor; ++j)
      99      8830468 :         m_encoding.quick_push (base + j);
     100              :     }
     101       677835 :   m_encoding.finalize ();
     102       677835 : }
     103              : 
     104              : /* Check whether we can switch to a new permutation vector that
     105              :    selects the same input elements as ORIG, but with each element
     106              :    built up from FACTOR pieces.  Return true if yes, otherwise
     107              :    return false.  Every FACTOR permutation indexes should be
     108              :    continuous separately and the first one of each batch should
     109              :    be able to exactly modulo FACTOR.  For example, if ORIG is
     110              :    { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
     111              :    is { 1, 2, 0, 3 }.  */
     112              : 
     113              : bool
     114           18 : vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
     115              :                                      unsigned int factor)
     116              : {
     117           18 :   gcc_assert (factor > 0);
     118              : 
     119           18 :   if (maybe_lt (orig.m_nelts_per_input, factor))
     120              :     return false;
     121              : 
     122           18 :   poly_uint64 nelts;
     123              :   /* Invalid if vector units number isn't multiple of factor.  */
     124           36 :   if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
     125              :     return false;
     126              : 
     127              :   /* Only handle the case that npatterns is multiple of factor.
     128              :      FIXME: Try to see whether we can reshape it by factor npatterns.  */
     129           18 :   if (orig.m_encoding.npatterns () % factor != 0)
     130              :     return false;
     131              : 
     132           11 :   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
     133           11 :   auto_vec<element_type, 32> encoding (encoded_nelts);
     134              :   /* Separate all encoded elements into batches by size factor,
     135              :      then ensure the first element of each batch is multiple of
     136              :      factor and all elements in each batch is consecutive from
     137              :      the first one.  */
     138           12 :   for (unsigned int i = 0; i < encoded_nelts; i += factor)
     139              :     {
     140           11 :       element_type first = orig.m_encoding[i];
     141           11 :       element_type new_index;
     142           11 :       if (!multiple_p (first, factor, &new_index))
     143           10 :         return false;
     144           25 :       for (unsigned int j = 1; j < factor; ++j)
     145           24 :         if (maybe_ne (first + j, orig.m_encoding[i + j]))
     146           10 :           return false;
     147            1 :       encoding.quick_push (new_index);
     148              :     }
     149              : 
     150            1 :   m_ninputs = orig.m_ninputs;
     151            1 :   m_nelts_per_input = nelts;
     152            1 :   poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
     153            1 :   unsigned int npatterns = orig.m_encoding.npatterns () / factor;
     154              : 
     155            1 :   m_encoding.new_vector (full_nelts, npatterns,
     156              :                          orig.m_encoding.nelts_per_pattern ());
     157            1 :   m_encoding.splice (encoding);
     158            1 :   m_encoding.finalize ();
     159              : 
     160            1 :   return true;
     161           11 : }
     162              : 
     163              : /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
     164              :    the values of the permutation vector but it doesn't change the way that
     165              :    the elements are encoded.  */
     166              : 
     167              : void
     168        15072 : vec_perm_indices::rotate_inputs (int delta)
     169              : {
     170        15072 :   element_type element_delta = delta * m_nelts_per_input;
     171        77890 :   for (unsigned int i = 0; i < m_encoding.length (); ++i)
     172        62818 :     m_encoding[i] = clamp (m_encoding[i] + element_delta);
     173        15072 : }
     174              : 
     175              : /* Return true if index OUT_BASE + I * OUT_STEP selects input
     176              :    element IN_BASE + I * IN_STEP.  For example, the call to test
     177              :    whether a permute reverses a vector of N elements would be:
     178              : 
     179              :      series_p (0, 1, N - 1, -1)
     180              : 
     181              :    which would return true for { N - 1, N - 2, N - 3, ... }.
     182              :    The calls to test for an interleaving of elements starting
     183              :    at N1 and N2 would be:
     184              : 
     185              :      series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
     186              : 
     187              :    which would return true for { N1, N2, N1 + 1, N2 + 1, ... }.  */
     188              : 
     189              : bool
     190      3789211 : vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
     191              :                             element_type in_base, element_type in_step) const
     192              : {
     193              :   /* Check the base value.  */
     194      3789211 :   if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
     195              :     return false;
     196              : 
     197      1261159 :   element_type full_nelts = m_encoding.full_nelts ();
     198      1261159 :   unsigned int npatterns = m_encoding.npatterns ();
     199              : 
     200              :   /* Calculate which multiple of OUT_STEP elements we need to get
     201              :      back to the same pattern.  */
     202      1261159 :   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
     203              : 
     204              :   /* Check the steps.  */
     205      1261159 :   in_step = clamp (in_step);
     206      1261159 :   out_base += out_step;
     207      1261159 :   unsigned int limit = 0;
     208      2139619 :   for (;;)
     209              :     {
     210              :       /* Succeed if we've checked all the elements in the vector.  */
     211      1700389 :       if (known_ge (out_base, full_nelts))
     212      1261159 :         return true;
     213              : 
     214      1376893 :       if (out_base >= npatterns)
     215              :         {
     216              :           /* We've got to the end of the "foreground" values.  Check
     217              :              2 elements from each pattern in the "background" values.  */
     218       855471 :           if (limit == 0)
     219       830261 :             limit = out_base + cycle_length * 2;
     220        25210 :           else if (out_base >= limit)
     221              :             return true;
     222              :         }
     223              : 
     224      1369619 :       element_type v0 = m_encoding.elt (out_base - out_step);
     225      1369619 :       element_type v1 = m_encoding.elt (out_base);
     226      1701961 :       if (maybe_ne (clamp (v1 - v0), in_step))
     227              :         return false;
     228              : 
     229       439230 :       out_base += out_step;
     230       439230 :     }
     231              : }
     232              : 
     233              : /* Return true if all elements of the permutation vector are in the range
     234              :    [START, START + SIZE).  */
     235              : 
     236              : bool
     237      3329171 : vec_perm_indices::all_in_range_p (element_type start, element_type size) const
     238              : {
     239              :   /* Check the first two elements of each pattern.  */
     240      3329171 :   unsigned int npatterns = m_encoding.npatterns ();
     241      3329171 :   unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
     242      3329171 :   unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
     243     14769206 :   for (unsigned int i = 0; i < base_nelts; ++i)
     244     24942346 :     if (!known_in_range_p (m_encoding[i], start, size))
     245              :       return false;
     246              : 
     247              :   /* For stepped encodings, check the full range of the series.  */
     248      1266895 :   if (nelts_per_pattern == 3)
     249              :     {
     250       376207 :       element_type limit = input_nelts ();
     251              : 
     252              :       /* The number of elements in each pattern beyond the first two
     253              :          that we checked above.  */
     254       376207 :       poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
     255       376207 :                                          npatterns) - 2;
     256       798611 :       for (unsigned int i = 0; i < npatterns; ++i)
     257              :         {
     258              :           /* BASE1 has been checked but BASE2 hasn't.   */
     259       604140 :           element_type base1 = m_encoding[i + npatterns];
     260       604140 :           element_type base2 = m_encoding[i + base_nelts];
     261              : 
     262              :           /* The step to add to get from BASE1 to each subsequent value.  */
     263       604140 :           element_type step = clamp (base2 - base1);
     264              : 
     265              :           /* STEP has no inherent sign, so a value near LIMIT can
     266              :              act as a negative step.  The series is in range if it
     267              :              is in range according to one of the two interpretations.
     268              : 
     269              :              Since we're dealing with clamped values, ELEMENT_TYPE is
     270              :              wide enough for overflow not to be a problem.  */
     271       604140 :           element_type headroom_down = base1 - start;
     272       604140 :           element_type headroom_up = size - headroom_down - 1;
     273       604140 :           HOST_WIDE_INT diff;
     274       604140 :           if ((!step.is_constant (&diff)
     275       604140 :                || maybe_lt (headroom_up, diff * step_nelts))
     276       181960 :               && (!(limit - step).is_constant (&diff)
     277       181960 :                   || maybe_lt (headroom_down, diff * step_nelts)))
     278      2244012 :             return false;
     279              :         }
     280              :     }
     281              :   return true;
     282              : }
     283              : 
     284              : /* Try to read the contents of VECTOR_CST CST as a constant permutation
     285              :    vector.  Return true and add the elements to BUILDER on success,
     286              :    otherwise return false without modifying BUILDER.  */
     287              : 
     288              : bool
     289      1933929 : tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
     290              : {
     291      1933929 :   unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
     292      9556402 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
     293      7622498 :     if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
     294              :       return false;
     295              : 
     296      3867808 :   builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
     297      1933904 :                        VECTOR_CST_NPATTERNS (cst),
     298      1933904 :                        VECTOR_CST_NELTS_PER_PATTERN (cst));
     299      9556377 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
     300      7622473 :     builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
     301              :   return true;
     302              : }
     303              : 
     304              : /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
     305              : 
     306              : tree
     307      1232149 : vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
     308              : {
     309      1232149 :   gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
     310      1232149 :   tree_vector_builder sel (type, indices.encoding ().npatterns (),
     311      1232149 :                            indices.encoding ().nelts_per_pattern ());
     312      1232149 :   unsigned int encoded_nelts = sel.encoded_nelts ();
     313      8361102 :   for (unsigned int i = 0; i < encoded_nelts; i++)
     314      7128953 :     sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
     315      1232149 :   return sel.build ();
     316      1232149 : }
     317              : 
     318              : /* Return a CONST_VECTOR of mode MODE that contains the elements of
     319              :    INDICES.  */
     320              : 
     321              : rtx
     322            0 : vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
     323              : {
     324            0 :   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
     325              :               && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
     326            0 :   rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
     327            0 :                           indices.encoding ().nelts_per_pattern ());
     328            0 :   unsigned int encoded_nelts = sel.encoded_nelts ();
     329            0 :   for (unsigned int i = 0; i < encoded_nelts; i++)
     330            0 :     sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
     331            0 :   return sel.build ();
     332            0 : }
     333              : 
     334              : #if CHECKING_P
     335              : 
     336              : namespace selftest {
     337              : 
     338              : /* Test a 12-element vector.  */
     339              : 
     340              : static void
     341            4 : test_vec_perm_12 (void)
     342              : {
     343            4 :   vec_perm_builder builder (12, 12, 1);
     344           20 :   for (unsigned int i = 0; i < 4; ++i)
     345              :     {
     346           16 :       builder.quick_push (i * 5);
     347           16 :       builder.quick_push (3 + i);
     348           16 :       builder.quick_push (2 + 3 * i);
     349              :     }
     350            4 :   vec_perm_indices indices (builder, 1, 12);
     351            4 :   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
     352            4 :   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
     353            4 :   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
     354            4 :   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
     355            4 :   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
     356              : 
     357            4 :   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
     358            4 :   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
     359              : 
     360            4 :   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
     361            4 :   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
     362              : 
     363            4 :   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
     364            4 :   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
     365              : 
     366            4 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
     367            4 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
     368            4 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
     369            4 : }
     370              : 
     371              : /* Run selftests for this file.  */
     372              : 
     373              : void
     374            4 : vec_perm_indices_cc_tests ()
     375              : {
     376            4 :   test_vec_perm_12 ();
     377            4 : }
     378              : 
     379              : } // namespace selftest
     380              : 
     381              : #endif
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.