LCOV - gcc.info - gcc/vec-perm-indices.cc

LCOV - code coverage report

Current view:	top level - gcc - vec-perm-indices.cc (source / functions)		Coverage	Total	Hit
Test:	gcc.info	Lines:	92.2 %	153	141
Test Date:	2025-07-26 09:32:30	Functions:	90.9 %	11	10
Legend:	Lines: hit not hit \| Branches: + taken - not taken # not executed	Branches:	-	0	0

             Branch data     Line data    Source code

       1                 :             : /* A representation of vector permutation indices.
       2                 :             :    Copyright (C) 2017-2025 Free Software Foundation, Inc.
       3                 :             : 
       4                 :             : This file is part of GCC.
       5                 :             : 
       6                 :             : GCC is free software; you can redistribute it and/or modify it under
       7                 :             : the terms of the GNU General Public License as published by the Free
       8                 :             : Software Foundation; either version 3, or (at your option) any later
       9                 :             : version.
      10                 :             : 
      11                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14                 :             : for more details.
      15                 :             : 
      16                 :             : You should have received a copy of the GNU General Public License
      17                 :             : along with GCC; see the file COPYING3.  If not see
      18                 :             : <http://www.gnu.org/licenses/>.  */
      19                 :             : 
      20                 :             : #include "config.h"
      21                 :             : #include "system.h"
      22                 :             : #include "coretypes.h"
      23                 :             : #include "vec-perm-indices.h"
      24                 :             : #include "tree.h"
      25                 :             : #include "fold-const.h"
      26                 :             : #include "tree-vector-builder.h"
      27                 :             : #include "backend.h"
      28                 :             : #include "rtl.h"
      29                 :             : #include "memmodel.h"
      30                 :             : #include "emit-rtl.h"
      31                 :             : #include "selftest.h"
      32                 :             : #include "rtx-vector-builder.h"
      33                 :             : 
      34                 :             : /* Switch to a new permutation vector that selects between NINPUTS vector
      35                 :             :    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
      36                 :             :    new permutation vector from ELEMENTS, clamping each one to be in range.  */
      37                 :             : 
      38                 :             : void
      39                 :     3652398 : vec_perm_indices::new_vector (const vec_perm_builder &elements,
      40                 :             :                               unsigned int ninputs,
      41                 :             :                               poly_uint64 nelts_per_input)
      42                 :             : {
      43                 :     3652398 :   m_ninputs = ninputs;
      44                 :     3652398 :   m_nelts_per_input = nelts_per_input;
      45                 :             :   /* If the vector has a constant number of elements, expand the
      46                 :             :      encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
      47                 :             :      wrap halfway if there is only one vector input, and we want
      48                 :             :      the wrapped form to be the canonical one.
      49                 :             : 
      50                 :             :      If the vector has a variable number of elements, just copy
      51                 :             :      the encoding.  In that case the unwrapped form is canonical
      52                 :             :      and there is no way of representing the wrapped form.  */
      53                 :     3652398 :   poly_uint64 full_nelts = elements.full_nelts ();
      54                 :     3652398 :   unsigned HOST_WIDE_INT copy_nelts;
      55                 :     3652398 :   if (full_nelts.is_constant (&copy_nelts))
      56                 :     3652398 :     m_encoding.new_vector (full_nelts, copy_nelts, 1);
      57                 :             :   else
      58                 :             :     {
      59                 :             :       copy_nelts = elements.encoded_nelts ();
      60                 :             :       m_encoding.new_vector (full_nelts, elements.npatterns (),
      61                 :             :                              elements.nelts_per_pattern ());
      62                 :             :     }
      63                 :     3652398 :   unsigned int npatterns = m_encoding.npatterns ();
      64                 :    29604752 :   for (unsigned int i = 0; i < npatterns; ++i)
      65                 :    25952593 :     m_encoding.quick_push (clamp (elements.elt (i)));
      66                 :             :   /* Use the fact that:
      67                 :             : 
      68                 :             :         (a + b) % c == ((a % c) + (b % c)) % c
      69                 :             : 
      70                 :             :      to simplify the clamping of variable-length vectors.  */
      71                 :     3652398 :   for (unsigned int i = npatterns; i < copy_nelts; ++i)
      72                 :             :     {
      73                 :           0 :       element_type step = clamp (elements.elt (i)
      74                 :           0 :                                  - elements.elt (i - npatterns));
      75                 :           0 :       m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
      76                 :             :     }
      77                 :     3652398 :   m_encoding.finalize ();
      78                 :     3652398 : }
      79                 :             : 
      80                 :             : /* Switch to a new permutation vector that selects the same input elements
      81                 :             :    as ORIG, but with each element split into FACTOR pieces.  For example,
      82                 :             :    if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
      83                 :             :    { 2, 3, 4, 5, 0, 1, 6, 7 }.  */
      84                 :             : 
      85                 :             : void
      86                 :      669885 : vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
      87                 :             :                                        unsigned int factor)
      88                 :             : {
      89                 :      669885 :   m_ninputs = orig.m_ninputs;
      90                 :      669885 :   m_nelts_per_input = orig.m_nelts_per_input * factor;
      91                 :      669885 :   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
      92                 :      669885 :                          orig.m_encoding.npatterns () * factor,
      93                 :             :                          orig.m_encoding.nelts_per_pattern ());
      94                 :      669885 :   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
      95                 :     2198223 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
      96                 :             :     {
      97                 :     1528338 :       element_type base = orig.m_encoding[i] * factor;
      98                 :    10075432 :       for (unsigned int j = 0; j < factor; ++j)
      99                 :     8547094 :         m_encoding.quick_push (base + j);
     100                 :             :     }
     101                 :      669885 :   m_encoding.finalize ();
     102                 :      669885 : }
     103                 :             : 
     104                 :             : /* Check whether we can switch to a new permutation vector that
     105                 :             :    selects the same input elements as ORIG, but with each element
     106                 :             :    built up from FACTOR pieces.  Return true if yes, otherwise
     107                 :             :    return false.  Every FACTOR permutation indexes should be
     108                 :             :    continuous separately and the first one of each batch should
     109                 :             :    be able to exactly modulo FACTOR.  For example, if ORIG is
     110                 :             :    { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
     111                 :             :    is { 1, 2, 0, 3 }.  */
     112                 :             : 
     113                 :             : bool
     114                 :          18 : vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
     115                 :             :                                      unsigned int factor)
     116                 :             : {
     117                 :          18 :   gcc_assert (factor > 0);
     118                 :             : 
     119                 :          18 :   if (maybe_lt (orig.m_nelts_per_input, factor))
     120                 :             :     return false;
     121                 :             : 
     122                 :          18 :   poly_uint64 nelts;
     123                 :             :   /* Invalid if vector units number isn't multiple of factor.  */
     124                 :          36 :   if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
     125                 :             :     return false;
     126                 :             : 
     127                 :             :   /* Only handle the case that npatterns is multiple of factor.
     128                 :             :      FIXME: Try to see whether we can reshape it by factor npatterns.  */
     129                 :          18 :   if (orig.m_encoding.npatterns () % factor != 0)
     130                 :             :     return false;
     131                 :             : 
     132                 :          11 :   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
     133                 :          11 :   auto_vec<element_type, 32> encoding (encoded_nelts);
     134                 :             :   /* Separate all encoded elements into batches by size factor,
     135                 :             :      then ensure the first element of each batch is multiple of
     136                 :             :      factor and all elements in each batch is consecutive from
     137                 :             :      the first one.  */
     138                 :          12 :   for (unsigned int i = 0; i < encoded_nelts; i += factor)
     139                 :             :     {
     140                 :          11 :       element_type first = orig.m_encoding[i];
     141                 :          11 :       element_type new_index;
     142                 :          11 :       if (!multiple_p (first, factor, &new_index))
     143                 :          10 :         return false;
     144                 :          25 :       for (unsigned int j = 1; j < factor; ++j)
     145                 :          24 :         if (maybe_ne (first + j, orig.m_encoding[i + j]))
     146                 :          10 :           return false;
     147                 :           1 :       encoding.quick_push (new_index);
     148                 :             :     }
     149                 :             : 
     150                 :           1 :   m_ninputs = orig.m_ninputs;
     151                 :           1 :   m_nelts_per_input = nelts;
     152                 :           1 :   poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
     153                 :           1 :   unsigned int npatterns = orig.m_encoding.npatterns () / factor;
     154                 :             : 
     155                 :           1 :   m_encoding.new_vector (full_nelts, npatterns,
     156                 :             :                          orig.m_encoding.nelts_per_pattern ());
     157                 :           1 :   m_encoding.splice (encoding);
     158                 :           1 :   m_encoding.finalize ();
     159                 :             : 
     160                 :           1 :   return true;
     161                 :          11 : }
     162                 :             : 
     163                 :             : /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
     164                 :             :    the values of the permutation vector but it doesn't change the way that
     165                 :             :    the elements are encoded.  */
     166                 :             : 
     167                 :             : void
     168                 :       15059 : vec_perm_indices::rotate_inputs (int delta)
     169                 :             : {
     170                 :       15059 :   element_type element_delta = delta * m_nelts_per_input;
     171                 :       77537 :   for (unsigned int i = 0; i < m_encoding.length (); ++i)
     172                 :       62478 :     m_encoding[i] = clamp (m_encoding[i] + element_delta);
     173                 :       15059 : }
     174                 :             : 
     175                 :             : /* Return true if index OUT_BASE + I * OUT_STEP selects input
     176                 :             :    element IN_BASE + I * IN_STEP.  For example, the call to test
     177                 :             :    whether a permute reverses a vector of N elements would be:
     178                 :             : 
     179                 :             :      series_p (0, 1, N - 1, -1)
     180                 :             : 
     181                 :             :    which would return true for { N - 1, N - 2, N - 3, ... }.
     182                 :             :    The calls to test for an interleaving of elements starting
     183                 :             :    at N1 and N2 would be:
     184                 :             : 
     185                 :             :      series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
     186                 :             : 
     187                 :             :    which would return true for { N1, N2, N1 + 1, N2 + 1, ... }.  */
     188                 :             : 
     189                 :             : bool
     190                 :     3551218 : vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
     191                 :             :                             element_type in_base, element_type in_step) const
     192                 :             : {
     193                 :             :   /* Check the base value.  */
     194                 :     3551218 :   if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
     195                 :             :     return false;
     196                 :             : 
     197                 :     1135775 :   element_type full_nelts = m_encoding.full_nelts ();
     198                 :     1135775 :   unsigned int npatterns = m_encoding.npatterns ();
     199                 :             : 
     200                 :             :   /* Calculate which multiple of OUT_STEP elements we need to get
     201                 :             :      back to the same pattern.  */
     202                 :     1135775 :   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
     203                 :             : 
     204                 :             :   /* Check the steps.  */
     205                 :     1135775 :   in_step = clamp (in_step);
     206                 :     1135775 :   out_base += out_step;
     207                 :     1135775 :   unsigned int limit = 0;
     208                 :     1912935 :   for (;;)
     209                 :             :     {
     210                 :             :       /* Succeed if we've checked all the elements in the vector.  */
     211                 :     1524355 :       if (known_ge (out_base, full_nelts))
     212                 :     1135775 :         return true;
     213                 :             : 
     214                 :     1246880 :       if (out_base >= npatterns)
     215                 :             :         {
     216                 :             :           /* We've got to the end of the "foreground" values.  Check
     217                 :             :              2 elements from each pattern in the "background" values.  */
     218                 :      770289 :           if (limit == 0)
     219                 :      746130 :             limit = out_base + cycle_length * 2;
     220                 :       24159 :           else if (out_base >= limit)
     221                 :             :             return true;
     222                 :             :         }
     223                 :             : 
     224                 :     1239886 :       element_type v0 = m_encoding.elt (out_base - out_step);
     225                 :     1239886 :       element_type v1 = m_encoding.elt (out_base);
     226                 :     1502790 :       if (maybe_ne (clamp (v1 - v0), in_step))
     227                 :             :         return false;
     228                 :             : 
     229                 :      388580 :       out_base += out_step;
     230                 :      388580 :     }
     231                 :             : }
     232                 :             : 
     233                 :             : /* Return true if all elements of the permutation vector are in the range
     234                 :             :    [START, START + SIZE).  */
     235                 :             : 
     236                 :             : bool
     237                 :     3220906 : vec_perm_indices::all_in_range_p (element_type start, element_type size) const
     238                 :             : {
     239                 :             :   /* Check the first two elements of each pattern.  */
     240                 :     3220906 :   unsigned int npatterns = m_encoding.npatterns ();
     241                 :     3220906 :   unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
     242                 :     3220906 :   unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
     243                 :    14222013 :   for (unsigned int i = 0; i < base_nelts; ++i)
     244                 :    23989890 :     if (!known_in_range_p (m_encoding[i], start, size))
     245                 :             :       return false;
     246                 :             : 
     247                 :             :   /* For stepped encodings, check the full range of the series.  */
     248                 :     1233230 :   if (nelts_per_pattern == 3)
     249                 :             :     {
     250                 :      371615 :       element_type limit = input_nelts ();
     251                 :             : 
     252                 :             :       /* The number of elements in each pattern beyond the first two
     253                 :             :          that we checked above.  */
     254                 :      371615 :       poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
     255                 :      371615 :                                          npatterns) - 2;
     256                 :      800044 :       for (unsigned int i = 0; i < npatterns; ++i)
     257                 :             :         {
     258                 :             :           /* BASE1 has been checked but BASE2 hasn't.   */
     259                 :      598163 :           element_type base1 = m_encoding[i + npatterns];
     260                 :      598163 :           element_type base2 = m_encoding[i + base_nelts];
     261                 :             : 
     262                 :             :           /* The step to add to get from BASE1 to each subsequent value.  */
     263                 :      598163 :           element_type step = clamp (base2 - base1);
     264                 :             : 
     265                 :             :           /* STEP has no inherent sign, so a value near LIMIT can
     266                 :             :              act as a negative step.  The series is in range if it
     267                 :             :              is in range according to one of the two interpretations.
     268                 :             : 
     269                 :             :              Since we're dealing with clamped values, ELEMENT_TYPE is
     270                 :             :              wide enough for overflow not to be a problem.  */
     271                 :      598163 :           element_type headroom_down = base1 - start;
     272                 :      598163 :           element_type headroom_up = size - headroom_down - 1;
     273                 :      598163 :           HOST_WIDE_INT diff;
     274                 :      598163 :           if ((!step.is_constant (&diff)
     275                 :      598163 :                || maybe_lt (headroom_up, diff * step_nelts))
     276                 :      169974 :               && (!(limit - step).is_constant (&diff)
     277                 :      169974 :                   || maybe_lt (headroom_down, diff * step_nelts)))
     278                 :     2157410 :             return false;
     279                 :             :         }
     280                 :             :     }
     281                 :             :   return true;
     282                 :             : }
     283                 :             : 
     284                 :             : /* Try to read the contents of VECTOR_CST CST as a constant permutation
     285                 :             :    vector.  Return true and add the elements to BUILDER on success,
     286                 :             :    otherwise return false without modifying BUILDER.  */
     287                 :             : 
     288                 :             : bool
     289                 :     1829185 : tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
     290                 :             : {
     291                 :     1829185 :   unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
     292                 :     9149115 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
     293                 :     7319955 :     if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
     294                 :             :       return false;
     295                 :             : 
     296                 :     3658320 :   builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
     297                 :     1829160 :                        VECTOR_CST_NPATTERNS (cst),
     298                 :     1829160 :                        VECTOR_CST_NELTS_PER_PATTERN (cst));
     299                 :     9149090 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
     300                 :     7319930 :     builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
     301                 :             :   return true;
     302                 :             : }
     303                 :             : 
     304                 :             : /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
     305                 :             : 
     306                 :             : tree
     307                 :     1196756 : vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
     308                 :             : {
     309                 :     1196756 :   gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
     310                 :     1196756 :   tree_vector_builder sel (type, indices.encoding ().npatterns (),
     311                 :     1196756 :                            indices.encoding ().nelts_per_pattern ());
     312                 :     1196756 :   unsigned int encoded_nelts = sel.encoded_nelts ();
     313                 :     8128666 :   for (unsigned int i = 0; i < encoded_nelts; i++)
     314                 :     6931910 :     sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
     315                 :     1196756 :   return sel.build ();
     316                 :     1196756 : }
     317                 :             : 
     318                 :             : /* Return a CONST_VECTOR of mode MODE that contains the elements of
     319                 :             :    INDICES.  */
     320                 :             : 
     321                 :             : rtx
     322                 :           0 : vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
     323                 :             : {
     324                 :           0 :   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
     325                 :             :               && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
     326                 :           0 :   rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
     327                 :           0 :                           indices.encoding ().nelts_per_pattern ());
     328                 :           0 :   unsigned int encoded_nelts = sel.encoded_nelts ();
     329                 :           0 :   for (unsigned int i = 0; i < encoded_nelts; i++)
     330                 :           0 :     sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
     331                 :           0 :   return sel.build ();
     332                 :           0 : }
     333                 :             : 
     334                 :             : #if CHECKING_P
     335                 :             : 
     336                 :             : namespace selftest {
     337                 :             : 
     338                 :             : /* Test a 12-element vector.  */
     339                 :             : 
     340                 :             : static void
     341                 :           4 : test_vec_perm_12 (void)
     342                 :             : {
     343                 :           4 :   vec_perm_builder builder (12, 12, 1);
     344                 :          20 :   for (unsigned int i = 0; i < 4; ++i)
     345                 :             :     {
     346                 :          16 :       builder.quick_push (i * 5);
     347                 :          16 :       builder.quick_push (3 + i);
     348                 :          16 :       builder.quick_push (2 + 3 * i);
     349                 :             :     }
     350                 :           4 :   vec_perm_indices indices (builder, 1, 12);
     351                 :           4 :   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
     352                 :           4 :   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
     353                 :           4 :   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
     354                 :           4 :   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
     355                 :           4 :   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
     356                 :             : 
     357                 :           4 :   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
     358                 :           4 :   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
     359                 :             : 
     360                 :           4 :   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
     361                 :           4 :   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
     362                 :             : 
     363                 :           4 :   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
     364                 :           4 :   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
     365                 :             : 
     366                 :           4 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
     367                 :           4 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
     368                 :           4 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
     369                 :           4 : }
     370                 :             : 
     371                 :             : /* Run selftests for this file.  */
     372                 :             : 
     373                 :             : void
     374                 :           4 : vec_perm_indices_cc_tests ()
     375                 :             : {
     376                 :           4 :   test_vec_perm_12 ();
     377                 :           4 : }
     378                 :             : 
     379                 :             : } // namespace selftest
     380                 :             : 
     381                 :             : #endif

Generated by: LCOV version 2.1-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.