Line data Source code
1 : /* A representation of vector permutation indices.
2 : Copyright (C) 2017-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "vec-perm-indices.h"
24 : #include "tree.h"
25 : #include "fold-const.h"
26 : #include "tree-vector-builder.h"
27 : #include "backend.h"
28 : #include "rtl.h"
29 : #include "memmodel.h"
30 : #include "emit-rtl.h"
31 : #include "selftest.h"
32 : #include "rtx-vector-builder.h"
33 :
34 : /* Switch to a new permutation vector that selects between NINPUTS vector
35 : inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 : new permutation vector from ELEMENTS, clamping each one to be in range. */
37 :
38 : void
39 3842280 : vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 : unsigned int ninputs,
41 : poly_uint64 nelts_per_input)
42 : {
43 3842280 : m_ninputs = ninputs;
44 3842280 : m_nelts_per_input = nelts_per_input;
45 : /* If the vector has a constant number of elements, expand the
46 : encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 : wrap halfway if there is only one vector input, and we want
48 : the wrapped form to be the canonical one.
49 :
50 : If the vector has a variable number of elements, just copy
51 : the encoding. In that case the unwrapped form is canonical
52 : and there is no way of representing the wrapped form. */
53 3842280 : poly_uint64 full_nelts = elements.full_nelts ();
54 3842280 : unsigned HOST_WIDE_INT copy_nelts;
55 3842280 : if (full_nelts.is_constant (©_nelts))
56 3842280 : m_encoding.new_vector (full_nelts, copy_nelts, 1);
57 : else
58 : {
59 : copy_nelts = elements.encoded_nelts ();
60 : m_encoding.new_vector (full_nelts, elements.npatterns (),
61 : elements.nelts_per_pattern ());
62 : }
63 3842280 : unsigned int npatterns = m_encoding.npatterns ();
64 30853924 : for (unsigned int i = 0; i < npatterns; ++i)
65 27011883 : m_encoding.quick_push (clamp (elements.elt (i)));
66 : /* Use the fact that:
67 :
68 : (a + b) % c == ((a % c) + (b % c)) % c
69 :
70 : to simplify the clamping of variable-length vectors. */
71 3842280 : for (unsigned int i = npatterns; i < copy_nelts; ++i)
72 : {
73 0 : element_type step = clamp (elements.elt (i)
74 0 : - elements.elt (i - npatterns));
75 0 : m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
76 : }
77 3842280 : m_encoding.finalize ();
78 3842280 : }
79 :
80 : /* Switch to a new permutation vector that selects the same input elements
81 : as ORIG, but with each element split into FACTOR pieces. For example,
82 : if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 : { 2, 3, 4, 5, 0, 1, 6, 7 }. */
84 :
85 : void
86 677835 : vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 : unsigned int factor)
88 : {
89 677835 : m_ninputs = orig.m_ninputs;
90 677835 : m_nelts_per_input = orig.m_nelts_per_input * factor;
91 677835 : m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 677835 : orig.m_encoding.npatterns () * factor,
93 : orig.m_encoding.nelts_per_pattern ());
94 677835 : unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 2240356 : for (unsigned int i = 0; i < encoded_nelts; ++i)
96 : {
97 1562521 : element_type base = orig.m_encoding[i] * factor;
98 10392989 : for (unsigned int j = 0; j < factor; ++j)
99 8830468 : m_encoding.quick_push (base + j);
100 : }
101 677835 : m_encoding.finalize ();
102 677835 : }
103 :
104 : /* Check whether we can switch to a new permutation vector that
105 : selects the same input elements as ORIG, but with each element
106 : built up from FACTOR pieces. Return true if yes, otherwise
107 : return false. Every FACTOR permutation indexes should be
108 : continuous separately and the first one of each batch should
109 : be able to exactly modulo FACTOR. For example, if ORIG is
110 : { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
111 : is { 1, 2, 0, 3 }. */
112 :
113 : bool
114 18 : vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
115 : unsigned int factor)
116 : {
117 18 : gcc_assert (factor > 0);
118 :
119 18 : if (maybe_lt (orig.m_nelts_per_input, factor))
120 : return false;
121 :
122 18 : poly_uint64 nelts;
123 : /* Invalid if vector units number isn't multiple of factor. */
124 36 : if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
125 : return false;
126 :
127 : /* Only handle the case that npatterns is multiple of factor.
128 : FIXME: Try to see whether we can reshape it by factor npatterns. */
129 18 : if (orig.m_encoding.npatterns () % factor != 0)
130 : return false;
131 :
132 11 : unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
133 11 : auto_vec<element_type, 32> encoding (encoded_nelts);
134 : /* Separate all encoded elements into batches by size factor,
135 : then ensure the first element of each batch is multiple of
136 : factor and all elements in each batch is consecutive from
137 : the first one. */
138 12 : for (unsigned int i = 0; i < encoded_nelts; i += factor)
139 : {
140 11 : element_type first = orig.m_encoding[i];
141 11 : element_type new_index;
142 11 : if (!multiple_p (first, factor, &new_index))
143 10 : return false;
144 25 : for (unsigned int j = 1; j < factor; ++j)
145 24 : if (maybe_ne (first + j, orig.m_encoding[i + j]))
146 10 : return false;
147 1 : encoding.quick_push (new_index);
148 : }
149 :
150 1 : m_ninputs = orig.m_ninputs;
151 1 : m_nelts_per_input = nelts;
152 1 : poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
153 1 : unsigned int npatterns = orig.m_encoding.npatterns () / factor;
154 :
155 1 : m_encoding.new_vector (full_nelts, npatterns,
156 : orig.m_encoding.nelts_per_pattern ());
157 1 : m_encoding.splice (encoding);
158 1 : m_encoding.finalize ();
159 :
160 1 : return true;
161 11 : }
162 :
163 : /* Rotate the inputs of the permutation right by DELTA inputs. This changes
164 : the values of the permutation vector but it doesn't change the way that
165 : the elements are encoded. */
166 :
167 : void
168 15072 : vec_perm_indices::rotate_inputs (int delta)
169 : {
170 15072 : element_type element_delta = delta * m_nelts_per_input;
171 77890 : for (unsigned int i = 0; i < m_encoding.length (); ++i)
172 62818 : m_encoding[i] = clamp (m_encoding[i] + element_delta);
173 15072 : }
174 :
175 : /* Return true if index OUT_BASE + I * OUT_STEP selects input
176 : element IN_BASE + I * IN_STEP. For example, the call to test
177 : whether a permute reverses a vector of N elements would be:
178 :
179 : series_p (0, 1, N - 1, -1)
180 :
181 : which would return true for { N - 1, N - 2, N - 3, ... }.
182 : The calls to test for an interleaving of elements starting
183 : at N1 and N2 would be:
184 :
185 : series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
186 :
187 : which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
188 :
189 : bool
190 3789211 : vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
191 : element_type in_base, element_type in_step) const
192 : {
193 : /* Check the base value. */
194 3789211 : if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
195 : return false;
196 :
197 1261159 : element_type full_nelts = m_encoding.full_nelts ();
198 1261159 : unsigned int npatterns = m_encoding.npatterns ();
199 :
200 : /* Calculate which multiple of OUT_STEP elements we need to get
201 : back to the same pattern. */
202 1261159 : unsigned int cycle_length = least_common_multiple (out_step, npatterns);
203 :
204 : /* Check the steps. */
205 1261159 : in_step = clamp (in_step);
206 1261159 : out_base += out_step;
207 1261159 : unsigned int limit = 0;
208 2139619 : for (;;)
209 : {
210 : /* Succeed if we've checked all the elements in the vector. */
211 1700389 : if (known_ge (out_base, full_nelts))
212 1261159 : return true;
213 :
214 1376893 : if (out_base >= npatterns)
215 : {
216 : /* We've got to the end of the "foreground" values. Check
217 : 2 elements from each pattern in the "background" values. */
218 855471 : if (limit == 0)
219 830261 : limit = out_base + cycle_length * 2;
220 25210 : else if (out_base >= limit)
221 : return true;
222 : }
223 :
224 1369619 : element_type v0 = m_encoding.elt (out_base - out_step);
225 1369619 : element_type v1 = m_encoding.elt (out_base);
226 1701961 : if (maybe_ne (clamp (v1 - v0), in_step))
227 : return false;
228 :
229 439230 : out_base += out_step;
230 439230 : }
231 : }
232 :
233 : /* Return true if all elements of the permutation vector are in the range
234 : [START, START + SIZE). */
235 :
236 : bool
237 3329171 : vec_perm_indices::all_in_range_p (element_type start, element_type size) const
238 : {
239 : /* Check the first two elements of each pattern. */
240 3329171 : unsigned int npatterns = m_encoding.npatterns ();
241 3329171 : unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
242 3329171 : unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
243 14769206 : for (unsigned int i = 0; i < base_nelts; ++i)
244 24942346 : if (!known_in_range_p (m_encoding[i], start, size))
245 : return false;
246 :
247 : /* For stepped encodings, check the full range of the series. */
248 1266895 : if (nelts_per_pattern == 3)
249 : {
250 376207 : element_type limit = input_nelts ();
251 :
252 : /* The number of elements in each pattern beyond the first two
253 : that we checked above. */
254 376207 : poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
255 376207 : npatterns) - 2;
256 798611 : for (unsigned int i = 0; i < npatterns; ++i)
257 : {
258 : /* BASE1 has been checked but BASE2 hasn't. */
259 604140 : element_type base1 = m_encoding[i + npatterns];
260 604140 : element_type base2 = m_encoding[i + base_nelts];
261 :
262 : /* The step to add to get from BASE1 to each subsequent value. */
263 604140 : element_type step = clamp (base2 - base1);
264 :
265 : /* STEP has no inherent sign, so a value near LIMIT can
266 : act as a negative step. The series is in range if it
267 : is in range according to one of the two interpretations.
268 :
269 : Since we're dealing with clamped values, ELEMENT_TYPE is
270 : wide enough for overflow not to be a problem. */
271 604140 : element_type headroom_down = base1 - start;
272 604140 : element_type headroom_up = size - headroom_down - 1;
273 604140 : HOST_WIDE_INT diff;
274 604140 : if ((!step.is_constant (&diff)
275 604140 : || maybe_lt (headroom_up, diff * step_nelts))
276 181960 : && (!(limit - step).is_constant (&diff)
277 181960 : || maybe_lt (headroom_down, diff * step_nelts)))
278 2244012 : return false;
279 : }
280 : }
281 : return true;
282 : }
283 :
284 : /* Try to read the contents of VECTOR_CST CST as a constant permutation
285 : vector. Return true and add the elements to BUILDER on success,
286 : otherwise return false without modifying BUILDER. */
287 :
288 : bool
289 1933929 : tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
290 : {
291 1933929 : unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
292 9556402 : for (unsigned int i = 0; i < encoded_nelts; ++i)
293 7622498 : if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
294 : return false;
295 :
296 3867808 : builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
297 1933904 : VECTOR_CST_NPATTERNS (cst),
298 1933904 : VECTOR_CST_NELTS_PER_PATTERN (cst));
299 9556377 : for (unsigned int i = 0; i < encoded_nelts; ++i)
300 7622473 : builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
301 : return true;
302 : }
303 :
304 : /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
305 :
306 : tree
307 1232149 : vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
308 : {
309 1232149 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
310 1232149 : tree_vector_builder sel (type, indices.encoding ().npatterns (),
311 1232149 : indices.encoding ().nelts_per_pattern ());
312 1232149 : unsigned int encoded_nelts = sel.encoded_nelts ();
313 8361102 : for (unsigned int i = 0; i < encoded_nelts; i++)
314 7128953 : sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
315 1232149 : return sel.build ();
316 1232149 : }
317 :
318 : /* Return a CONST_VECTOR of mode MODE that contains the elements of
319 : INDICES. */
320 :
321 : rtx
322 0 : vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
323 : {
324 0 : gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
325 : && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
326 0 : rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
327 0 : indices.encoding ().nelts_per_pattern ());
328 0 : unsigned int encoded_nelts = sel.encoded_nelts ();
329 0 : for (unsigned int i = 0; i < encoded_nelts; i++)
330 0 : sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
331 0 : return sel.build ();
332 0 : }
333 :
334 : #if CHECKING_P
335 :
336 : namespace selftest {
337 :
338 : /* Test a 12-element vector. */
339 :
340 : static void
341 4 : test_vec_perm_12 (void)
342 : {
343 4 : vec_perm_builder builder (12, 12, 1);
344 20 : for (unsigned int i = 0; i < 4; ++i)
345 : {
346 16 : builder.quick_push (i * 5);
347 16 : builder.quick_push (3 + i);
348 16 : builder.quick_push (2 + 3 * i);
349 : }
350 4 : vec_perm_indices indices (builder, 1, 12);
351 4 : ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
352 4 : ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
353 4 : ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
354 4 : ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
355 4 : ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
356 :
357 4 : ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
358 4 : ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
359 :
360 4 : ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
361 4 : ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
362 :
363 4 : ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
364 4 : ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
365 :
366 4 : ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
367 4 : ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
368 4 : ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
369 4 : }
370 :
371 : /* Run selftests for this file. */
372 :
373 : void
374 4 : vec_perm_indices_cc_tests ()
375 : {
376 4 : test_vec_perm_12 ();
377 4 : }
378 :
379 : } // namespace selftest
380 :
381 : #endif
|