Branch data Line data Source code
1 : : /* A representation of vector permutation indices.
2 : : Copyright (C) 2017-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "vec-perm-indices.h"
24 : : #include "tree.h"
25 : : #include "fold-const.h"
26 : : #include "tree-vector-builder.h"
27 : : #include "backend.h"
28 : : #include "rtl.h"
29 : : #include "memmodel.h"
30 : : #include "emit-rtl.h"
31 : : #include "selftest.h"
32 : : #include "rtx-vector-builder.h"
33 : :
34 : : /* Switch to a new permutation vector that selects between NINPUTS vector
35 : : inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 : : new permutation vector from ELEMENTS, clamping each one to be in range. */
37 : :
38 : : void
39 : 3597611 : vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 : : unsigned int ninputs,
41 : : poly_uint64 nelts_per_input)
42 : : {
43 : 3597611 : m_ninputs = ninputs;
44 : 3597611 : m_nelts_per_input = nelts_per_input;
45 : : /* If the vector has a constant number of elements, expand the
46 : : encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 : : wrap halfway if there is only one vector input, and we want
48 : : the wrapped form to be the canonical one.
49 : :
50 : : If the vector has a variable number of elements, just copy
51 : : the encoding. In that case the unwrapped form is canonical
52 : : and there is no way of representing the wrapped form. */
53 : 3597611 : poly_uint64 full_nelts = elements.full_nelts ();
54 : 3597611 : unsigned HOST_WIDE_INT copy_nelts;
55 : 3597611 : if (full_nelts.is_constant (©_nelts))
56 : 3597611 : m_encoding.new_vector (full_nelts, copy_nelts, 1);
57 : : else
58 : : {
59 : : copy_nelts = elements.encoded_nelts ();
60 : : m_encoding.new_vector (full_nelts, elements.npatterns (),
61 : : elements.nelts_per_pattern ());
62 : : }
63 : 3597611 : unsigned int npatterns = m_encoding.npatterns ();
64 : 29278732 : for (unsigned int i = 0; i < npatterns; ++i)
65 : 25681360 : m_encoding.quick_push (clamp (elements.elt (i)));
66 : : /* Use the fact that:
67 : :
68 : : (a + b) % c == ((a % c) + (b % c)) % c
69 : :
70 : : to simplify the clamping of variable-length vectors. */
71 : 3597611 : for (unsigned int i = npatterns; i < copy_nelts; ++i)
72 : : {
73 : 0 : element_type step = clamp (elements.elt (i)
74 : 0 : - elements.elt (i - npatterns));
75 : 0 : m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
76 : : }
77 : 3597611 : m_encoding.finalize ();
78 : 3597611 : }
79 : :
80 : : /* Switch to a new permutation vector that selects the same input elements
81 : : as ORIG, but with each element split into FACTOR pieces. For example,
82 : : if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 : : { 2, 3, 4, 5, 0, 1, 6, 7 }. */
84 : :
85 : : void
86 : 612773 : vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 : : unsigned int factor)
88 : : {
89 : 612773 : m_ninputs = orig.m_ninputs;
90 : 612773 : m_nelts_per_input = orig.m_nelts_per_input * factor;
91 : 612773 : m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 : 612773 : orig.m_encoding.npatterns () * factor,
93 : : orig.m_encoding.nelts_per_pattern ());
94 : 612773 : unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 : 2037262 : for (unsigned int i = 0; i < encoded_nelts; ++i)
96 : : {
97 : 1424489 : element_type base = orig.m_encoding[i] * factor;
98 : 9350569 : for (unsigned int j = 0; j < factor; ++j)
99 : 7926080 : m_encoding.quick_push (base + j);
100 : : }
101 : 612773 : m_encoding.finalize ();
102 : 612773 : }
103 : :
104 : : /* Check whether we can switch to a new permutation vector that
105 : : selects the same input elements as ORIG, but with each element
106 : : built up from FACTOR pieces. Return true if yes, otherwise
107 : : return false. Every FACTOR permutation indexes should be
108 : : continuous separately and the first one of each batch should
109 : : be able to exactly modulo FACTOR. For example, if ORIG is
110 : : { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
111 : : is { 1, 2, 0, 3 }. */
112 : :
113 : : bool
114 : 15 : vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
115 : : unsigned int factor)
116 : : {
117 : 15 : gcc_assert (factor > 0);
118 : :
119 : 15 : if (maybe_lt (orig.m_nelts_per_input, factor))
120 : : return false;
121 : :
122 : 15 : poly_uint64 nelts;
123 : : /* Invalid if vector units number isn't multiple of factor. */
124 : 30 : if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
125 : : return false;
126 : :
127 : : /* Only handle the case that npatterns is multiple of factor.
128 : : FIXME: Try to see whether we can reshape it by factor npatterns. */
129 : 15 : if (orig.m_encoding.npatterns () % factor != 0)
130 : : return false;
131 : :
132 : 11 : unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
133 : 11 : auto_vec<element_type, 32> encoding (encoded_nelts);
134 : : /* Separate all encoded elements into batches by size factor,
135 : : then ensure the first element of each batch is multiple of
136 : : factor and all elements in each batch is consecutive from
137 : : the first one. */
138 : 12 : for (unsigned int i = 0; i < encoded_nelts; i += factor)
139 : : {
140 : 11 : element_type first = orig.m_encoding[i];
141 : 11 : element_type new_index;
142 : 11 : if (!multiple_p (first, factor, &new_index))
143 : 10 : return false;
144 : 25 : for (unsigned int j = 1; j < factor; ++j)
145 : 24 : if (maybe_ne (first + j, orig.m_encoding[i + j]))
146 : 10 : return false;
147 : 1 : encoding.quick_push (new_index);
148 : : }
149 : :
150 : 1 : m_ninputs = orig.m_ninputs;
151 : 1 : m_nelts_per_input = nelts;
152 : 1 : poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
153 : 1 : unsigned int npatterns = orig.m_encoding.npatterns () / factor;
154 : :
155 : 1 : m_encoding.new_vector (full_nelts, npatterns,
156 : : orig.m_encoding.nelts_per_pattern ());
157 : 1 : m_encoding.splice (encoding);
158 : 1 : m_encoding.finalize ();
159 : :
160 : 1 : return true;
161 : 11 : }
162 : :
163 : : /* Rotate the inputs of the permutation right by DELTA inputs. This changes
164 : : the values of the permutation vector but it doesn't change the way that
165 : : the elements are encoded. */
166 : :
167 : : void
168 : 15112 : vec_perm_indices::rotate_inputs (int delta)
169 : : {
170 : 15112 : element_type element_delta = delta * m_nelts_per_input;
171 : 77696 : for (unsigned int i = 0; i < m_encoding.length (); ++i)
172 : 62584 : m_encoding[i] = clamp (m_encoding[i] + element_delta);
173 : 15112 : }
174 : :
175 : : /* Return true if index OUT_BASE + I * OUT_STEP selects input
176 : : element IN_BASE + I * IN_STEP. For example, the call to test
177 : : whether a permute reverses a vector of N elements would be:
178 : :
179 : : series_p (0, 1, N - 1, -1)
180 : :
181 : : which would return true for { N - 1, N - 2, N - 3, ... }.
182 : : The calls to test for an interleaving of elements starting
183 : : at N1 and N2 would be:
184 : :
185 : : series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
186 : :
187 : : which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
188 : :
189 : : bool
190 : 3449905 : vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
191 : : element_type in_base, element_type in_step) const
192 : : {
193 : : /* Check the base value. */
194 : 3449905 : if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
195 : : return false;
196 : :
197 : 1110403 : element_type full_nelts = m_encoding.full_nelts ();
198 : 1110403 : unsigned int npatterns = m_encoding.npatterns ();
199 : :
200 : : /* Calculate which multiple of OUT_STEP elements we need to get
201 : : back to the same pattern. */
202 : 1110403 : unsigned int cycle_length = least_common_multiple (out_step, npatterns);
203 : :
204 : : /* Check the steps. */
205 : 1110403 : in_step = clamp (in_step);
206 : 1110403 : out_base += out_step;
207 : 1110403 : unsigned int limit = 0;
208 : 1815237 : for (;;)
209 : : {
210 : : /* Succeed if we've checked all the elements in the vector. */
211 : 1462820 : if (known_ge (out_base, full_nelts))
212 : 1110403 : return true;
213 : :
214 : 1159413 : if (out_base >= npatterns)
215 : : {
216 : : /* We've got to the end of the "foreground" values. Check
217 : : 2 elements from each pattern in the "background" values. */
218 : 681454 : if (limit == 0)
219 : 656384 : limit = out_base + cycle_length * 2;
220 : 25070 : else if (out_base >= limit)
221 : : return true;
222 : : }
223 : :
224 : 1151228 : element_type v0 = m_encoding.elt (out_base - out_step);
225 : 1151228 : element_type v1 = m_encoding.elt (out_base);
226 : 1378133 : if (maybe_ne (clamp (v1 - v0), in_step))
227 : : return false;
228 : :
229 : 352417 : out_base += out_step;
230 : 352417 : }
231 : : }
232 : :
233 : : /* Return true if all elements of the permutation vector are in the range
234 : : [START, START + SIZE). */
235 : :
236 : : bool
237 : 3050559 : vec_perm_indices::all_in_range_p (element_type start, element_type size) const
238 : : {
239 : : /* Check the first two elements of each pattern. */
240 : 3050559 : unsigned int npatterns = m_encoding.npatterns ();
241 : 3050559 : unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
242 : 3050559 : unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
243 : 13323889 : for (unsigned int i = 0; i < base_nelts; ++i)
244 : 22441624 : if (!known_in_range_p (m_encoding[i], start, size))
245 : : return false;
246 : :
247 : : /* For stepped encodings, check the full range of the series. */
248 : 1155595 : if (nelts_per_pattern == 3)
249 : : {
250 : 379154 : element_type limit = input_nelts ();
251 : :
252 : : /* The number of elements in each pattern beyond the first two
253 : : that we checked above. */
254 : 379154 : poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
255 : 379154 : npatterns) - 2;
256 : 833189 : for (unsigned int i = 0; i < npatterns; ++i)
257 : : {
258 : : /* BASE1 has been checked but BASE2 hasn't. */
259 : 620003 : element_type base1 = m_encoding[i + npatterns];
260 : 620003 : element_type base2 = m_encoding[i + base_nelts];
261 : :
262 : : /* The step to add to get from BASE1 to each subsequent value. */
263 : 620003 : element_type step = clamp (base2 - base1);
264 : :
265 : : /* STEP has no inherent sign, so a value near LIMIT can
266 : : act as a negative step. The series is in range if it
267 : : is in range according to one of the two interpretations.
268 : :
269 : : Since we're dealing with clamped values, ELEMENT_TYPE is
270 : : wide enough for overflow not to be a problem. */
271 : 620003 : element_type headroom_down = base1 - start;
272 : 620003 : element_type headroom_up = size - headroom_down - 1;
273 : 620003 : HOST_WIDE_INT diff;
274 : 620003 : if ((!step.is_constant (&diff)
275 : 620003 : || maybe_lt (headroom_up, diff * step_nelts))
276 : 166208 : && (!(limit - step).is_constant (&diff)
277 : 166208 : || maybe_lt (headroom_down, diff * step_nelts)))
278 : 2060932 : return false;
279 : : }
280 : : }
281 : : return true;
282 : : }
283 : :
284 : : /* Try to read the contents of VECTOR_CST CST as a constant permutation
285 : : vector. Return true and add the elements to BUILDER on success,
286 : : otherwise return false without modifying BUILDER. */
287 : :
288 : : bool
289 : 1747232 : tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
290 : : {
291 : 1747232 : unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
292 : 8865315 : for (unsigned int i = 0; i < encoded_nelts; ++i)
293 : 7118108 : if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
294 : : return false;
295 : :
296 : 3494414 : builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
297 : 1747207 : VECTOR_CST_NPATTERNS (cst),
298 : 1747207 : VECTOR_CST_NELTS_PER_PATTERN (cst));
299 : 8865290 : for (unsigned int i = 0; i < encoded_nelts; ++i)
300 : 7118083 : builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
301 : : return true;
302 : : }
303 : :
304 : : /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
305 : :
306 : : tree
307 : 1191725 : vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
308 : : {
309 : 1191725 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
310 : 1191725 : tree_vector_builder sel (type, indices.encoding ().npatterns (),
311 : 1191725 : indices.encoding ().nelts_per_pattern ());
312 : 1191725 : unsigned int encoded_nelts = sel.encoded_nelts ();
313 : 8116301 : for (unsigned int i = 0; i < encoded_nelts; i++)
314 : 6924576 : sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
315 : 1191725 : return sel.build ();
316 : 1191725 : }
317 : :
318 : : /* Return a CONST_VECTOR of mode MODE that contains the elements of
319 : : INDICES. */
320 : :
321 : : rtx
322 : 0 : vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
323 : : {
324 : 0 : gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
325 : : && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
326 : 0 : rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
327 : 0 : indices.encoding ().nelts_per_pattern ());
328 : 0 : unsigned int encoded_nelts = sel.encoded_nelts ();
329 : 0 : for (unsigned int i = 0; i < encoded_nelts; i++)
330 : 0 : sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
331 : 0 : return sel.build ();
332 : 0 : }
333 : :
334 : : #if CHECKING_P
335 : :
336 : : namespace selftest {
337 : :
338 : : /* Test a 12-element vector. */
339 : :
340 : : static void
341 : 4 : test_vec_perm_12 (void)
342 : : {
343 : 4 : vec_perm_builder builder (12, 12, 1);
344 : 20 : for (unsigned int i = 0; i < 4; ++i)
345 : : {
346 : 16 : builder.quick_push (i * 5);
347 : 16 : builder.quick_push (3 + i);
348 : 16 : builder.quick_push (2 + 3 * i);
349 : : }
350 : 4 : vec_perm_indices indices (builder, 1, 12);
351 : 4 : ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
352 : 4 : ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
353 : 4 : ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
354 : 4 : ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
355 : 4 : ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
356 : :
357 : 4 : ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
358 : 4 : ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
359 : :
360 : 4 : ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
361 : 4 : ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
362 : :
363 : 4 : ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
364 : 4 : ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
365 : :
366 : 4 : ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
367 : 4 : ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
368 : 4 : ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
369 : 4 : }
370 : :
371 : : /* Run selftests for this file. */
372 : :
373 : : void
374 : 4 : vec_perm_indices_cc_tests ()
375 : : {
376 : 4 : test_vec_perm_12 ();
377 : 4 : }
378 : :
379 : : } // namespace selftest
380 : :
381 : : #endif
|