1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2020-2024 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief Unit tests for the vectorized SIMD functionality.
20 */
21
22#include <limits>
23
24#include "gtest/gtest.h"
25
26#include "../astcenc_internal.h"
27#include "../astcenc_vecmathlib.h"
28
29namespace astcenc
30{
31
32// Misc utility tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
33
34static unsigned int round_down(unsigned int x)
35{
36	unsigned int remainder = x % ASTCENC_SIMD_WIDTH;
37	return x - remainder;
38}
39
40static unsigned int round_up(unsigned int x)
41{
42	unsigned int remainder = x % ASTCENC_SIMD_WIDTH;
43	if (!remainder)
44	{
45		return x;
46	}
47
48	return x - remainder + ASTCENC_SIMD_WIDTH;
49}
50
51/** @brief Test VLA loop limit round down. */
52TEST(misc, RoundDownVLA)
53{
54	// Static ones which are valid for all VLA widths
55	EXPECT_EQ(round_down_to_simd_multiple_vla(0),  0u);
56	EXPECT_EQ(round_down_to_simd_multiple_vla(8),  8u);
57	EXPECT_EQ(round_down_to_simd_multiple_vla(16), 16u);
58
59	// Variable ones which depend on VLA width
60	EXPECT_EQ(round_down_to_simd_multiple_vla(3),   round_down(3));
61	EXPECT_EQ(round_down_to_simd_multiple_vla(5),   round_down(5));
62	EXPECT_EQ(round_down_to_simd_multiple_vla(7),   round_down(7));
63	EXPECT_EQ(round_down_to_simd_multiple_vla(231), round_down(231));
64}
65
66/** @brief Test VLA loop limit round up. */
67TEST(misc, RoundUpVLA)
68{
69	// Static ones which are valid for all VLA widths
70	EXPECT_EQ(round_up_to_simd_multiple_vla(0),  0u);
71	EXPECT_EQ(round_up_to_simd_multiple_vla(8),  8u);
72	EXPECT_EQ(round_up_to_simd_multiple_vla(16), 16u);
73
74	// Variable ones which depend on VLA width
75	EXPECT_EQ(round_up_to_simd_multiple_vla(3),   round_up(3));
76	EXPECT_EQ(round_up_to_simd_multiple_vla(5),   round_up(5));
77	EXPECT_EQ(round_up_to_simd_multiple_vla(7),   round_up(7));
78	EXPECT_EQ(round_up_to_simd_multiple_vla(231), round_up(231));
79}
80
81#if ASTCENC_SIMD_WIDTH == 1
82
83// VLA (1-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
84
85/** @brief Test VLA change_sign. */
86TEST(vfloat, ChangeSign)
87{
88	vfloat a0(-1.0f);
89	vfloat b0(-1.0f);
90	vfloat r0 = change_sign(a0, b0);
91	EXPECT_EQ(r0.lane<0>(), 1.0f);
92
93	vfloat a1( 1.0f);
94	vfloat b1(-1.0f);
95	vfloat r1 = change_sign(a1, b1);
96	EXPECT_EQ(r1.lane<0>(), -1.0f);
97
98	vfloat a2(-3.12f);
99	vfloat b2( 3.12f);
100	vfloat r2 = change_sign(a2, b2);
101	EXPECT_EQ(r2.lane<0>(), -3.12f);
102
103	vfloat a3( 3.12f);
104	vfloat b3( 3.12f);
105	vfloat r3 = change_sign(a3, b3);
106	EXPECT_EQ(r3.lane<0>(), 3.12f);
107}
108
109/** @brief Test VLA atan. */
110TEST(vfloat, Atan)
111{
112	vfloat a0(-0.15f);
113	vfloat r0 = atan(a0);
114	EXPECT_NEAR(r0.lane<0>(), -0.149061f, 0.005f);
115
116	vfloat a1(0.0f);
117	vfloat r1 = atan(a1);
118	EXPECT_NEAR(r1.lane<0>(),  0.000000f, 0.005f);
119
120	vfloat a2(0.9f);
121	vfloat r2 = atan(a2);
122	EXPECT_NEAR(r2.lane<0>(),  0.733616f, 0.005f);
123
124	vfloat a3(2.1f);
125	vfloat r3 = atan(a3);
126	EXPECT_NEAR(r3.lane<0>(),  1.123040f, 0.005f);
127}
128
129/** @brief Test VLA atan2. */
130TEST(vfloat, Atan2)
131{
132	vfloat a0(-0.15f);
133	vfloat b0( 1.15f);
134	vfloat r0 = atan2(a0, b0);
135	EXPECT_NEAR(r0.lane<0>(), -0.129816f, 0.005f);
136
137	vfloat a1( 0.0f);
138	vfloat b1(-3.0f);
139	vfloat r1 = atan2(a1, b1);
140	EXPECT_NEAR(r1.lane<0>(),  3.141592f, 0.005f);
141
142	vfloat a2( 0.9f);
143	vfloat b2(-0.9f);
144	vfloat r2 = atan2(a2, b2);
145	EXPECT_NEAR(r2.lane<0>(),  2.360342f, 0.005f);
146
147	vfloat a3( 2.1f);
148	vfloat b3( 1.1f);
149	vfloat r3 = atan2(a3, b3);
150	EXPECT_NEAR(r3.lane<0>(),  1.084357f, 0.005f);
151}
152
153#elif ASTCENC_SIMD_WIDTH == 4
154
155// VLA (4-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
156
157/** @brief Test VLA change_sign. */
158TEST(vfloat, ChangeSign)
159{
160	vfloat a(-1.0f,  1.0f, -3.12f, 3.12f);
161	vfloat b(-1.0f, -1.0f,  3.12f, 3.12f);
162	vfloat r = change_sign(a, b);
163	EXPECT_EQ(r.lane<0>(),  1.0f);
164	EXPECT_EQ(r.lane<1>(), -1.0f);
165	EXPECT_EQ(r.lane<2>(), -3.12f);
166	EXPECT_EQ(r.lane<3>(),  3.12f);
167}
168
169/** @brief Test VLA atan. */
170TEST(vfloat, Atan)
171{
172	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f);
173	vfloat r = atan(a);
174	EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f);
175	EXPECT_NEAR(r.lane<1>(),  0.000000f, 0.005f);
176	EXPECT_NEAR(r.lane<2>(),  0.733616f, 0.005f);
177	EXPECT_NEAR(r.lane<3>(),  1.123040f, 0.005f);
178}
179
180/** @brief Test VLA atan2. */
181TEST(vfloat, Atan2)
182{
183	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f);
184	vfloat b(1.15f, -3.0f, -0.9f, 1.1f);
185	vfloat r = atan2(a, b);
186	EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f);
187	EXPECT_NEAR(r.lane<1>(),  3.141592f, 0.005f);
188	EXPECT_NEAR(r.lane<2>(),  2.360342f, 0.005f);
189	EXPECT_NEAR(r.lane<3>(),  1.084357f, 0.005f);
190}
191
192#elif ASTCENC_SIMD_WIDTH == 8
193
194// VLA (8-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
195
196/** @brief Test VLA change_sign. */
197TEST(vfloat, ChangeSign)
198{
199	vfloat a(-1.0f,  1.0f, -3.12f, 3.12f, -1.0f,  1.0f, -3.12f, 3.12f);
200	vfloat b(-1.0f, -1.0f,  3.12f, 3.12f, -1.0f, -1.0f,  3.12f, 3.12f);
201	vfloat r = change_sign(a, b);
202	EXPECT_EQ(r.lane<0>(),  1.0f);
203	EXPECT_EQ(r.lane<1>(), -1.0f);
204	EXPECT_EQ(r.lane<2>(), -3.12f);
205	EXPECT_EQ(r.lane<3>(),  3.12f);
206	EXPECT_EQ(r.lane<4>(),  1.0f);
207	EXPECT_EQ(r.lane<5>(), -1.0f);
208	EXPECT_EQ(r.lane<6>(), -3.12f);
209	EXPECT_EQ(r.lane<7>(),  3.12f);
210}
211
212/** @brief Test VLA atan. */
213TEST(vfloat, Atan)
214{
215	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f);
216	vfloat r = atan(a);
217	EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f);
218	EXPECT_NEAR(r.lane<1>(),  0.000000f, 0.005f);
219	EXPECT_NEAR(r.lane<2>(),  0.733616f, 0.005f);
220	EXPECT_NEAR(r.lane<3>(),  1.123040f, 0.005f);
221	EXPECT_NEAR(r.lane<4>(), -0.149061f, 0.005f);
222	EXPECT_NEAR(r.lane<5>(),  0.000000f, 0.005f);
223	EXPECT_NEAR(r.lane<6>(),  0.733616f, 0.005f);
224	EXPECT_NEAR(r.lane<7>(),  1.123040f, 0.005f);
225}
226
227/** @brief Test VLA atan2. */
228TEST(vfloat, Atan2)
229{
230	vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f);
231	vfloat b(1.15f, -3.0f, -0.9f, 1.1f, 1.15f, -3.0f, -0.9f, 1.1f);
232	vfloat r = atan2(a, b);
233	EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f);
234	EXPECT_NEAR(r.lane<1>(),  3.141592f, 0.005f);
235	EXPECT_NEAR(r.lane<2>(),  2.360342f, 0.005f);
236	EXPECT_NEAR(r.lane<3>(),  1.084357f, 0.005f);
237	EXPECT_NEAR(r.lane<4>(), -0.129816f, 0.005f);
238	EXPECT_NEAR(r.lane<5>(),  3.141592f, 0.005f);
239	EXPECT_NEAR(r.lane<6>(),  2.360342f, 0.005f);
240	EXPECT_NEAR(r.lane<7>(),  1.084357f, 0.005f);
241}
242
243#endif
244
/** @brief Quiet NaN used as an input by the NaN-handling tests below. */
static const float qnan = std::numeric_limits<float>::quiet_NaN();

/** @brief 32-byte aligned float data (0..8) read by the float load tests. */
alignas(32) static const float f32_data[9] {
	0.0f, 1.0f, 2.0f,
	3.0f, 4.0f, 5.0f,
	6.0f, 7.0f, 8.0f
};

/** @brief 32-byte aligned signed integer data (0..8) read by the int load tests. */
alignas(32) static const int s32_data[9] {
	0, 1, 2,
	3, 4, 5,
	6, 7, 8
};

/** @brief 32-byte aligned byte data (0..8) read by the 8-bit load tests. */
alignas(32) static const uint8_t u8_data[9] {
	0, 1, 2,
	3, 4, 5,
	6, 7, 8
};
258
259// VFLOAT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
260
261/** @brief Test unaligned vfloat4 data load. */
262TEST(vfloat4, UnalignedLoad)
263{
264	vfloat4 a(&(f32_data[1]));
265	EXPECT_EQ(a.lane<0>(), 1.0f);
266	EXPECT_EQ(a.lane<1>(), 2.0f);
267	EXPECT_EQ(a.lane<2>(), 3.0f);
268	EXPECT_EQ(a.lane<3>(), 4.0f);
269}
270
271/** @brief Test scalar duplicated vfloat4 load. */
272TEST(vfloat4, ScalarDupLoad)
273{
274	vfloat4 a(1.1f);
275	EXPECT_EQ(a.lane<0>(), 1.1f);
276	EXPECT_EQ(a.lane<1>(), 1.1f);
277	EXPECT_EQ(a.lane<2>(), 1.1f);
278	EXPECT_EQ(a.lane<3>(), 1.1f);
279}
280
281/** @brief Test scalar vfloat4 load. */
282TEST(vfloat4, ScalarLoad)
283{
284	vfloat4 a(1.1f, 2.2f, 3.3f, 4.4f);
285	EXPECT_EQ(a.lane<0>(), 1.1f);
286	EXPECT_EQ(a.lane<1>(), 2.2f);
287	EXPECT_EQ(a.lane<2>(), 3.3f);
288	EXPECT_EQ(a.lane<3>(), 4.4f);
289}
290
291/** @brief Test copy vfloat4 load. */
292TEST(vfloat4, CopyLoad)
293{
294	vfloat4 s(1.1f, 2.2f, 3.3f, 4.4f);
295	vfloat4 a(s.m);
296	EXPECT_EQ(a.lane<0>(), 1.1f);
297	EXPECT_EQ(a.lane<1>(), 2.2f);
298	EXPECT_EQ(a.lane<2>(), 3.3f);
299	EXPECT_EQ(a.lane<3>(), 4.4f);
300}
301
302/** @brief Test vfloat4 scalar lane set. */
303TEST(vfloat4, SetLane)
304{
305	vfloat4 a(0.0f);
306
307	a.set_lane<0>(1.0f);
308	EXPECT_EQ(a.lane<0>(), 1.0f);
309	EXPECT_EQ(a.lane<1>(), 0.0f);
310	EXPECT_EQ(a.lane<2>(), 0.0f);
311	EXPECT_EQ(a.lane<3>(), 0.0f);
312
313	a.set_lane<1>(2.0f);
314	EXPECT_EQ(a.lane<0>(), 1.0f);
315	EXPECT_EQ(a.lane<1>(), 2.0f);
316	EXPECT_EQ(a.lane<2>(), 0.0f);
317	EXPECT_EQ(a.lane<3>(), 0.0f);
318
319	a.set_lane<2>(3.0f);
320	EXPECT_EQ(a.lane<0>(), 1.0f);
321	EXPECT_EQ(a.lane<1>(), 2.0f);
322	EXPECT_EQ(a.lane<2>(), 3.0f);
323	EXPECT_EQ(a.lane<3>(), 0.0f);
324
325	a.set_lane<3>(4.0f);
326	EXPECT_EQ(a.lane<0>(), 1.0f);
327	EXPECT_EQ(a.lane<1>(), 2.0f);
328	EXPECT_EQ(a.lane<2>(), 3.0f);
329	EXPECT_EQ(a.lane<3>(), 4.0f);
330}
331
332/** @brief Test vfloat4 zero. */
333TEST(vfloat4, Zero)
334{
335	vfloat4 a = vfloat4::zero();
336	EXPECT_EQ(a.lane<0>(), 0.0f);
337	EXPECT_EQ(a.lane<1>(), 0.0f);
338	EXPECT_EQ(a.lane<2>(), 0.0f);
339	EXPECT_EQ(a.lane<3>(), 0.0f);
340}
341
342/** @brief Test vfloat4 load1. */
343TEST(vfloat4, Load1)
344{
345	float s = 3.14f;
346	vfloat4 a = vfloat4::load1(&s);
347	EXPECT_EQ(a.lane<0>(), 3.14f);
348	EXPECT_EQ(a.lane<1>(), 3.14f);
349	EXPECT_EQ(a.lane<2>(), 3.14f);
350	EXPECT_EQ(a.lane<3>(), 3.14f);
351}
352
353/** @brief Test vfloat4 loada. */
354TEST(vfloat4, Loada)
355{
356	vfloat4 a = vfloat4::loada(&(f32_data[0]));
357	EXPECT_EQ(a.lane<0>(), 0.0f);
358	EXPECT_EQ(a.lane<1>(), 1.0f);
359	EXPECT_EQ(a.lane<2>(), 2.0f);
360	EXPECT_EQ(a.lane<3>(), 3.0f);
361}
362
363/** @brief Test vfloat4 lane_id. */
364TEST(vfloat4, LaneID)
365{
366	vfloat4 a = vfloat4::lane_id();
367	EXPECT_EQ(a.lane<0>(), 0.0f);
368	EXPECT_EQ(a.lane<1>(), 1.0f);
369	EXPECT_EQ(a.lane<2>(), 2.0f);
370	EXPECT_EQ(a.lane<3>(), 3.0f);
371}
372
373/** @brief Test vfloat4 swz to float4. */
374TEST(vfloat4, swz4)
375{
376	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
377	vfloat4 r = a.swz<0, 3, 2, 1>();
378	EXPECT_EQ(r.lane<0>(), 1.0f);
379	EXPECT_EQ(r.lane<1>(), 4.0f);
380	EXPECT_EQ(r.lane<2>(), 3.0f);
381	EXPECT_EQ(r.lane<3>(), 2.0f);
382
383	r = a.swz<3, 1, 1, 0>();
384	EXPECT_EQ(r.lane<0>(), 4.0f);
385	EXPECT_EQ(r.lane<1>(), 2.0f);
386	EXPECT_EQ(r.lane<2>(), 2.0f);
387	EXPECT_EQ(r.lane<3>(), 1.0f);
388}
389
390/** @brief Test vfloat4 swz to float3. */
391TEST(vfloat4, swz3)
392{
393	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
394	vfloat4 r = a.swz<0, 3, 2>();
395	EXPECT_EQ(r.lane<0>(), 1.0f);
396	EXPECT_EQ(r.lane<1>(), 4.0f);
397	EXPECT_EQ(r.lane<2>(), 3.0f);
398	EXPECT_EQ(r.lane<3>(), 0.0f);
399
400	r = a.swz<3, 1, 1>();
401	EXPECT_EQ(r.lane<0>(), 4.0f);
402	EXPECT_EQ(r.lane<1>(), 2.0f);
403	EXPECT_EQ(r.lane<2>(), 2.0f);
404	EXPECT_EQ(r.lane<3>(), 0.0f);
405}
406
407/** @brief Test vfloat4 swz to float2. */
408TEST(vfloat4, swz2)
409{
410	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
411	vfloat4 r = a.swz<0, 3>();
412	EXPECT_EQ(r.lane<0>(), 1.0f);
413	EXPECT_EQ(r.lane<1>(), 4.0f);
414
415	r = a.swz<2, 1>();
416	EXPECT_EQ(r.lane<0>(), 3.0f);
417	EXPECT_EQ(r.lane<1>(), 2.0f);
418}
419
420/** @brief Test vfloat4 add. */
421TEST(vfloat4, vadd)
422{
423	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
424	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
425	a = a + b;
426	EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
427	EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
428	EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
429	EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
430}
431
432/** @brief Test vfloat4 self-add. */
433TEST(vfloat4, vselfadd1)
434{
435	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
436	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
437
438	// Test increment by another variable
439	a += b;
440	EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
441	EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
442	EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
443	EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
444
445	// Test increment by an expression
446	a += b + b;
447	EXPECT_NEAR(a.lane<0>(), 1.0f + 0.3f, 0.001f);
448	EXPECT_NEAR(a.lane<1>(), 2.0f + 0.6f, 0.001f);
449	EXPECT_NEAR(a.lane<2>(), 3.0f + 0.9f, 0.001f);
450	EXPECT_NEAR(a.lane<3>(), 4.0f + 1.2f, 0.001f);
451}
452
453/** @brief Test vfloat4 sub. */
454TEST(vfloat4, vsub)
455{
456	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
457	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
458	a = a - b;
459	EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f);
460	EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f);
461	EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f);
462	EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f);
463}
464
465/** @brief Test vfloat4 mul. */
466TEST(vfloat4, vmul)
467{
468	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
469	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
470	a = a * b;
471	EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f);
472	EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f);
473	EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f);
474	EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f);
475}
476
477/** @brief Test vfloat4 mul. */
478TEST(vfloat4, vsmul)
479{
480	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
481	float b = 3.14f;
482	a = a * b;
483	EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f);
484	EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f);
485	EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f);
486	EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f);
487}
488
489/** @brief Test vfloat4 mul. */
490TEST(vfloat4, svmul)
491{
492	float a = 3.14f;
493	vfloat4 b(1.0f, 2.0f, 3.0f, 4.0f);
494	b = a * b;
495	EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f);
496	EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f);
497	EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f);
498	EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f);
499}
500
501/** @brief Test vfloat4 div. */
502TEST(vfloat4, vdiv)
503{
504	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
505	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
506	a = a / b;
507	EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f);
508	EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f);
509	EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
510	EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f);
511}
512
513/** @brief Test vfloat4 div. */
514TEST(vfloat4, vsdiv)
515{
516	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
517	float b = 0.3f;
518	a = a / b;
519	EXPECT_EQ(a.lane<0>(), 1.0f / 0.3f);
520	EXPECT_EQ(a.lane<1>(), 2.0f / 0.3f);
521	EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
522	EXPECT_EQ(a.lane<3>(), 4.0f / 0.3f);
523}
524
525/** @brief Test vfloat4 div. */
526TEST(vfloat4, svdiv)
527{
528	float a = 3.0f;
529	vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f);
530	b = a / b;
531	EXPECT_EQ(b.lane<0>(), 3.0f / 0.1f);
532	EXPECT_EQ(b.lane<1>(), 3.0f / 0.2f);
533	EXPECT_EQ(b.lane<2>(), 3.0f / 0.3f);
534	EXPECT_EQ(b.lane<3>(), 3.0f / 0.4f);
535}
536
537/** @brief Test vfloat4 ceq. */
538TEST(vfloat4, ceq)
539{
540	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
541	vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f);
542	vmask4 r1 = a1 == b1;
543	EXPECT_EQ(0u, mask(r1));
544	EXPECT_EQ(false, any(r1));
545	EXPECT_EQ(false, all(r1));
546
547	vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f);
548	vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f);
549	vmask4 r2 = a2 == b2;
550	EXPECT_EQ(0x1u, mask(r2));
551	EXPECT_EQ(true, any(r2));
552	EXPECT_EQ(false, all(r2));
553
554	vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f);
555	vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f);
556	vmask4 r3 = a3 == b3;
557	EXPECT_EQ(0x5u, mask(r3));
558	EXPECT_EQ(true, any(r3));
559	EXPECT_EQ(false, all(r3));
560
561	vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f);
562	vmask4 r4 = a4 == a4;
563	EXPECT_EQ(0xFu, mask(r4));
564	EXPECT_EQ(true, any(r4));
565	EXPECT_EQ(true, all(r4));
566}
567
568/** @brief Test vfloat4 cne. */
569TEST(vfloat4, cne)
570{
571	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
572	vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f);
573	vmask4 r1 = a1 != b1;
574	EXPECT_EQ(0xFu, mask(r1));
575	EXPECT_EQ(true, any(r1));
576	EXPECT_EQ(true, all(r1));
577
578	vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f);
579	vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f);
580	vmask4 r2 = a2 != b2;
581	EXPECT_EQ(0xEu, mask(r2));
582	EXPECT_EQ(true, any(r2));
583	EXPECT_EQ(false, all(r2));
584
585	vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f);
586	vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f);
587	vmask4 r3 = a3 != b3;
588	EXPECT_EQ(0xAu, mask(r3));
589	EXPECT_EQ(true, any(r3));
590	EXPECT_EQ(false, all(r3));
591
592	vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f);
593	vmask4 r4 = a4 != a4;
594	EXPECT_EQ(0u, mask(r4));
595	EXPECT_EQ(false, any(r4));
596	EXPECT_EQ(false, all(r4));
597}
598
599/** @brief Test vfloat4 clt. */
600TEST(vfloat4, clt)
601{
602	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
603	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
604	vmask4 r = a < b;
605	EXPECT_EQ(0xAu, mask(r));
606}
607
608/** @brief Test vfloat4 cle. */
609TEST(vfloat4, cle)
610{
611	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
612	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
613	vmask4 r = a <= b;
614	EXPECT_EQ(0xEu, mask(r));
615}
616
617/** @brief Test vfloat4 cgt. */
618TEST(vfloat4, cgt)
619{
620	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
621	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
622	vmask4 r = a > b;
623	EXPECT_EQ(0x1u, mask(r));
624}
625
626/** @brief Test vfloat4 cge. */
627TEST(vfloat4, cge)
628{
629	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
630	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
631	vmask4 r = a >= b;
632	EXPECT_EQ(0x5u, mask(r));
633}
634
635/** @brief Test vfloat4 min. */
636TEST(vfloat4, min)
637{
638	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
639	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
640	vfloat4 r = min(a, b);
641	EXPECT_EQ(r.lane<0>(), 0.9f);
642	EXPECT_EQ(r.lane<1>(), 2.0f);
643	EXPECT_EQ(r.lane<2>(), 3.0f);
644	EXPECT_EQ(r.lane<3>(), 4.0f);
645
646	float c = 0.3f;
647	r = min(a, c);
648	EXPECT_EQ(r.lane<0>(), 0.3f);
649	EXPECT_EQ(r.lane<1>(), 0.3f);
650	EXPECT_EQ(r.lane<2>(), 0.3f);
651	EXPECT_EQ(r.lane<3>(), 0.3f);
652
653	float d = 1.5f;
654	r = min(a, d);
655	EXPECT_EQ(r.lane<0>(), 1.0f);
656	EXPECT_EQ(r.lane<1>(), 1.5f);
657	EXPECT_EQ(r.lane<2>(), 1.5f);
658	EXPECT_EQ(r.lane<3>(), 1.5f);
659}
660
661/** @brief Test vfloat4 max. */
662TEST(vfloat4, max)
663{
664	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
665	vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f);
666	vfloat4 r = max(a, b);
667	EXPECT_EQ(r.lane<0>(), 1.0f);
668	EXPECT_EQ(r.lane<1>(), 2.1f);
669	EXPECT_EQ(r.lane<2>(), 3.0f);
670	EXPECT_EQ(r.lane<3>(), 4.1f);
671
672	float c = 4.3f;
673	r = max(a, c);
674	EXPECT_EQ(r.lane<0>(), 4.3f);
675	EXPECT_EQ(r.lane<1>(), 4.3f);
676	EXPECT_EQ(r.lane<2>(), 4.3f);
677	EXPECT_EQ(r.lane<3>(), 4.3f);
678
679	float d = 1.5f;
680	r = max(a, d);
681	EXPECT_EQ(r.lane<0>(), 1.5f);
682	EXPECT_EQ(r.lane<1>(), 2.0f);
683	EXPECT_EQ(r.lane<2>(), 3.0f);
684	EXPECT_EQ(r.lane<3>(), 4.0f);
685}
686
687/** @brief Test vfloat4 clamp. */
688TEST(vfloat4, clamp)
689{
690	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
691	vfloat4 r1 = clamp(2.1f, 3.0f, a1);
692	EXPECT_EQ(r1.lane<0>(), 2.1f);
693	EXPECT_EQ(r1.lane<1>(), 2.1f);
694	EXPECT_EQ(r1.lane<2>(), 3.0f);
695	EXPECT_EQ(r1.lane<3>(), 3.0f);
696
697	vfloat4 a2(1.0f, 2.0f, qnan, 4.0f);
698	vfloat4 r2 = clamp(2.1f, 3.0f, a2);
699	EXPECT_EQ(r2.lane<0>(), 2.1f);
700	EXPECT_EQ(r2.lane<1>(), 2.1f);
701	EXPECT_EQ(r2.lane<2>(), 2.1f);
702	EXPECT_EQ(r2.lane<3>(), 3.0f);
703}
704
705/** @brief Test vfloat4 clampz. */
706TEST(vfloat4, clampz)
707{
708	vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f);
709	vfloat4 r1 = clampz(3.0f, a1);
710	EXPECT_EQ(r1.lane<0>(), 0.0f);
711	EXPECT_EQ(r1.lane<1>(), 0.0f);
712	EXPECT_EQ(r1.lane<2>(), 0.1f);
713	EXPECT_EQ(r1.lane<3>(), 3.0f);
714
715	vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f);
716	vfloat4 r2 = clampz(3.0f, a2);
717	EXPECT_EQ(r2.lane<0>(), 0.0f);
718	EXPECT_EQ(r2.lane<1>(), 0.0f);
719	EXPECT_EQ(r2.lane<2>(), 0.0f);
720	EXPECT_EQ(r2.lane<3>(), 3.0f);
721}
722
723/** @brief Test vfloat4 clampz. */
724TEST(vfloat4, clampzo)
725{
726	vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f);
727	vfloat4 r1 = clampzo(a1);
728	EXPECT_EQ(r1.lane<0>(), 0.0f);
729	EXPECT_EQ(r1.lane<1>(), 0.0f);
730	EXPECT_EQ(r1.lane<2>(), 0.1f);
731	EXPECT_EQ(r1.lane<3>(), 1.0f);
732
733	vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f);
734	vfloat4 r2 = clampzo(a2);
735	EXPECT_EQ(r2.lane<0>(), 0.0f);
736	EXPECT_EQ(r2.lane<1>(), 0.0f);
737	EXPECT_EQ(r2.lane<2>(), 0.0f);
738	EXPECT_EQ(r2.lane<3>(), 1.0f);
739}
740
741/** @brief Test vfloat4 abs. */
742TEST(vfloat4, abs)
743{
744	vfloat4 a(-1.0f, 0.0f, 0.1f, 4.0f);
745	vfloat4 r = abs(a);
746	EXPECT_EQ(r.lane<0>(), 1.0f);
747	EXPECT_EQ(r.lane<1>(), 0.0f);
748	EXPECT_EQ(r.lane<2>(), 0.1f);
749	EXPECT_EQ(r.lane<3>(), 4.0f);
750}
751
752/** @brief Test vfloat4 round. */
753TEST(vfloat4, round)
754{
755	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
756	vfloat4 r1 = round(a1);
757	EXPECT_EQ(r1.lane<0>(), 1.0f);
758	EXPECT_EQ(r1.lane<1>(), 2.0f);
759	EXPECT_EQ(r1.lane<2>(), 2.0f);
760	EXPECT_EQ(r1.lane<3>(), 4.0f);
761
762	vfloat4 a2(-2.5f, -2.5f, -3.5f, -3.5f);
763	vfloat4 r2 = round(a2);
764	EXPECT_EQ(r2.lane<0>(), -2.0f);
765	EXPECT_EQ(r2.lane<2>(), -4.0f);
766}
767
768/** @brief Test vfloat4 hmin. */
769TEST(vfloat4, hmin)
770{
771	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
772	vfloat4 r1 = hmin(a1);
773	EXPECT_EQ(r1.lane<0>(), 1.1f);
774	EXPECT_EQ(r1.lane<1>(), 1.1f);
775	EXPECT_EQ(r1.lane<2>(), 1.1f);
776	EXPECT_EQ(r1.lane<3>(), 1.1f);
777
778	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
779	vfloat4 r2 = hmin(a2);
780	EXPECT_EQ(r2.lane<0>(), 0.2f);
781	EXPECT_EQ(r2.lane<1>(), 0.2f);
782	EXPECT_EQ(r2.lane<2>(), 0.2f);
783	EXPECT_EQ(r2.lane<3>(), 0.2f);
784}
785
786/** @brief Test vfloat4 hmin_s. */
787TEST(vfloat4, hmin_s)
788{
789	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
790	float r1 = hmin_s(a1);
791	EXPECT_EQ(r1, 1.1f);
792
793	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
794	float r2 = hmin_s(a2);
795	EXPECT_EQ(r2, 0.2f);
796}
797
798/** @brief Test vfloat4 hmin_rgb_s. */
799TEST(vfloat4, hmin_rgb_s)
800{
801	vfloat4 a1(1.1f, 1.5f, 1.6f, 0.2f);
802	float r1 = hmin_rgb_s(a1);
803	EXPECT_EQ(r1, 1.1f);
804
805	vfloat4 a2(1.5f, 0.9f, 1.6f, 1.2f);
806	float r2 = hmin_rgb_s(a2);
807	EXPECT_EQ(r2, 0.9f);
808}
809
810/** @brief Test vfloat4 hmax. */
811TEST(vfloat4, hmax)
812{
813	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
814	vfloat4 r1 = hmax(a1);
815	EXPECT_EQ(r1.lane<0>(), 4.0f);
816	EXPECT_EQ(r1.lane<1>(), 4.0f);
817	EXPECT_EQ(r1.lane<2>(), 4.0f);
818	EXPECT_EQ(r1.lane<3>(), 4.0f);
819
820	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
821	vfloat4 r2 = hmax(a2);
822	EXPECT_EQ(r2.lane<0>(), 1.6f);
823	EXPECT_EQ(r2.lane<1>(), 1.6f);
824	EXPECT_EQ(r2.lane<2>(), 1.6f);
825	EXPECT_EQ(r2.lane<3>(), 1.6f);
826}
827
828/** @brief Test vfloat4 hmax_s. */
829TEST(vfloat4, hmax_s)
830{
831	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
832	float r1 = hmax_s(a1);
833	EXPECT_EQ(r1, 4.0f);
834
835	vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f);
836	float r2 = hmax_s(a2);
837	EXPECT_EQ(r2, 1.6f);
838}
839
840/** @brief Test vfloat4 hadd_s. */
841TEST(vfloat4, hadd_s)
842{
843	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
844	float sum = 1.1f + 1.5f + 1.6f + 4.0f;
845	float r = hadd_s(a1);
846	EXPECT_NEAR(r, sum, 0.005f);
847}
848
849/** @brief Test vfloat4 hadd_rgb_s. */
850TEST(vfloat4, hadd_rgb_s)
851{
852	vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
853	float sum = 1.1f + 1.5f + 1.6f;
854	float r = hadd_rgb_s(a1);
855	EXPECT_NEAR(r, sum, 0.005f);
856}
857
858/** @brief Test vfloat4 sqrt. */
859TEST(vfloat4, sqrt)
860{
861	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
862	vfloat4 r = sqrt(a);
863	EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f));
864	EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f));
865	EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f));
866	EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f));
867}
868
869/** @brief Test vfloat4 select. */
870TEST(vfloat4, select)
871{
872	vfloat4 m1(1.0f, 1.0f, 1.0f, 1.0f);
873	vfloat4 m2(1.0f, 2.0f, 1.0f, 2.0f);
874	vmask4 cond = m1 == m2;
875
876	vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f);
877	vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f);
878
879	// Select in one direction
880	vfloat4 r1 = select(a, b, cond);
881	EXPECT_EQ(r1.lane<0>(), 4.0f);
882	EXPECT_EQ(r1.lane<1>(), 3.0f);
883	EXPECT_EQ(r1.lane<2>(), 2.0f);
884	EXPECT_EQ(r1.lane<3>(), 1.0f);
885
886	// Select in the other
887	vfloat4 r2 = select(b, a, cond);
888	EXPECT_EQ(r2.lane<0>(), 1.0f);
889	EXPECT_EQ(r2.lane<1>(), 2.0f);
890	EXPECT_EQ(r2.lane<2>(), 3.0f);
891	EXPECT_EQ(r2.lane<3>(), 4.0f);
892}
893
894/** @brief Test vfloat4 select MSB only. */
895TEST(vfloat4, select_msb)
896{
897	int msb_set = static_cast<int>(0x80000000);
898	vint4 msb(msb_set, 0, msb_set, 0);
899	vmask4 cond(msb.m);
900
901	vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f);
902	vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f);
903
904	// Select in one direction
905	vfloat4 r1 = select_msb(a, b, cond);
906	EXPECT_EQ(r1.lane<0>(), 4.0f);
907	EXPECT_EQ(r1.lane<1>(), 3.0f);
908	EXPECT_EQ(r1.lane<2>(), 2.0f);
909	EXPECT_EQ(r1.lane<3>(), 1.0f);
910
911	// Select in the other
912	vfloat4 r2 = select_msb(b, a, cond);
913	EXPECT_EQ(r2.lane<0>(), 1.0f);
914	EXPECT_EQ(r2.lane<1>(), 2.0f);
915	EXPECT_EQ(r2.lane<2>(), 3.0f);
916	EXPECT_EQ(r2.lane<3>(), 4.0f);
917}
918
919/** @brief Test vfloat4 gatherf. */
920TEST(vfloat4, gatherf)
921{
922	vint4 indices(0, 4, 3, 2);
923	vfloat4 r = gatherf(f32_data, indices);
924	EXPECT_EQ(r.lane<0>(), 0.0f);
925	EXPECT_EQ(r.lane<1>(), 4.0f);
926	EXPECT_EQ(r.lane<2>(), 3.0f);
927	EXPECT_EQ(r.lane<3>(), 2.0f);
928}
929
930/** @brief Test vfloat4 storea. */
931TEST(vfloat4, storea)
932{
933	ASTCENC_ALIGNAS float out[4];
934	vfloat4 a(f32_data);
935	storea(a, out);
936	EXPECT_EQ(out[0], 0.0f);
937	EXPECT_EQ(out[1], 1.0f);
938	EXPECT_EQ(out[2], 2.0f);
939	EXPECT_EQ(out[3], 3.0f);
940}
941
942/** @brief Test vfloat4 store. */
943TEST(vfloat4, store)
944{
945	ASTCENC_ALIGNAS float out[5];
946	vfloat4 a(f32_data);
947	store(a, &(out[1]));
948	EXPECT_EQ(out[1], 0.0f);
949	EXPECT_EQ(out[2], 1.0f);
950	EXPECT_EQ(out[3], 2.0f);
951	EXPECT_EQ(out[4], 3.0f);
952}
953
954/** @brief Test vfloat4 dot. */
955TEST(vfloat4, dot)
956{
957	vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f);
958	vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f);
959	vfloat4 r1 = dot(a1, b1);
960	EXPECT_EQ(r1.lane<0>(), 4.0f);
961	EXPECT_EQ(r1.lane<1>(), 4.0f);
962	EXPECT_EQ(r1.lane<2>(), 4.0f);
963	EXPECT_EQ(r1.lane<3>(), 4.0f);
964
965	// These values will fail to add to the same value if reassociated
966	float l0 =          141.2540435791015625f;
967	float l1 =      5345345.5000000000000000f;
968	float l2 =       234234.7031250000000000f;
969	float l3 = 124353454080.0000000000000000f;
970
971	vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f);
972	vfloat4 b2(l0, l1, l2, l3);
973	vfloat4 r2 = dot(a2, b2);
974
975	// Test that reassociation causes a failure with the numbers we chose
976	EXPECT_FALSE(any(r2 == vfloat4(l0 + l1 + l2 + l3)));
977
978	// Test that the sum works, for the association pattern we want used
979	EXPECT_TRUE(all(r2 == vfloat4((l0 + l2) + (l1 + l3))));
980}
981
982/** @brief Test vfloat4 dot_s. */
983TEST(vfloat4, dot_s)
984{
985	vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f);
986	vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f);
987	float r1 = dot_s(a1, b1);
988	EXPECT_EQ(r1, 4.0f);
989
990	// These values will fail to add to the same value if reassociated
991	float l0 =          141.2540435791015625f;
992	float l1 =      5345345.5000000000000000f;
993	float l2 =       234234.7031250000000000f;
994	float l3 = 124353454080.0000000000000000f;
995
996	vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f);
997	vfloat4 b2(l0, l1, l2, l3);
998	float r2 = dot_s(a2, b2);
999
1000	// Test that reassociation causes a failure with the numbers we chose
1001	EXPECT_NE(r2, l0 + l1 + l2 + l3);
1002
1003	// Test that the sum works, for the association pattern we want used
1004	EXPECT_EQ(r2, (l0 + l2) + (l1 + l3));
1005}
1006
1007/** @brief Test vfloat4 dot3. */
1008TEST(vfloat4, dot3)
1009{
1010	vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f);
1011	vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f);
1012	vfloat4 r = dot3(a, b);
1013	EXPECT_EQ(r.lane<0>(), 3.0f);
1014	EXPECT_EQ(r.lane<1>(), 3.0f);
1015	EXPECT_EQ(r.lane<2>(), 3.0f);
1016	EXPECT_EQ(r.lane<3>(), 0.0f);
1017}
1018
1019/** @brief Test vfloat4 dot3_s. */
1020TEST(vfloat4, dot3_s)
1021{
1022	vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f);
1023	vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f);
1024	float r = dot3_s(a, b);
1025	EXPECT_EQ(r, 3.0f);
1026}
1027
1028/** @brief Test vfloat4 normalize. */
1029TEST(vfloat4, normalize)
1030{
1031	vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
1032	vfloat4 r = normalize(a);
1033	EXPECT_NEAR(r.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
1034	EXPECT_NEAR(r.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
1035	EXPECT_NEAR(r.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
1036	EXPECT_NEAR(r.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);
1037}
1038
1039/** @brief Test vfloat4 normalize_safe. */
1040TEST(vfloat4, normalize_safe)
1041{
1042	vfloat4 s(-1.0f, -1.0f, -1.0f, -1.0f);
1043
1044	vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f);
1045	vfloat4 r1 = normalize_safe(a1, s);
1046	EXPECT_NEAR(r1.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
1047	EXPECT_NEAR(r1.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
1048	EXPECT_NEAR(r1.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
1049	EXPECT_NEAR(r1.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);
1050
1051	vfloat4 a2(0.0f, 0.0f, 0.0f, 0.0f);
1052	vfloat4 r2 = normalize_safe(a2, s);
1053	EXPECT_EQ(r2.lane<0>(), -1.0f);
1054	EXPECT_EQ(r2.lane<1>(), -1.0f);
1055	EXPECT_EQ(r2.lane<2>(), -1.0f);
1056	EXPECT_EQ(r2.lane<3>(), -1.0f);
1057}
1058
1059/** @brief Test vfloat4 float_to_int. */
1060TEST(vfloat4, float_to_int)
1061{
1062	vfloat4 a(1.1f, 1.5f, -1.6f, 4.0f);
1063	vint4 r = float_to_int(a);
1064	EXPECT_EQ(r.lane<0>(), 1);
1065	EXPECT_EQ(r.lane<1>(), 1);
1066	EXPECT_EQ(r.lane<2>(), -1);
1067	EXPECT_EQ(r.lane<3>(), 4);
1068}
1069
1070/** @brief Test vfloat4 round. */
1071TEST(vfloat4, float_to_int_rtn)
1072{
1073	vfloat4 a(1.1f, 1.5f, 1.6f, 4.0f);
1074	vint4 r = float_to_int_rtn(a);
1075	EXPECT_EQ(r.lane<0>(), 1);
1076	EXPECT_EQ(r.lane<1>(), 2);
1077	EXPECT_EQ(r.lane<2>(), 2);
1078	EXPECT_EQ(r.lane<3>(), 4);
1079}
1080
1081/** @brief Test vfloat4 round. */
1082TEST(vfloat4, int_to_float)
1083{
1084	vint4 a(1, 2, 3, 4);
1085	vfloat4 r = int_to_float(a);
1086	EXPECT_EQ(r.lane<0>(), 1.0f);
1087	EXPECT_EQ(r.lane<1>(), 2.0f);
1088	EXPECT_EQ(r.lane<2>(), 3.0f);
1089	EXPECT_EQ(r.lane<3>(), 4.0f);
1090}
1091
1092/** @brief Test vfloat4 float to fp16 conversion. */
1093TEST(vfloat4, float_to_float16)
1094{
1095	vfloat4 a(1.5, 234.5, 345345.0, qnan);
1096	vint4 r = float_to_float16(a);
1097
1098	// Normal numbers
1099	EXPECT_EQ(r.lane<0>(), 0x3E00);
1100	EXPECT_EQ(r.lane<1>(), 0x5B54);
1101
1102	// Large numbers convert to infinity
1103	EXPECT_EQ(r.lane<2>(), 0x7C00);
1104
1105	// NaN must convert to any valid NaN encoding
1106	EXPECT_EQ((r.lane<3>() >> 10) & 0x1F, 0x1F); // Exponent must be all 1s
1107	EXPECT_NE(r.lane<3>() & (0x3FF), 0);         // Mantissa must be non-zero
1108}
1109
1110/** @brief Test float to fp16 conversion. */
1111TEST(sfloat, float_to_float16)
1112{
1113	int r = float_to_float16(234.5);
1114	EXPECT_EQ(r, 0x5B54);
1115}
1116
1117/** @brief Test vfloat4 fp16 to float conversion. */
1118TEST(vfloat4, float16_to_float)
1119{	vint4 a(0x3E00, 0x5B54, 0x7C00, 0xFFFF);
1120	vfloat4 r = float16_to_float(a);
1121
1122	// Normal numbers
1123	EXPECT_EQ(r.lane<0>(), 1.5);
1124	EXPECT_EQ(r.lane<1>(), 234.5);
1125
1126	// Infinities must be preserved
1127	EXPECT_NE(std::isinf(r.lane<2>()), 0);
1128
1129	// NaNs must be preserved
1130	EXPECT_NE(std::isnan(r.lane<3>()), 0);
1131}
1132
1133/** @brief Test fp16 to float conversion. */
1134TEST(sfloat, float16_to_float)
1135{
1136	float r = float16_to_float(0x5B54);
1137	EXPECT_EQ(r, 234.5);
1138}
1139
1140// VINT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1141
1142/** @brief Test unaligned vint4 data load. */
1143TEST(vint4, UnalignedLoad)
1144{
1145	vint4 a(&(s32_data[1]));
1146	EXPECT_EQ(a.lane<0>(), 1);
1147	EXPECT_EQ(a.lane<1>(), 2);
1148	EXPECT_EQ(a.lane<2>(), 3);
1149	EXPECT_EQ(a.lane<3>(), 4);
1150}
1151
1152/** @brief Test unaligned vint4 data load. */
1153TEST(vint4, UnalignedLoad8)
1154{
1155	vint4 a(&(u8_data[1]));
1156	EXPECT_EQ(a.lane<0>(), 1);
1157	EXPECT_EQ(a.lane<1>(), 2);
1158	EXPECT_EQ(a.lane<2>(), 3);
1159	EXPECT_EQ(a.lane<3>(), 4);
1160}
1161
1162/** @brief Test scalar duplicated vint4 load. */
1163TEST(vint4, ScalarDupLoad)
1164{
1165	vint4 a(42);
1166	EXPECT_EQ(a.lane<0>(), 42);
1167	EXPECT_EQ(a.lane<1>(), 42);
1168	EXPECT_EQ(a.lane<2>(), 42);
1169	EXPECT_EQ(a.lane<3>(), 42);
1170}
1171
1172/** @brief Test scalar vint4 load. */
1173TEST(vint4, ScalarLoad)
1174{
1175	vint4 a(11, 22, 33, 44);
1176	EXPECT_EQ(a.lane<0>(), 11);
1177	EXPECT_EQ(a.lane<1>(), 22);
1178	EXPECT_EQ(a.lane<2>(), 33);
1179	EXPECT_EQ(a.lane<3>(), 44);
1180}
1181
1182/** @brief Test copy vint4 load. */
1183TEST(vint4, CopyLoad)
1184{
1185	vint4 s(11, 22, 33, 44);
1186	vint4 a(s.m);
1187	EXPECT_EQ(a.lane<0>(), 11);
1188	EXPECT_EQ(a.lane<1>(), 22);
1189	EXPECT_EQ(a.lane<2>(), 33);
1190	EXPECT_EQ(a.lane<3>(), 44);
1191}
1192
1193/** @brief Test vint4 scalar lane set. */
1194TEST(int4, SetLane)
1195{
1196	vint4 a(0);
1197
1198	a.set_lane<0>(1);
1199	EXPECT_EQ(a.lane<0>(), 1);
1200	EXPECT_EQ(a.lane<1>(), 0);
1201	EXPECT_EQ(a.lane<2>(), 0);
1202	EXPECT_EQ(a.lane<3>(), 0);
1203
1204	a.set_lane<1>(2);
1205	EXPECT_EQ(a.lane<0>(), 1);
1206	EXPECT_EQ(a.lane<1>(), 2);
1207	EXPECT_EQ(a.lane<2>(), 0);
1208	EXPECT_EQ(a.lane<3>(), 0);
1209
1210	a.set_lane<2>(3);
1211	EXPECT_EQ(a.lane<0>(), 1);
1212	EXPECT_EQ(a.lane<1>(), 2);
1213	EXPECT_EQ(a.lane<2>(), 3);
1214	EXPECT_EQ(a.lane<3>(), 0);
1215
1216	a.set_lane<3>(4);
1217	EXPECT_EQ(a.lane<0>(), 1);
1218	EXPECT_EQ(a.lane<1>(), 2);
1219	EXPECT_EQ(a.lane<2>(), 3);
1220	EXPECT_EQ(a.lane<3>(), 4);
1221}
1222
1223/** @brief Test vint4 zero. */
1224TEST(vint4, Zero)
1225{
1226	vint4 a = vint4::zero();
1227	EXPECT_EQ(a.lane<0>(), 0);
1228	EXPECT_EQ(a.lane<1>(), 0);
1229	EXPECT_EQ(a.lane<2>(), 0);
1230	EXPECT_EQ(a.lane<3>(), 0);
1231}
1232
1233/** @brief Test vint4 load1. */
1234TEST(vint4, Load1)
1235{
1236	int s = 42;
1237	vint4 a = vint4::load1(&s);
1238	EXPECT_EQ(a.lane<0>(), 42);
1239	EXPECT_EQ(a.lane<1>(), 42);
1240	EXPECT_EQ(a.lane<2>(), 42);
1241	EXPECT_EQ(a.lane<3>(), 42);
1242}
1243
1244/** @brief Test vint4 loada. */
1245TEST(vint4, Loada)
1246{
1247	vint4 a = vint4::loada(&(s32_data[0]));
1248	EXPECT_EQ(a.lane<0>(), 0);
1249	EXPECT_EQ(a.lane<1>(), 1);
1250	EXPECT_EQ(a.lane<2>(), 2);
1251	EXPECT_EQ(a.lane<3>(), 3);
1252}
1253
1254/** @brief Test vint4 lane_id. */
1255TEST(vint4, LaneID)
1256{
1257	vint4 a = vint4::lane_id();
1258	EXPECT_EQ(a.lane<0>(), 0);
1259	EXPECT_EQ(a.lane<1>(), 1);
1260	EXPECT_EQ(a.lane<2>(), 2);
1261	EXPECT_EQ(a.lane<3>(), 3);
1262}
1263
1264/** @brief Test vint4 add. */
1265TEST(vint4, vadd)
1266{
1267	vint4 a(1, 2, 3, 4);
1268	vint4 b(2, 3, 4, 5);
1269	a = a + b;
1270	EXPECT_EQ(a.lane<0>(), 1 + 2);
1271	EXPECT_EQ(a.lane<1>(), 2 + 3);
1272	EXPECT_EQ(a.lane<2>(), 3 + 4);
1273	EXPECT_EQ(a.lane<3>(), 4 + 5);
1274}
1275
1276/** @brief Test vint4 self-add. */
1277TEST(vint4, vselfadd)
1278{
1279	vint4 a(1, 2, 3, 4);
1280	vint4 b(2, 3, 4, 5);
1281	a += b;
1282
1283	EXPECT_EQ(a.lane<0>(), 1 + 2);
1284	EXPECT_EQ(a.lane<1>(), 2 + 3);
1285	EXPECT_EQ(a.lane<2>(), 3 + 4);
1286	EXPECT_EQ(a.lane<3>(), 4 + 5);
1287}
1288
1289/** @brief Test vint4 add. */
1290TEST(vint4, vsadd)
1291{
1292	vint4 a(1, 2, 3, 4);
1293	int b = 5;
1294	a = a + b;
1295	EXPECT_EQ(a.lane<0>(), 1 + 5);
1296	EXPECT_EQ(a.lane<1>(), 2 + 5);
1297	EXPECT_EQ(a.lane<2>(), 3 + 5);
1298	EXPECT_EQ(a.lane<3>(), 4 + 5);
1299}
1300
1301/** @brief Test vint4 sub. */
1302TEST(vint4, vsub)
1303{
1304	vint4 a(1, 2, 4, 4);
1305	vint4 b(2, 3, 3, 5);
1306	a = a - b;
1307	EXPECT_EQ(a.lane<0>(), 1 - 2);
1308	EXPECT_EQ(a.lane<1>(), 2 - 3);
1309	EXPECT_EQ(a.lane<2>(), 4 - 3);
1310	EXPECT_EQ(a.lane<3>(), 4 - 5);
1311}
1312
1313/** @brief Test vint4 sub. */
1314TEST(vint4, vssub)
1315{
1316	vint4 a(1, 2, 4, 4);
1317	int b = 5;
1318	a = a - b;
1319	EXPECT_EQ(a.lane<0>(), 1 - 5);
1320	EXPECT_EQ(a.lane<1>(), 2 - 5);
1321	EXPECT_EQ(a.lane<2>(), 4 - 5);
1322	EXPECT_EQ(a.lane<3>(), 4 - 5);
1323}
1324
1325/** @brief Test vint4 mul. */
1326TEST(vint4, vmul)
1327{
1328	vint4 a(1, 2, 4, 4);
1329	vint4 b(2, 3, 3, 5);
1330	a = a * b;
1331	EXPECT_EQ(a.lane<0>(), 1 * 2);
1332	EXPECT_EQ(a.lane<1>(), 2 * 3);
1333	EXPECT_EQ(a.lane<2>(), 4 * 3);
1334	EXPECT_EQ(a.lane<3>(), 4 * 5);
1335}
1336
1337/** @brief Test vint4 mul. */
1338TEST(vint4, vsmul)
1339{
1340	vint4 a(1, 2, 4, 4);
1341	a = a * 3;
1342	EXPECT_EQ(a.lane<0>(), 1 * 3);
1343	EXPECT_EQ(a.lane<1>(), 2 * 3);
1344	EXPECT_EQ(a.lane<2>(), 4 * 3);
1345	EXPECT_EQ(a.lane<3>(), 4 * 3);
1346
1347	vint4 b(1, 2, -4, 4);
1348	b = b * -3;
1349	EXPECT_EQ(b.lane<0>(), 1 * -3);
1350	EXPECT_EQ(b.lane<1>(), 2 * -3);
1351	EXPECT_EQ(b.lane<2>(), -4 * -3);
1352	EXPECT_EQ(b.lane<3>(), 4 * -3);
1353}
1354
1355/** @brief Test vint4 bitwise invert. */
1356TEST(vint4, bit_invert)
1357{
1358	vint4 a(-1, 0, 1, 2);
1359	a = ~a;
1360	EXPECT_EQ(a.lane<0>(), ~-1);
1361	EXPECT_EQ(a.lane<1>(), ~0);
1362	EXPECT_EQ(a.lane<2>(), ~1);
1363	EXPECT_EQ(a.lane<3>(), ~2);
1364}
1365
1366/** @brief Test vint4 bitwise or. */
1367TEST(vint4, bit_vor)
1368{
1369	vint4 a(1, 2, 3, 4);
1370	vint4 b(2, 3, 4, 5);
1371	a = a | b;
1372	EXPECT_EQ(a.lane<0>(), 3);
1373	EXPECT_EQ(a.lane<1>(), 3);
1374	EXPECT_EQ(a.lane<2>(), 7);
1375	EXPECT_EQ(a.lane<3>(), 5);
1376}
1377
1378TEST(vint4, bit_vsor)
1379{
1380	vint4 a(1, 2, 3, 4);
1381	int b = 2;
1382	a = a | b;
1383	EXPECT_EQ(a.lane<0>(), 3);
1384	EXPECT_EQ(a.lane<1>(), 2);
1385	EXPECT_EQ(a.lane<2>(), 3);
1386	EXPECT_EQ(a.lane<3>(), 6);
1387}
1388
1389/** @brief Test vint4 bitwise and. */
1390TEST(vint4, bit_vand)
1391{
1392	vint4 a(1, 2, 3, 4);
1393	vint4 b(2, 3, 4, 5);
1394	a = a & b;
1395	EXPECT_EQ(a.lane<0>(), 0);
1396	EXPECT_EQ(a.lane<1>(), 2);
1397	EXPECT_EQ(a.lane<2>(), 0);
1398	EXPECT_EQ(a.lane<3>(), 4);
1399}
1400
1401/** @brief Test vint4 bitwise and. */
1402TEST(vint4, bit_vsand)
1403{
1404	vint4 a(1, 2, 3, 4);
1405	int b = 2;
1406	a = a & b;
1407	EXPECT_EQ(a.lane<0>(), 0);
1408	EXPECT_EQ(a.lane<1>(), 2);
1409	EXPECT_EQ(a.lane<2>(), 2);
1410	EXPECT_EQ(a.lane<3>(), 0);
1411}
1412
1413/** @brief Test vint4 bitwise xor. */
1414TEST(vint4, bit_vxor)
1415{
1416	vint4 a(1, 2, 3, 4);
1417	vint4 b(2, 3, 4, 5);
1418	a = a ^ b;
1419	EXPECT_EQ(a.lane<0>(), 3);
1420	EXPECT_EQ(a.lane<1>(), 1);
1421	EXPECT_EQ(a.lane<2>(), 7);
1422	EXPECT_EQ(a.lane<3>(), 1);
1423}
1424
1425/** @brief Test vint4 bitwise xor. */
1426TEST(vint4, bit_vsxor)
1427{
1428	vint4 a(1, 2, 3, 4);
1429	int b = 2;
1430	a = a ^ b;
1431	EXPECT_EQ(a.lane<0>(), 3);
1432	EXPECT_EQ(a.lane<1>(), 0);
1433	EXPECT_EQ(a.lane<2>(), 1);
1434	EXPECT_EQ(a.lane<3>(), 6);
1435}
1436
1437/** @brief Test vint4 ceq. */
1438TEST(vint4, ceq)
1439{
1440	vint4 a1(1, 2, 3, 4);
1441	vint4 b1(0, 1, 2, 3);
1442	vmask4 r1 = a1 == b1;
1443	EXPECT_EQ(0u, mask(r1));
1444	EXPECT_EQ(false, any(r1));
1445	EXPECT_EQ(false, all(r1));
1446
1447	vint4 a2(1, 2, 3, 4);
1448	vint4 b2(1, 0, 0, 0);
1449	vmask4 r2 = a2 == b2;
1450	EXPECT_EQ(0x1u, mask(r2));
1451	EXPECT_EQ(true, any(r2));
1452	EXPECT_EQ(false, all(r2));
1453
1454	vint4 a3(1, 2, 3, 4);
1455	vint4 b3(1, 0, 3, 0);
1456	vmask4 r3 = a3 == b3;
1457	EXPECT_EQ(0x5u, mask(r3));
1458	EXPECT_EQ(true, any(r3));
1459	EXPECT_EQ(false, all(r3));
1460
1461	vint4 a4(1, 2, 3, 4);
1462	vmask4 r4 = a4 == a4;
1463	EXPECT_EQ(0xFu, mask(r4));
1464	EXPECT_EQ(true, any(r4));
1465	EXPECT_EQ(true, all(r4));
1466}
1467
1468/** @brief Test vint4 cne. */
1469TEST(vint4, cne)
1470{
1471	vint4 a1(1, 2, 3, 4);
1472	vint4 b1(0, 1, 2, 3);
1473	vmask4 r1 = a1 != b1;
1474	EXPECT_EQ(0xFu, mask(r1));
1475	EXPECT_EQ(true, any(r1));
1476	EXPECT_EQ(true, all(r1));
1477
1478	vint4 a2(1, 2, 3, 4);
1479	vint4 b2(1, 0, 0, 0);
1480	vmask4 r2 = a2 != b2;
1481	EXPECT_EQ(0xEu, mask(r2));
1482	EXPECT_EQ(true, any(r2));
1483	EXPECT_EQ(false, all(r2));
1484
1485	vint4 a3(1, 2, 3, 4);
1486	vint4 b3(1, 0, 3, 0);
1487	vmask4 r3 = a3 != b3;
1488	EXPECT_EQ(0xAu, mask(r3));
1489	EXPECT_EQ(true, any(r3));
1490	EXPECT_EQ(false, all(r3));
1491
1492	vint4 a4(1, 2, 3, 4);
1493	vmask4 r4 = a4 != a4;
1494	EXPECT_EQ(0u, mask(r4));
1495	EXPECT_EQ(false, any(r4));
1496	EXPECT_EQ(false, all(r4));
1497}
1498
1499/** @brief Test vint4 clt. */
1500TEST(vint4, clt)
1501{
1502	vint4 a(1, 2, 3, 4);
1503	vint4 b(0, 3, 3, 5);
1504	vmask4 r = a < b;
1505	EXPECT_EQ(0xAu, mask(r));
1506}
1507
1508/** @brief Test vint4 cgt. */
1509TEST(vint4, cle)
1510{
1511	vint4 a(1, 2, 3, 4);
1512	vint4 b(0, 3, 3, 5);
1513	vmask4 r = a > b;
1514	EXPECT_EQ(0x1u, mask(r));
1515}
1516
1517/** @brief Test vint4 lsl. */
1518TEST(vint4, lsl)
1519{
1520	vint4 a(1, 2, 4, 4);
1521	a = lsl<0>(a);
1522	EXPECT_EQ(a.lane<0>(), 1);
1523	EXPECT_EQ(a.lane<1>(), 2);
1524	EXPECT_EQ(a.lane<2>(), 4);
1525	EXPECT_EQ(a.lane<3>(), 4);
1526
1527	a = lsl<1>(a);
1528	EXPECT_EQ(a.lane<0>(), 2);
1529	EXPECT_EQ(a.lane<1>(), 4);
1530	EXPECT_EQ(a.lane<2>(), 8);
1531	EXPECT_EQ(a.lane<3>(), 8);
1532
1533	a = lsl<2>(a);
1534	EXPECT_EQ(a.lane<0>(), 8);
1535	EXPECT_EQ(a.lane<1>(), 16);
1536	EXPECT_EQ(a.lane<2>(), 32);
1537	EXPECT_EQ(a.lane<3>(), 32);
1538}
1539
1540/** @brief Test vint4 lsr. */
1541TEST(vint4, lsr)
1542{
1543	vint4 a(1, 2, 4, -4);
1544	a = lsr<0>(a);
1545	EXPECT_EQ(a.lane<0>(),  1);
1546	EXPECT_EQ(a.lane<1>(),  2);
1547	EXPECT_EQ(a.lane<2>(),  4);
1548	EXPECT_EQ(a.lane<3>(),  static_cast<int>(0xFFFFFFFC));
1549
1550	a = lsr<1>(a);
1551	EXPECT_EQ(a.lane<0>(),  0);
1552	EXPECT_EQ(a.lane<1>(),  1);
1553	EXPECT_EQ(a.lane<2>(),  2);
1554	EXPECT_EQ(a.lane<3>(),  0x7FFFFFFE);
1555
1556	a = lsr<2>(a);
1557	EXPECT_EQ(a.lane<0>(),  0);
1558	EXPECT_EQ(a.lane<1>(),  0);
1559	EXPECT_EQ(a.lane<2>(),  0);
1560	EXPECT_EQ(a.lane<3>(),  0x1FFFFFFF);
1561}
1562
1563/** @brief Test vint4 asr. */
1564TEST(vint4, asr)
1565{
1566	vint4 a(1, 2, 4, -4);
1567	a = asr<0>(a);
1568	EXPECT_EQ(a.lane<0>(),  1);
1569	EXPECT_EQ(a.lane<1>(),  2);
1570	EXPECT_EQ(a.lane<2>(),  4);
1571	EXPECT_EQ(a.lane<3>(), -4);
1572
1573	a = asr<1>(a);
1574	EXPECT_EQ(a.lane<0>(),  0);
1575	EXPECT_EQ(a.lane<1>(),  1);
1576	EXPECT_EQ(a.lane<2>(),  2);
1577	EXPECT_EQ(a.lane<3>(), -2);
1578
1579	// Note - quirk of asr is that you will get "stuck" at -1
1580	a = asr<2>(a);
1581	EXPECT_EQ(a.lane<0>(),  0);
1582	EXPECT_EQ(a.lane<1>(),  0);
1583	EXPECT_EQ(a.lane<2>(),  0);
1584	EXPECT_EQ(a.lane<3>(), -1);
1585}
1586
1587/** @brief Test vint4 min. */
1588TEST(vint4, min)
1589{
1590	vint4 a(1, 2, 3, 4);
1591	vint4 b(0, 3, 3, 5);
1592	vint4 r = min(a, b);
1593	EXPECT_EQ(r.lane<0>(), 0);
1594	EXPECT_EQ(r.lane<1>(), 2);
1595	EXPECT_EQ(r.lane<2>(), 3);
1596	EXPECT_EQ(r.lane<3>(), 4);
1597}
1598
1599/** @brief Test vint4 max. */
1600TEST(vint4, max)
1601{
1602	vint4 a(1, 2, 3, 4);
1603	vint4 b(0, 3, 3, 5);
1604	vint4 r = max(a, b);
1605	EXPECT_EQ(r.lane<0>(), 1);
1606	EXPECT_EQ(r.lane<1>(), 3);
1607	EXPECT_EQ(r.lane<2>(), 3);
1608	EXPECT_EQ(r.lane<3>(), 5);
1609}
1610
1611/** @brief Test vint4 clamp. */
1612TEST(vint4, clamp)
1613{
1614	vint4 a(1, 2, 3, 4);
1615	vint4 r = clamp(2, 3, a);
1616	EXPECT_EQ(r.lane<0>(), 2);
1617	EXPECT_EQ(r.lane<1>(), 2);
1618	EXPECT_EQ(r.lane<2>(), 3);
1619	EXPECT_EQ(r.lane<3>(), 3);
1620}
1621
1622/** @brief Test vint4 hmin. */
1623TEST(vint4, hmin)
1624{
1625	vint4 a1(1, 2, 1, 2);
1626	vint4 r1 = hmin(a1);
1627	EXPECT_EQ(r1.lane<0>(), 1);
1628	EXPECT_EQ(r1.lane<1>(), 1);
1629	EXPECT_EQ(r1.lane<2>(), 1);
1630	EXPECT_EQ(r1.lane<3>(), 1);
1631
1632	vint4 a2(1, 2, -1, 5);
1633	vint4 r2 = hmin(a2);
1634	EXPECT_EQ(r2.lane<0>(), -1);
1635	EXPECT_EQ(r2.lane<1>(), -1);
1636	EXPECT_EQ(r2.lane<2>(), -1);
1637	EXPECT_EQ(r2.lane<3>(), -1);
1638}
1639
1640/** @brief Test vint4 hmax. */
1641TEST(vint4, hmax)
1642{
1643	vint4 a1(1, 3, 1, 2);
1644	vint4 r1 = hmax(a1);
1645	EXPECT_EQ(r1.lane<0>(), 3);
1646	EXPECT_EQ(r1.lane<1>(), 3);
1647	EXPECT_EQ(r1.lane<2>(), 3);
1648	EXPECT_EQ(r1.lane<3>(), 3);
1649
1650	vint4 a2(1, 2, -1, 5);
1651	vint4 r2 = hmax(a2);
1652	EXPECT_EQ(r2.lane<0>(), 5);
1653	EXPECT_EQ(r2.lane<1>(), 5);
1654	EXPECT_EQ(r2.lane<2>(), 5);
1655	EXPECT_EQ(r2.lane<3>(), 5);
1656}
1657
1658/** @brief Test vint4 hadd_s. */
1659TEST(vint4, hadd_s)
1660{
1661	vint4 a1(1, 3, 5, 7);
1662	int r1 = hadd_s(a1);
1663	EXPECT_EQ(r1, 16);
1664
1665	vint4 a2(1, 2, -1, 5);
1666	int r2 = hadd_s(a2);
1667	EXPECT_EQ(r2, 7);
1668}
1669
1670/** @brief Test vint4 hadd_rgb_s. */
1671TEST(vint4, hadd_rgb_s)
1672{
1673	vint4 a1(1, 3, 5, 7);
1674	int r1 = hadd_rgb_s(a1);
1675	EXPECT_EQ(r1, 9);
1676
1677	vint4 a2(1, 2, -1, 5);
1678	int r2 = hadd_rgb_s(a2);
1679	EXPECT_EQ(r2, 2);
1680}
1681
1682/** @brief Test vint4 clz. */
1683TEST(vint4, clz)
1684{
1685	int msb_set = static_cast<int>(0x80000000);
1686	vint4 a1(msb_set, 0x40000000, 0x20000000, 0x10000000);
1687	vint4 r1 = clz(a1);
1688	EXPECT_EQ(r1.lane<0>(), 0);
1689	EXPECT_EQ(r1.lane<1>(), 1);
1690	EXPECT_EQ(r1.lane<2>(), 2);
1691	EXPECT_EQ(r1.lane<3>(), 3);
1692
1693	vint4 a2(0x0, 0x1, 0x2, 0x4);
1694	vint4 r2 = clz(a2);
1695	EXPECT_EQ(r2.lane<0>(), 32);
1696	EXPECT_EQ(r2.lane<1>(), 31);
1697	EXPECT_EQ(r2.lane<2>(), 30);
1698	EXPECT_EQ(r2.lane<3>(), 29);
1699}
1700
1701/** @brief Test vint4 two_to_the_n. */
1702TEST(vint4, two_to_the_n)
1703{
1704	vint4 a1(0, 1, 2, 3);
1705	vint4 r1 = two_to_the_n(a1);
1706	EXPECT_EQ(r1.lane<0>(), 1 << 0);
1707	EXPECT_EQ(r1.lane<1>(), 1 << 1);
1708	EXPECT_EQ(r1.lane<2>(), 1 << 2);
1709	EXPECT_EQ(r1.lane<3>(), 1 << 3);
1710
1711	vint4 a2(27, 28, 29, 30);
1712	vint4 r2 = two_to_the_n(a2);
1713	EXPECT_EQ(r2.lane<0>(), 1 << 27);
1714	EXPECT_EQ(r2.lane<1>(), 1 << 28);
1715	EXPECT_EQ(r2.lane<2>(), 1 << 29);
1716	EXPECT_EQ(r2.lane<3>(), 1 << 30);
1717
1718	// Shifts higher than 30 are not allowed as it overflows the int type;
1719	// and results in implementation-defined behavior because of how we
1720	// generate the shifted result in two_to_the_n().
1721	// -  Shift by 31 shifts into sign bit
1722	// -  Shift by 32 shifts off the end
1723}
1724
1725/** @brief Test vint4 storea. */
1726TEST(vint4, storea)
1727{
1728	ASTCENC_ALIGNAS int out[4];
1729	vint4 a(s32_data);
1730	storea(a, out);
1731	EXPECT_EQ(out[0], 0);
1732	EXPECT_EQ(out[1], 1);
1733	EXPECT_EQ(out[2], 2);
1734	EXPECT_EQ(out[3], 3);
1735}
1736
1737/** @brief Test vint4 store. */
1738TEST(vint4, store)
1739{
1740	ASTCENC_ALIGNAS int out[5];
1741	vint4 a(s32_data);
1742	store(a, &(out[1]));
1743	EXPECT_EQ(out[1], 0);
1744	EXPECT_EQ(out[2], 1);
1745	EXPECT_EQ(out[3], 2);
1746	EXPECT_EQ(out[4], 3);
1747}
1748
1749/** @brief Test vint4 store_nbytes. */
1750TEST(vint4, store_nbytes)
1751{
1752	ASTCENC_ALIGNAS int out;
1753	vint4 a(42, 314, 75, 90);
1754	store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
1755	EXPECT_EQ(out, 42);
1756}
1757
1758/** @brief Test vint4 store_lanes_masked. */
1759TEST(vint4, store_lanes_masked)
1760{
1761	uint8_t resulta[16] { 0 };
1762
1763	// Store nothing
1764	vmask4 mask1 = vint4(0) == vint4(1);
1765	vint4 data1 = vint4(1);
1766
1767	store_lanes_masked(resulta, data1, mask1);
1768	vint4 result1v = vint4::load(resulta);
1769	vint4 expect1v = vint4::zero();
1770	EXPECT_TRUE(all(result1v == expect1v));
1771
1772	// Store half
1773	vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1);
1774	vint4 data2 = vint4(2);
1775
1776	store_lanes_masked(resulta, data2, mask2);
1777	vint4 result2v = vint4::load(resulta);
1778	vint4 expect2v = vint4(2, 2, 0, 0);
1779	EXPECT_TRUE(all(result2v == expect2v));
1780
1781	// Store all
1782	vmask4 mask3 = vint4(1) == vint4(1);
1783	vint4 data3 = vint4(3);
1784
1785	store_lanes_masked(resulta, data3, mask3);
1786	vint4 result3v = vint4::load(resulta);
1787	vint4 expect3v = vint4(3);
1788	EXPECT_TRUE(all(result3v == expect3v));
1789}
1790
1791/** @brief Test vint4 store_lanes_masked to unaligned address. */
1792TEST(vint4, store_lanes_masked_unaligned)
1793{
1794	uint8_t resulta[17] { 0 };
1795
1796	// Store nothing
1797	vmask4 mask1 = vint4(0) == vint4(1);
1798	vint4 data1 = vint4(1);
1799
1800	store_lanes_masked(resulta + 1, data1, mask1);
1801	vint4 result1v = vint4::load(resulta + 1);
1802	vint4 expect1v = vint4::zero();
1803	EXPECT_TRUE(all(result1v == expect1v));
1804
1805	// Store half
1806	vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1);
1807	vint4 data2 = vint4(2);
1808
1809	store_lanes_masked(resulta + 1, data2, mask2);
1810	vint4 result2v = vint4::load(resulta + 1);
1811	vint4 expect2v = vint4(2, 2, 0, 0);
1812	EXPECT_TRUE(all(result2v == expect2v));
1813
1814	// Store all
1815	vmask4 mask3 = vint4(1) == vint4(1);
1816	vint4 data3 = vint4(3);
1817
1818	store_lanes_masked(resulta + 1, data3, mask3);
1819	vint4 result3v = vint4::load(resulta + 1);
1820	vint4 expect3v = vint4(3);
1821	EXPECT_TRUE(all(result3v == expect3v));
1822}
1823
1824/** @brief Test vint4 gatheri. */
1825TEST(vint4, gatheri)
1826{
1827	vint4 indices(0, 4, 3, 2);
1828	vint4 r = gatheri(s32_data, indices);
1829	EXPECT_EQ(r.lane<0>(), 0);
1830	EXPECT_EQ(r.lane<1>(), 4);
1831	EXPECT_EQ(r.lane<2>(), 3);
1832	EXPECT_EQ(r.lane<3>(), 2);
1833}
1834
1835/** @brief Test vint4 pack_low_bytes. */
1836TEST(vint4, pack_low_bytes)
1837{
1838	vint4 a(1, 2, 3, 4);
1839	vint4 r = pack_low_bytes(a);
1840	EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2  << 8) | (1 << 0));
1841}
1842
1843/** @brief Test vint4 select. */
1844TEST(vint4, select)
1845{
1846	vint4 m1(1, 1, 1, 1);
1847	vint4 m2(1, 2, 1, 2);
1848	vmask4 cond = m1 == m2;
1849
1850	vint4 a(1, 3, 3, 1);
1851	vint4 b(4, 2, 2, 4);
1852
1853	vint4 r1 = select(a, b, cond);
1854	EXPECT_EQ(r1.lane<0>(), 4);
1855	EXPECT_EQ(r1.lane<1>(), 3);
1856	EXPECT_EQ(r1.lane<2>(), 2);
1857	EXPECT_EQ(r1.lane<3>(), 1);
1858
1859	vint4 r2 = select(b, a, cond);
1860	EXPECT_EQ(r2.lane<0>(), 1);
1861	EXPECT_EQ(r2.lane<1>(), 2);
1862	EXPECT_EQ(r2.lane<2>(), 3);
1863	EXPECT_EQ(r2.lane<3>(), 4);
1864}
1865
1866// VMASK4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1867/** @brief Test vmask4 scalar literal constructor. */
1868TEST(vmask4, scalar_literal_construct)
1869{
1870	vfloat4 m1a(0.0f, 0.0f, 0.0f, 0.0f);
1871	vfloat4 m1b(1.0f, 1.0f, 1.0f, 1.0f);
1872	vmask4 m1(true);
1873
1874	vfloat4 r = select(m1a, m1b, m1);
1875
1876	EXPECT_EQ(r.lane<0>(), 1.0f);
1877	EXPECT_EQ(r.lane<1>(), 1.0f);
1878	EXPECT_EQ(r.lane<2>(), 1.0f);
1879	EXPECT_EQ(r.lane<3>(), 1.0f);
1880
1881	r = select(m1b, m1a, m1);
1882
1883	EXPECT_EQ(r.lane<0>(), 0.0f);
1884	EXPECT_EQ(r.lane<1>(), 0.0f);
1885	EXPECT_EQ(r.lane<2>(), 0.0f);
1886	EXPECT_EQ(r.lane<3>(), 0.0f);
1887}
1888
1889/** @brief Test vmask4 literal constructor. */
1890TEST(vmask4, literal_construct)
1891{
1892	vfloat4 m1a(0.0f, 0.0f, 0.0f, 0.0f);
1893	vfloat4 m1b(1.0f, 1.0f, 1.0f, 1.0f);
1894	vmask4 m1(true, false, true, false);
1895
1896	vfloat4 r = select(m1a, m1b, m1);
1897
1898	EXPECT_EQ(r.lane<0>(), 1.0f);
1899	EXPECT_EQ(r.lane<1>(), 0.0f);
1900	EXPECT_EQ(r.lane<2>(), 1.0f);
1901	EXPECT_EQ(r.lane<3>(), 0.0f);
1902}
1903
1904/** @brief Test vmask4 or. */
1905TEST(vmask4, or)
1906{
1907	vfloat4 m1a(0, 1, 0, 1);
1908	vfloat4 m1b(1, 1, 1, 1);
1909	vmask4 m1 = m1a == m1b;
1910
1911	vfloat4 m2a(1, 1, 0, 0);
1912	vfloat4 m2b(1, 1, 1, 1);
1913	vmask4 m2 = m2a == m2b;
1914
1915	vmask4 r = m1 | m2;
1916	EXPECT_EQ(mask(r), 0xBu);
1917}
1918
1919/** @brief Test vmask4 and. */
1920TEST(vmask4, and)
1921{
1922	vfloat4 m1a(0, 1, 0, 1);
1923	vfloat4 m1b(1, 1, 1, 1);
1924	vmask4 m1 = m1a == m1b;
1925
1926	vfloat4 m2a(1, 1, 0, 0);
1927	vfloat4 m2b(1, 1, 1, 1);
1928	vmask4 m2 = m2a == m2b;
1929
1930	vmask4 r = m1 & m2;
1931	EXPECT_EQ(mask(r), 0x2u);
1932}
1933
1934/** @brief Test vmask4 xor. */
1935TEST(vmask4, xor)
1936{
1937	vfloat4 m1a(0, 1, 0, 1);
1938	vfloat4 m1b(1, 1, 1, 1);
1939	vmask4 m1 = m1a == m1b;
1940
1941	vfloat4 m2a(1, 1, 0, 0);
1942	vfloat4 m2b(1, 1, 1, 1);
1943	vmask4 m2 = m2a == m2b;
1944
1945	vmask4 r = m1 ^ m2;
1946	EXPECT_EQ(mask(r), 0x9u);
1947}
1948
1949/** @brief Test vmask4 not. */
1950TEST(vmask4, not)
1951{
1952	vfloat4 m1a(0, 1, 0, 1);
1953	vfloat4 m1b(1, 1, 1, 1);
1954	vmask4 m1 = m1a == m1b;
1955	vmask4 r = ~m1;
1956	EXPECT_EQ(mask(r), 0x5u);
1957}
1958
1959/** @brief Test vint4 table permute. */
1960TEST(vint4, vtable_8bt_32bi_32entry)
1961{
1962	vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
1963	vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
1964
1965	vint4 table0p, table1p;
1966	vtable_prepare(table0, table1, table0p, table1p);
1967
1968	vint4 index(0, 7, 4, 31);
1969
1970	vint4 result = vtable_8bt_32bi(table0p, table1p, index);
1971
1972	EXPECT_EQ(result.lane<0>(),  3);
1973	EXPECT_EQ(result.lane<1>(),  4);
1974	EXPECT_EQ(result.lane<2>(),  7);
1975	EXPECT_EQ(result.lane<3>(), 28);
1976}
1977
1978/** @brief Test vint4 table permute. */
1979TEST(vint4, vtable_8bt_32bi_64entry)
1980{
1981	vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
1982	vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
1983	vint4 table2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
1984	vint4 table3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);
1985
1986	vint4 table0p, table1p, table2p, table3p;
1987	vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p);
1988
1989	vint4 index(0, 7, 38, 63);
1990
1991	vint4 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index);
1992
1993	EXPECT_EQ(result.lane<0>(),  3);
1994	EXPECT_EQ(result.lane<1>(),  4);
1995	EXPECT_EQ(result.lane<2>(), 37);
1996	EXPECT_EQ(result.lane<3>(), 60);
1997}
1998
1999/** @brief Test vint4 rgba byte interleave. */
2000TEST(vint4, interleave_rgba8)
2001{
2002	vint4 r(0x01, 0x11, 0x21, 0x31);
2003	vint4 g(0x02, 0x12, 0x22, 0x32);
2004	vint4 b(0x03, 0x13, 0x23, 0x33);
2005	vint4 a(0x04, 0x14, 0x24, 0x34);
2006
2007	vint4 result = interleave_rgba8(r, g, b, a);
2008
2009	EXPECT_EQ(result.lane<0>(), 0x04030201);
2010	EXPECT_EQ(result.lane<1>(), 0x14131211);
2011	EXPECT_EQ(result.lane<2>(), 0x24232221);
2012	EXPECT_EQ(result.lane<3>(), 0x34333231);
2013}
2014
2015# if ASTCENC_SIMD_WIDTH == 8
2016
2017// VFLOAT8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2018
2019/** @brief Test unaligned vfloat8 data load. */
2020TEST(vfloat8, UnalignedLoad)
2021{
2022	vfloat8 a(&(f32_data[1]));
2023	EXPECT_EQ(a.lane<0>(), 1.0f);
2024	EXPECT_EQ(a.lane<1>(), 2.0f);
2025	EXPECT_EQ(a.lane<2>(), 3.0f);
2026	EXPECT_EQ(a.lane<3>(), 4.0f);
2027	EXPECT_EQ(a.lane<4>(), 5.0f);
2028	EXPECT_EQ(a.lane<5>(), 6.0f);
2029	EXPECT_EQ(a.lane<6>(), 7.0f);
2030	EXPECT_EQ(a.lane<7>(), 8.0f);
2031}
2032
2033/** @brief Test scalar duplicated vfloat8 load. */
2034TEST(vfloat8, ScalarDupLoad)
2035{
2036	vfloat8 a(1.1f);
2037	EXPECT_EQ(a.lane<0>(), 1.1f);
2038	EXPECT_EQ(a.lane<1>(), 1.1f);
2039	EXPECT_EQ(a.lane<2>(), 1.1f);
2040	EXPECT_EQ(a.lane<3>(), 1.1f);
2041	EXPECT_EQ(a.lane<4>(), 1.1f);
2042	EXPECT_EQ(a.lane<5>(), 1.1f);
2043	EXPECT_EQ(a.lane<6>(), 1.1f);
2044	EXPECT_EQ(a.lane<7>(), 1.1f);
2045}
2046
2047/** @brief Test scalar vfloat8 load. */
2048TEST(vfloat8, ScalarLoad)
2049{
2050	vfloat8 a(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
2051	EXPECT_EQ(a.lane<0>(), 1.1f);
2052	EXPECT_EQ(a.lane<1>(), 2.2f);
2053	EXPECT_EQ(a.lane<2>(), 3.3f);
2054	EXPECT_EQ(a.lane<3>(), 4.4f);
2055	EXPECT_EQ(a.lane<4>(), 5.5f);
2056	EXPECT_EQ(a.lane<5>(), 6.6f);
2057	EXPECT_EQ(a.lane<6>(), 7.7f);
2058	EXPECT_EQ(a.lane<7>(), 8.8f);
2059}
2060
2061/** @brief Test copy vfloat8 load. */
2062TEST(vfloat8, CopyLoad)
2063{
2064	vfloat8 s(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
2065	vfloat8 a(s.m);
2066	EXPECT_EQ(a.lane<0>(), 1.1f);
2067	EXPECT_EQ(a.lane<1>(), 2.2f);
2068	EXPECT_EQ(a.lane<2>(), 3.3f);
2069	EXPECT_EQ(a.lane<3>(), 4.4f);
2070	EXPECT_EQ(a.lane<4>(), 5.5f);
2071	EXPECT_EQ(a.lane<5>(), 6.6f);
2072	EXPECT_EQ(a.lane<6>(), 7.7f);
2073	EXPECT_EQ(a.lane<7>(), 8.8f);
2074}
2075
2076/** @brief Test vfloat8 zero. */
2077TEST(vfloat8, Zero)
2078{
2079	vfloat8 a = vfloat8::zero();
2080	EXPECT_EQ(a.lane<0>(), 0.0f);
2081	EXPECT_EQ(a.lane<1>(), 0.0f);
2082	EXPECT_EQ(a.lane<2>(), 0.0f);
2083	EXPECT_EQ(a.lane<3>(), 0.0f);
2084	EXPECT_EQ(a.lane<4>(), 0.0f);
2085	EXPECT_EQ(a.lane<5>(), 0.0f);
2086	EXPECT_EQ(a.lane<6>(), 0.0f);
2087	EXPECT_EQ(a.lane<7>(), 0.0f);
2088}
2089
2090/** @brief Test vfloat8 load1. */
2091TEST(vfloat8, Load1)
2092{
2093	float s = 3.14f;
2094	vfloat8 a = vfloat8::load1(&s);
2095	EXPECT_EQ(a.lane<0>(), 3.14f);
2096	EXPECT_EQ(a.lane<1>(), 3.14f);
2097	EXPECT_EQ(a.lane<2>(), 3.14f);
2098	EXPECT_EQ(a.lane<3>(), 3.14f);
2099	EXPECT_EQ(a.lane<4>(), 3.14f);
2100	EXPECT_EQ(a.lane<5>(), 3.14f);
2101	EXPECT_EQ(a.lane<6>(), 3.14f);
2102	EXPECT_EQ(a.lane<7>(), 3.14f);
2103}
2104
2105/** @brief Test vfloat8 loada. */
2106TEST(vfloat8, Loada)
2107{
2108	vfloat8 a = vfloat8::loada(&(f32_data[0]));
2109	EXPECT_EQ(a.lane<0>(), 0.0f);
2110	EXPECT_EQ(a.lane<1>(), 1.0f);
2111	EXPECT_EQ(a.lane<2>(), 2.0f);
2112	EXPECT_EQ(a.lane<3>(), 3.0f);
2113	EXPECT_EQ(a.lane<4>(), 4.0f);
2114	EXPECT_EQ(a.lane<5>(), 5.0f);
2115	EXPECT_EQ(a.lane<6>(), 6.0f);
2116	EXPECT_EQ(a.lane<7>(), 7.0f);
2117}
2118
2119/** @brief Test vfloat8 lane_id. */
2120TEST(vfloat8, LaneID)
2121{
2122	vfloat8 a = vfloat8::lane_id();
2123	EXPECT_EQ(a.lane<0>(), 0.0f);
2124	EXPECT_EQ(a.lane<1>(), 1.0f);
2125	EXPECT_EQ(a.lane<2>(), 2.0f);
2126	EXPECT_EQ(a.lane<3>(), 3.0f);
2127	EXPECT_EQ(a.lane<4>(), 4.0f);
2128	EXPECT_EQ(a.lane<5>(), 5.0f);
2129	EXPECT_EQ(a.lane<6>(), 6.0f);
2130	EXPECT_EQ(a.lane<7>(), 7.0f);
2131}
2132
2133/** @brief Test vfloat8 add. */
2134TEST(vfloat8, vadd)
2135{
2136	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2137	vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2138	a = a + b;
2139	EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
2140	EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
2141	EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
2142	EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
2143	EXPECT_EQ(a.lane<4>(), 5.0f + 0.5f);
2144	EXPECT_EQ(a.lane<5>(), 6.0f + 0.6f);
2145	EXPECT_EQ(a.lane<6>(), 7.0f + 0.7f);
2146	EXPECT_EQ(a.lane<7>(), 8.0f + 0.8f);
2147}
2148
2149/** @brief Test vfloat8 sub. */
2150TEST(vfloat8, vsub)
2151{
2152	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2153	vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2154	a = a - b;
2155	EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f);
2156	EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f);
2157	EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f);
2158	EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f);
2159	EXPECT_EQ(a.lane<4>(), 5.0f - 0.5f);
2160	EXPECT_EQ(a.lane<5>(), 6.0f - 0.6f);
2161	EXPECT_EQ(a.lane<6>(), 7.0f - 0.7f);
2162	EXPECT_EQ(a.lane<7>(), 8.0f - 0.8f);
2163}
2164
2165/** @brief Test vfloat8 mul. */
2166TEST(vfloat8, vmul)
2167{
2168	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2169	vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2170	a = a * b;
2171	EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f);
2172	EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f);
2173	EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f);
2174	EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f);
2175	EXPECT_EQ(a.lane<4>(), 5.0f * 0.5f);
2176	EXPECT_EQ(a.lane<5>(), 6.0f * 0.6f);
2177	EXPECT_EQ(a.lane<6>(), 7.0f * 0.7f);
2178	EXPECT_EQ(a.lane<7>(), 8.0f * 0.8f);
2179}
2180
2181/** @brief Test vfloat8 mul. */
2182TEST(vfloat8, vsmul)
2183{
2184	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2185	float b = 3.14f;
2186	a = a * b;
2187	EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f);
2188	EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f);
2189	EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f);
2190	EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f);
2191	EXPECT_EQ(a.lane<4>(), 5.0f * 3.14f);
2192	EXPECT_EQ(a.lane<5>(), 6.0f * 3.14f);
2193	EXPECT_EQ(a.lane<6>(), 7.0f * 3.14f);
2194	EXPECT_EQ(a.lane<7>(), 8.0f * 3.14f);
2195}
2196
2197/** @brief Test vfloat8 mul. */
2198TEST(vfloat8, svmul)
2199{
2200	float a = 3.14f;
2201	vfloat8 b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2202	b = a * b;
2203	EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f);
2204	EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f);
2205	EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f);
2206	EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f);
2207	EXPECT_EQ(b.lane<4>(), 3.14f * 5.0f);
2208	EXPECT_EQ(b.lane<5>(), 3.14f * 6.0f);
2209	EXPECT_EQ(b.lane<6>(), 3.14f * 7.0f);
2210	EXPECT_EQ(b.lane<7>(), 3.14f * 8.0f);
2211}
2212
2213/** @brief Test vfloat8 div. */
2214TEST(vfloat8, vdiv)
2215{
2216	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2217	vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2218	a = a / b;
2219	EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f);
2220	EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f);
2221	EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
2222	EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f);
2223	EXPECT_EQ(a.lane<4>(), 5.0f / 0.5f);
2224	EXPECT_EQ(a.lane<5>(), 6.0f / 0.6f);
2225	EXPECT_EQ(a.lane<6>(), 7.0f / 0.7f);
2226	EXPECT_EQ(a.lane<7>(), 8.0f / 0.8f);
2227}
2228
2229/** @brief Test vfloat8 div. */
2230TEST(vfloat8, vsdiv)
2231{
2232	vfloat8 a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2233	float b = 3.14f;
2234	vfloat8 r = a / b;
2235
2236	EXPECT_EQ(r.lane<0>(), 0.1f / 3.14f);
2237	EXPECT_EQ(r.lane<1>(), 0.2f / 3.14f);
2238	EXPECT_EQ(r.lane<2>(), 0.3f / 3.14f);
2239	EXPECT_EQ(r.lane<3>(), 0.4f / 3.14f);
2240	EXPECT_EQ(r.lane<4>(), 0.5f / 3.14f);
2241	EXPECT_EQ(r.lane<5>(), 0.6f / 3.14f);
2242	EXPECT_EQ(r.lane<6>(), 0.7f / 3.14f);
2243	EXPECT_EQ(r.lane<7>(), 0.8f / 3.14f);
2244}
2245
2246/** @brief Test vfloat8 div. */
2247TEST(vfloat8, svdiv)
2248{
2249	float a = 3.14f;
2250	vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2251	vfloat8 r = a / b;
2252
2253	EXPECT_EQ(r.lane<0>(), 3.14f / 0.1f);
2254	EXPECT_EQ(r.lane<1>(), 3.14f / 0.2f);
2255	EXPECT_EQ(r.lane<2>(), 3.14f / 0.3f);
2256	EXPECT_EQ(r.lane<3>(), 3.14f / 0.4f);
2257	EXPECT_EQ(r.lane<4>(), 3.14f / 0.5f);
2258	EXPECT_EQ(r.lane<5>(), 3.14f / 0.6f);
2259	EXPECT_EQ(r.lane<6>(), 3.14f / 0.7f);
2260	EXPECT_EQ(r.lane<7>(), 3.14f / 0.8f);
2261}
2262
2263/** @brief Test vfloat8 ceq. */
2264TEST(vfloat8, ceq)
2265{
2266	vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2267	vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2268	vmask8 r1 = a1 == b1;
2269	EXPECT_EQ(0u, mask(r1));
2270	EXPECT_EQ(false, any(r1));
2271	EXPECT_EQ(false, all(r1));
2272
2273	vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2274	vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2275	vmask8 r2 = a2 == b2;
2276	EXPECT_EQ(0x1u, mask(r2));
2277	EXPECT_EQ(true, any(r2));
2278	EXPECT_EQ(false, all(r2));
2279
2280	vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2281	vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2282	vmask8 r3 = a3 == b3;
2283	EXPECT_EQ(0x5u, mask(r3));
2284	EXPECT_EQ(true, any(r3));
2285	EXPECT_EQ(false, all(r3));
2286
2287	vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2288	vmask8 r4 = a4 == a4;
2289	EXPECT_EQ(0xFFu, mask(r4));
2290	EXPECT_EQ(true, any(r4));
2291	EXPECT_EQ(true, all(r4));
2292}
2293
2294/** @brief Test vfloat8 cne. */
2295TEST(vfloat8, cne)
2296{
2297	vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2298	vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2299	vmask8 r1 = a1 != b1;
2300	EXPECT_EQ(0xFFu, mask(r1));
2301	EXPECT_EQ(true, any(r1));
2302	EXPECT_EQ(true, all(r1));
2303
2304	vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2305	vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2306	vmask8 r2 = a2 != b2;
2307	EXPECT_EQ(0xFEu, mask(r2));
2308	EXPECT_EQ(true, any(r2));
2309	EXPECT_EQ(false, all(r2));
2310
2311	vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2312	vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2313	vmask8 r3 = a3 != b3;
2314	EXPECT_EQ(0xFAu, mask(r3));
2315	EXPECT_EQ(true, any(r3));
2316	EXPECT_EQ(false, all(r3));
2317
2318	vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2319	vmask8 r4 = a4 != a4;
2320	EXPECT_EQ(0u, mask(r4));
2321	EXPECT_EQ(false, any(r4));
2322	EXPECT_EQ(false, all(r4));
2323}
2324
2325/** @brief Test vfloat8 clt. */
2326TEST(vfloat8, clt)
2327{
2328	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2329	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2330	vmask8 r = a < b;
2331	EXPECT_EQ(0xAAu, mask(r));
2332}
2333
2334/** @brief Test vfloat8 cle. */
2335TEST(vfloat8, cle)
2336{
2337	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2338	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2339	vmask8 r = a <= b;
2340	EXPECT_EQ(0xEEu, mask(r));
2341}
2342
2343/** @brief Test vfloat8 cgt. */
2344TEST(vfloat8, cgt)
2345{
2346	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2347	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2348	vmask8 r = a > b;
2349	EXPECT_EQ(0x11u, mask(r));
2350}
2351
2352/** @brief Test vfloat8 cge. */
2353TEST(vfloat8, cge)
2354{
2355	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2356	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2357	vmask8 r = a >= b;
2358	EXPECT_EQ(0x55u, mask(r));
2359}
2360
2361/** @brief Test vfloat8 min. */
2362TEST(vfloat8, min)
2363{
2364	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2365	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2366	vfloat8 r = min(a, b);
2367	EXPECT_EQ(r.lane<0>(), 0.9f);
2368	EXPECT_EQ(r.lane<1>(), 2.0f);
2369	EXPECT_EQ(r.lane<2>(), 3.0f);
2370	EXPECT_EQ(r.lane<3>(), 4.0f);
2371	EXPECT_EQ(r.lane<4>(), 0.9f);
2372	EXPECT_EQ(r.lane<5>(), 2.0f);
2373	EXPECT_EQ(r.lane<6>(), 3.0f);
2374	EXPECT_EQ(r.lane<7>(), 4.0f);
2375}
2376
2377/** @brief Test vfloat8 max. */
2378TEST(vfloat8, max)
2379{
2380	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2381	vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2382	vfloat8 r = max(a, b);
2383	EXPECT_EQ(r.lane<0>(), 1.0f);
2384	EXPECT_EQ(r.lane<1>(), 2.1f);
2385	EXPECT_EQ(r.lane<2>(), 3.0f);
2386	EXPECT_EQ(r.lane<3>(), 4.1f);
2387	EXPECT_EQ(r.lane<4>(), 1.0f);
2388	EXPECT_EQ(r.lane<5>(), 2.1f);
2389	EXPECT_EQ(r.lane<6>(), 3.0f);
2390	EXPECT_EQ(r.lane<7>(), 4.1f);
2391}
2392
2393/** @brief Test vfloat8 clamp. */
2394TEST(vfloat8, clamp)
2395{
2396	vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2397	vfloat8 r1 = clamp(2.1f, 3.0f, a1);
2398	EXPECT_EQ(r1.lane<0>(), 2.1f);
2399	EXPECT_EQ(r1.lane<1>(), 2.1f);
2400	EXPECT_EQ(r1.lane<2>(), 3.0f);
2401	EXPECT_EQ(r1.lane<3>(), 3.0f);
2402	EXPECT_EQ(r1.lane<4>(), 2.1f);
2403	EXPECT_EQ(r1.lane<5>(), 2.1f);
2404	EXPECT_EQ(r1.lane<6>(), 3.0f);
2405	EXPECT_EQ(r1.lane<7>(), 3.0f);
2406
2407	vfloat8 a2(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f);
2408	vfloat8 r2 = clamp(2.1f, 3.0f, a2);
2409	EXPECT_EQ(r2.lane<0>(), 2.1f);
2410	EXPECT_EQ(r2.lane<1>(), 2.1f);
2411	EXPECT_EQ(r2.lane<2>(), 2.1f);
2412	EXPECT_EQ(r2.lane<3>(), 3.0f);
2413	EXPECT_EQ(r2.lane<4>(), 2.1f);
2414	EXPECT_EQ(r2.lane<5>(), 2.1f);
2415	EXPECT_EQ(r2.lane<6>(), 2.1f);
2416	EXPECT_EQ(r2.lane<7>(), 3.0f);
2417}
2418
2419/** @brief Test vfloat8 clampz. */
2420TEST(vfloat8, clampz)
2421{
2422	vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2423	vfloat8 r1 = clampz(3.0f, a1);
2424	EXPECT_EQ(r1.lane<0>(), 0.0f);
2425	EXPECT_EQ(r1.lane<1>(), 0.0f);
2426	EXPECT_EQ(r1.lane<2>(), 0.1f);
2427	EXPECT_EQ(r1.lane<3>(), 3.0f);
2428	EXPECT_EQ(r1.lane<4>(), 0.0f);
2429	EXPECT_EQ(r1.lane<5>(), 0.0f);
2430	EXPECT_EQ(r1.lane<6>(), 0.1f);
2431	EXPECT_EQ(r1.lane<7>(), 3.0f);
2432
2433	vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2434	vfloat8 r2 = clampz(3.0f, a2);
2435	EXPECT_EQ(r2.lane<0>(), 0.0f);
2436	EXPECT_EQ(r2.lane<1>(), 0.0f);
2437	EXPECT_EQ(r2.lane<2>(), 0.0f);
2438	EXPECT_EQ(r2.lane<3>(), 3.0f);
2439	EXPECT_EQ(r2.lane<4>(), 0.0f);
2440	EXPECT_EQ(r2.lane<5>(), 0.0f);
2441	EXPECT_EQ(r2.lane<6>(), 0.0f);
2442	EXPECT_EQ(r2.lane<7>(), 3.0f);
2443}
2444
2445/** @brief Test vfloat8 clampz. */
2446TEST(vfloat8, clampzo)
2447{
2448	vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2449	vfloat8 r1 = clampzo(a1);
2450	EXPECT_EQ(r1.lane<0>(), 0.0f);
2451	EXPECT_EQ(r1.lane<1>(), 0.0f);
2452	EXPECT_EQ(r1.lane<2>(), 0.1f);
2453	EXPECT_EQ(r1.lane<3>(), 1.0f);
2454	EXPECT_EQ(r1.lane<4>(), 0.0f);
2455	EXPECT_EQ(r1.lane<5>(), 0.0f);
2456	EXPECT_EQ(r1.lane<6>(), 0.1f);
2457	EXPECT_EQ(r1.lane<7>(), 1.0f);
2458
2459	vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2460	vfloat8 r2 = clampzo(a2);
2461	EXPECT_EQ(r2.lane<0>(), 0.0f);
2462	EXPECT_EQ(r2.lane<1>(), 0.0f);
2463	EXPECT_EQ(r2.lane<2>(), 0.0f);
2464	EXPECT_EQ(r2.lane<3>(), 1.0f);
2465	EXPECT_EQ(r2.lane<4>(), 0.0f);
2466	EXPECT_EQ(r2.lane<5>(), 0.0f);
2467	EXPECT_EQ(r2.lane<6>(), 0.0f);
2468	EXPECT_EQ(r2.lane<7>(), 1.0f);
2469}
2470
2471/** @brief Test vfloat8 abs. */
2472TEST(vfloat8, abs)
2473{
2474	vfloat8 a(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2475	vfloat8 r = abs(a);
2476	EXPECT_EQ(r.lane<0>(), 1.0f);
2477	EXPECT_EQ(r.lane<1>(), 0.0f);
2478	EXPECT_EQ(r.lane<2>(), 0.1f);
2479	EXPECT_EQ(r.lane<3>(), 4.0f);
2480	EXPECT_EQ(r.lane<4>(), 1.0f);
2481	EXPECT_EQ(r.lane<5>(), 0.0f);
2482	EXPECT_EQ(r.lane<6>(), 0.1f);
2483	EXPECT_EQ(r.lane<7>(), 4.0f);
2484}
2485
2486/** @brief Test vfloat8 round. */
2487TEST(vfloat8, round)
2488{
2489	vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2490	vfloat8 r = round(a);
2491	EXPECT_EQ(r.lane<0>(), 1.0f);
2492	EXPECT_EQ(r.lane<1>(), 2.0f);
2493	EXPECT_EQ(r.lane<2>(), 2.0f);
2494	EXPECT_EQ(r.lane<3>(), 4.0f);
2495	EXPECT_EQ(r.lane<4>(), 1.0f);
2496	EXPECT_EQ(r.lane<5>(), 2.0f);
2497	EXPECT_EQ(r.lane<6>(), 2.0f);
2498	EXPECT_EQ(r.lane<7>(), 4.0f);
2499}
2500
2501/** @brief Test vfloat8 hmin. */
2502TEST(vfloat8, hmin)
2503{
2504	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2505	vfloat8 r1 = hmin(a1);
2506	EXPECT_EQ(r1.lane<0>(), 1.1f);
2507	EXPECT_EQ(r1.lane<1>(), 1.1f);
2508	EXPECT_EQ(r1.lane<2>(), 1.1f);
2509	EXPECT_EQ(r1.lane<3>(), 1.1f);
2510	EXPECT_EQ(r1.lane<4>(), 1.1f);
2511	EXPECT_EQ(r1.lane<5>(), 1.1f);
2512	EXPECT_EQ(r1.lane<6>(), 1.1f);
2513	EXPECT_EQ(r1.lane<7>(), 1.1f);
2514
2515	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2516	vfloat8 r2 = hmin(a2);
2517	EXPECT_EQ(r2.lane<0>(), 0.2f);
2518	EXPECT_EQ(r2.lane<1>(), 0.2f);
2519	EXPECT_EQ(r2.lane<2>(), 0.2f);
2520	EXPECT_EQ(r2.lane<3>(), 0.2f);
2521	EXPECT_EQ(r2.lane<4>(), 0.2f);
2522	EXPECT_EQ(r2.lane<5>(), 0.2f);
2523	EXPECT_EQ(r2.lane<6>(), 0.2f);
2524	EXPECT_EQ(r2.lane<7>(), 0.2f);
2525}
2526
2527/** @brief Test vfloat8 hmin_s. */
2528TEST(vfloat8, hmin_s)
2529{
2530	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2531	float r1 = hmin_s(a1);
2532	EXPECT_EQ(r1, 1.1f);
2533
2534	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2535	float r2 = hmin_s(a2);
2536	EXPECT_EQ(r2, 0.2f);
2537}
2538
2539/** @brief Test vfloat8 hmax. */
2540TEST(vfloat8, hmax)
2541{
2542	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2543	vfloat8 r1 = hmax(a1);
2544	EXPECT_EQ(r1.lane<0>(), 4.0f);
2545	EXPECT_EQ(r1.lane<1>(), 4.0f);
2546	EXPECT_EQ(r1.lane<2>(), 4.0f);
2547	EXPECT_EQ(r1.lane<3>(), 4.0f);
2548	EXPECT_EQ(r1.lane<4>(), 4.0f);
2549	EXPECT_EQ(r1.lane<5>(), 4.0f);
2550	EXPECT_EQ(r1.lane<6>(), 4.0f);
2551	EXPECT_EQ(r1.lane<7>(), 4.0f);
2552
2553	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2554	vfloat8 r2 = hmax(a2);
2555	EXPECT_EQ(r2.lane<0>(), 1.6f);
2556	EXPECT_EQ(r2.lane<1>(), 1.6f);
2557	EXPECT_EQ(r2.lane<2>(), 1.6f);
2558	EXPECT_EQ(r2.lane<3>(), 1.6f);
2559	EXPECT_EQ(r2.lane<4>(), 1.6f);
2560	EXPECT_EQ(r2.lane<5>(), 1.6f);
2561	EXPECT_EQ(r2.lane<6>(), 1.6f);
2562	EXPECT_EQ(r2.lane<7>(), 1.6f);
2563}
2564
2565/** @brief Test vfloat8 hmax_s. */
2566TEST(vfloat8, hmax_s)
2567{
2568	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2569	float r1 = hmax_s(a1);
2570	EXPECT_EQ(r1, 4.0f);
2571
2572	vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2573	float r2 = hmax_s(a2);
2574	EXPECT_EQ(r2, 1.6f);
2575}
2576
2577/** @brief Test vfloat8 hadd_s. */
2578TEST(vfloat8, hadd_s)
2579{
2580	vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2581	float sum = 1.1f + 1.5f + 1.6f + 4.0f + 1.1f + 1.5f + 1.6f + 4.0f;
2582	float r = hadd_s(a1);
2583	EXPECT_NEAR(r, sum, 0.005f);
2584}
2585
2586/** @brief Test vfloat8 sqrt. */
2587TEST(vfloat8, sqrt)
2588{
2589	vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2590	vfloat8 r = sqrt(a);
2591	EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f));
2592	EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f));
2593	EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f));
2594	EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f));
2595	EXPECT_EQ(r.lane<4>(), std::sqrt(1.0f));
2596	EXPECT_EQ(r.lane<5>(), std::sqrt(2.0f));
2597	EXPECT_EQ(r.lane<6>(), std::sqrt(3.0f));
2598	EXPECT_EQ(r.lane<7>(), std::sqrt(4.0f));
2599}
2600
2601/** @brief Test vfloat8 select. */
2602TEST(vfloat8, select)
2603{
2604	vfloat8 m1(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
2605	vfloat8 m2(1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f);
2606	vmask8 cond = m1 == m2;
2607
2608	vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0);
2609	vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0);
2610
2611	// Select in one direction
2612	vfloat8 r1 = select(a, b, cond);
2613	EXPECT_EQ(r1.lane<0>(), 4.0f);
2614	EXPECT_EQ(r1.lane<1>(), 3.0f);
2615	EXPECT_EQ(r1.lane<2>(), 2.0f);
2616	EXPECT_EQ(r1.lane<3>(), 1.0f);
2617	EXPECT_EQ(r1.lane<4>(), 4.0f);
2618	EXPECT_EQ(r1.lane<5>(), 3.0f);
2619	EXPECT_EQ(r1.lane<6>(), 2.0f);
2620	EXPECT_EQ(r1.lane<7>(), 1.0f);
2621
2622	// Select in the other
2623	vfloat8 r2 = select(b, a, cond);
2624	EXPECT_EQ(r2.lane<0>(), 1.0f);
2625	EXPECT_EQ(r2.lane<1>(), 2.0f);
2626	EXPECT_EQ(r2.lane<2>(), 3.0f);
2627	EXPECT_EQ(r2.lane<3>(), 4.0f);
2628	EXPECT_EQ(r2.lane<4>(), 1.0f);
2629	EXPECT_EQ(r2.lane<5>(), 2.0f);
2630	EXPECT_EQ(r2.lane<6>(), 3.0f);
2631	EXPECT_EQ(r2.lane<7>(), 4.0f);
2632}
2633
2634/** @brief Test vfloat8 select MSB only. */
2635TEST(vfloat8, select_msb)
2636{
2637	int msb_set = static_cast<int>(0x80000000);
2638	vint8 msb(msb_set, 0, msb_set, 0, msb_set, 0, msb_set, 0);
2639	vmask8 cond(msb.m);
2640
2641	vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f);
2642	vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f);
2643
2644	// Select in one direction
2645	vfloat8 r1 = select(a, b, cond);
2646	EXPECT_EQ(r1.lane<0>(), 4.0f);
2647	EXPECT_EQ(r1.lane<1>(), 3.0f);
2648	EXPECT_EQ(r1.lane<2>(), 2.0f);
2649	EXPECT_EQ(r1.lane<3>(), 1.0f);
2650	EXPECT_EQ(r1.lane<4>(), 4.0f);
2651	EXPECT_EQ(r1.lane<5>(), 3.0f);
2652	EXPECT_EQ(r1.lane<6>(), 2.0f);
2653	EXPECT_EQ(r1.lane<7>(), 1.0f);
2654
2655	// Select in the other
2656	vfloat8 r2 = select(b, a, cond);
2657	EXPECT_EQ(r2.lane<0>(), 1.0f);
2658	EXPECT_EQ(r2.lane<1>(), 2.0f);
2659	EXPECT_EQ(r2.lane<2>(), 3.0f);
2660	EXPECT_EQ(r2.lane<3>(), 4.0f);
2661	EXPECT_EQ(r2.lane<4>(), 1.0f);
2662	EXPECT_EQ(r2.lane<5>(), 2.0f);
2663	EXPECT_EQ(r2.lane<6>(), 3.0f);
2664	EXPECT_EQ(r2.lane<7>(), 4.0f);
2665}
2666
2667/** @brief Test vfloat8 gatherf. */
2668TEST(vfloat8, gatherf)
2669{
2670	vint8 indices(0, 4, 3, 2, 7, 4, 3, 2);
2671	vfloat8 r = gatherf(f32_data, indices);
2672	EXPECT_EQ(r.lane<0>(), 0.0f);
2673	EXPECT_EQ(r.lane<1>(), 4.0f);
2674	EXPECT_EQ(r.lane<2>(), 3.0f);
2675	EXPECT_EQ(r.lane<3>(), 2.0f);
2676	EXPECT_EQ(r.lane<4>(), 7.0f);
2677	EXPECT_EQ(r.lane<5>(), 4.0f);
2678	EXPECT_EQ(r.lane<6>(), 3.0f);
2679	EXPECT_EQ(r.lane<7>(), 2.0f);
2680}
2681
2682/** @brief Test vfloat8 store. */
2683TEST(vfloat8, store)
2684{
2685	alignas(32) float out[9];
2686	vfloat8 a(f32_data);
2687	store(a, &(out[1]));
2688	EXPECT_EQ(out[1], 0.0f);
2689	EXPECT_EQ(out[2], 1.0f);
2690	EXPECT_EQ(out[3], 2.0f);
2691	EXPECT_EQ(out[4], 3.0f);
2692	EXPECT_EQ(out[5], 4.0f);
2693	EXPECT_EQ(out[6], 5.0f);
2694	EXPECT_EQ(out[7], 6.0f);
2695	EXPECT_EQ(out[8], 7.0f);
2696}
2697
2698/** @brief Test vfloat8 storea. */
2699TEST(vfloat8, storea)
2700{
2701	alignas(32) float out[9];
2702	vfloat8 a(f32_data);
2703	store(a, out);
2704	EXPECT_EQ(out[0], 0.0f);
2705	EXPECT_EQ(out[1], 1.0f);
2706	EXPECT_EQ(out[2], 2.0f);
2707	EXPECT_EQ(out[3], 3.0f);
2708	EXPECT_EQ(out[4], 4.0f);
2709	EXPECT_EQ(out[5], 5.0f);
2710	EXPECT_EQ(out[6], 6.0f);
2711	EXPECT_EQ(out[7], 7.0f);
2712}
2713
2714/** @brief Test vfloat8 float_to_int. */
2715TEST(vfloat8, float_to_int)
2716{
2717	vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2718	vint8 r = float_to_int(a);
2719	EXPECT_EQ(r.lane<0>(), 1);
2720	EXPECT_EQ(r.lane<1>(), 1);
2721	EXPECT_EQ(r.lane<2>(), 1);
2722	EXPECT_EQ(r.lane<3>(), 4);
2723	EXPECT_EQ(r.lane<4>(), 1);
2724	EXPECT_EQ(r.lane<5>(), 1);
2725	EXPECT_EQ(r.lane<6>(), 1);
2726	EXPECT_EQ(r.lane<7>(), 4);
2727}
2728
2729// vint8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2730
2731/** @brief Test unaligned vint8 data load. */
2732TEST(vint8, UnalignedLoad)
2733{
2734	vint8 a(&(s32_data[1]));
2735	EXPECT_EQ(a.lane<0>(), 1);
2736	EXPECT_EQ(a.lane<1>(), 2);
2737	EXPECT_EQ(a.lane<2>(), 3);
2738	EXPECT_EQ(a.lane<3>(), 4);
2739	EXPECT_EQ(a.lane<4>(), 5);
2740	EXPECT_EQ(a.lane<5>(), 6);
2741	EXPECT_EQ(a.lane<6>(), 7);
2742	EXPECT_EQ(a.lane<7>(), 8);
2743}
2744
2745/** @brief Test unaligned vint8 data load. */
2746TEST(vint8, UnalignedLoad8)
2747{
2748	vint8 a(&(u8_data[1]));
2749	EXPECT_EQ(a.lane<0>(), 1);
2750	EXPECT_EQ(a.lane<1>(), 2);
2751	EXPECT_EQ(a.lane<2>(), 3);
2752	EXPECT_EQ(a.lane<3>(), 4);
2753	EXPECT_EQ(a.lane<4>(), 5);
2754	EXPECT_EQ(a.lane<5>(), 6);
2755	EXPECT_EQ(a.lane<6>(), 7);
2756	EXPECT_EQ(a.lane<7>(), 8);
2757}
2758
2759/** @brief Test scalar duplicated vint8 load. */
2760TEST(vint8, ScalarDupLoad)
2761{
2762	vint8 a(42);
2763	EXPECT_EQ(a.lane<0>(), 42);
2764	EXPECT_EQ(a.lane<1>(), 42);
2765	EXPECT_EQ(a.lane<2>(), 42);
2766	EXPECT_EQ(a.lane<3>(), 42);
2767	EXPECT_EQ(a.lane<4>(), 42);
2768	EXPECT_EQ(a.lane<5>(), 42);
2769	EXPECT_EQ(a.lane<6>(), 42);
2770	EXPECT_EQ(a.lane<7>(), 42);
2771}
2772
2773/** @brief Test scalar vint8 load. */
2774TEST(vint8, ScalarLoad)
2775{
2776	vint8 a(11, 22, 33, 44, 55, 66, 77, 88);
2777	EXPECT_EQ(a.lane<0>(), 11);
2778	EXPECT_EQ(a.lane<1>(), 22);
2779	EXPECT_EQ(a.lane<2>(), 33);
2780	EXPECT_EQ(a.lane<3>(), 44);
2781	EXPECT_EQ(a.lane<4>(), 55);
2782	EXPECT_EQ(a.lane<5>(), 66);
2783	EXPECT_EQ(a.lane<6>(), 77);
2784	EXPECT_EQ(a.lane<7>(), 88);
2785}
2786
2787/** @brief Test copy vint8 load. */
2788TEST(vint8, CopyLoad)
2789{
2790	vint8 s(11, 22, 33, 44, 55, 66, 77, 88);
2791	vint8 a(s.m);
2792	EXPECT_EQ(a.lane<0>(), 11);
2793	EXPECT_EQ(a.lane<1>(), 22);
2794	EXPECT_EQ(a.lane<2>(), 33);
2795	EXPECT_EQ(a.lane<3>(), 44);
2796	EXPECT_EQ(a.lane<4>(), 55);
2797	EXPECT_EQ(a.lane<5>(), 66);
2798	EXPECT_EQ(a.lane<6>(), 77);
2799	EXPECT_EQ(a.lane<7>(), 88);
2800}
2801
2802/** @brief Test vint8 zero. */
2803TEST(vint8, Zero)
2804{
2805	vint8 a = vint8::zero();
2806	EXPECT_EQ(a.lane<0>(), 0);
2807	EXPECT_EQ(a.lane<1>(), 0);
2808	EXPECT_EQ(a.lane<2>(), 0);
2809	EXPECT_EQ(a.lane<3>(), 0);
2810	EXPECT_EQ(a.lane<4>(), 0);
2811	EXPECT_EQ(a.lane<5>(), 0);
2812	EXPECT_EQ(a.lane<6>(), 0);
2813	EXPECT_EQ(a.lane<7>(), 0);
2814}
2815
2816/** @brief Test vint8 load1. */
2817TEST(vint8, Load1)
2818{
2819	int s = 42;
2820	vint8 a = vint8::load1(&s);
2821	EXPECT_EQ(a.lane<0>(), 42);
2822	EXPECT_EQ(a.lane<1>(), 42);
2823	EXPECT_EQ(a.lane<2>(), 42);
2824	EXPECT_EQ(a.lane<3>(), 42);
2825	EXPECT_EQ(a.lane<4>(), 42);
2826	EXPECT_EQ(a.lane<5>(), 42);
2827	EXPECT_EQ(a.lane<6>(), 42);
2828	EXPECT_EQ(a.lane<7>(), 42);
2829}
2830
2831/** @brief Test vint8 loada. */
2832TEST(vint8, Loada)
2833{
2834	vint8 a = vint8::loada(&(s32_data[0]));
2835	EXPECT_EQ(a.lane<0>(), 0);
2836	EXPECT_EQ(a.lane<1>(), 1);
2837	EXPECT_EQ(a.lane<2>(), 2);
2838	EXPECT_EQ(a.lane<3>(), 3);
2839	EXPECT_EQ(a.lane<4>(), 4);
2840	EXPECT_EQ(a.lane<5>(), 5);
2841	EXPECT_EQ(a.lane<6>(), 6);
2842	EXPECT_EQ(a.lane<7>(), 7);
2843}
2844
2845/** @brief Test vint8 lane_id. */
2846TEST(vint8, LaneID)
2847{
2848	vint8 a = vint8::lane_id();
2849	EXPECT_EQ(a.lane<0>(), 0);
2850	EXPECT_EQ(a.lane<1>(), 1);
2851	EXPECT_EQ(a.lane<2>(), 2);
2852	EXPECT_EQ(a.lane<3>(), 3);
2853	EXPECT_EQ(a.lane<4>(), 4);
2854	EXPECT_EQ(a.lane<5>(), 5);
2855	EXPECT_EQ(a.lane<6>(), 6);
2856	EXPECT_EQ(a.lane<7>(), 7);
2857}
2858
2859/** @brief Test vint8 add. */
2860TEST(vint8, vadd)
2861{
2862	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2863	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2864	a = a + b;
2865	EXPECT_EQ(a.lane<0>(), 1 + 2);
2866	EXPECT_EQ(a.lane<1>(), 2 + 3);
2867	EXPECT_EQ(a.lane<2>(), 3 + 4);
2868	EXPECT_EQ(a.lane<3>(), 4 + 5);
2869	EXPECT_EQ(a.lane<4>(), 1 + 2);
2870	EXPECT_EQ(a.lane<5>(), 2 + 3);
2871	EXPECT_EQ(a.lane<6>(), 3 + 4);
2872	EXPECT_EQ(a.lane<7>(), 4 + 5);
2873}
2874
2875
2876/** @brief Test vint8 self-add. */
2877TEST(vint8, vselfadd1)
2878{
2879	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2880	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2881	a += b;
2882
2883	EXPECT_EQ(a.lane<0>(), 1 + 2);
2884	EXPECT_EQ(a.lane<1>(), 2 + 3);
2885	EXPECT_EQ(a.lane<2>(), 3 + 4);
2886	EXPECT_EQ(a.lane<3>(), 4 + 5);
2887	EXPECT_EQ(a.lane<4>(), 1 + 2);
2888	EXPECT_EQ(a.lane<5>(), 2 + 3);
2889	EXPECT_EQ(a.lane<6>(), 3 + 4);
2890	EXPECT_EQ(a.lane<7>(), 4 + 5);
2891}
2892
2893/** @brief Test vint8 sub. */
2894TEST(vint8, vsub)
2895{
2896	vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2897	vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2898	a = a - b;
2899	EXPECT_EQ(a.lane<0>(), 1 - 2);
2900	EXPECT_EQ(a.lane<1>(), 2 - 3);
2901	EXPECT_EQ(a.lane<2>(), 4 - 3);
2902	EXPECT_EQ(a.lane<3>(), 4 - 5);
2903	EXPECT_EQ(a.lane<4>(), 1 - 2);
2904	EXPECT_EQ(a.lane<5>(), 2 - 3);
2905	EXPECT_EQ(a.lane<6>(), 4 - 3);
2906	EXPECT_EQ(a.lane<7>(), 4 - 5);
2907}
2908
2909/** @brief Test vint8 mul. */
2910TEST(vint8, vmul)
2911{
2912	vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2913	vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2914	a = a * b;
2915	EXPECT_EQ(a.lane<0>(), 1 * 2);
2916	EXPECT_EQ(a.lane<1>(), 2 * 3);
2917	EXPECT_EQ(a.lane<2>(), 4 * 3);
2918	EXPECT_EQ(a.lane<3>(), 4 * 5);
2919	EXPECT_EQ(a.lane<4>(), 1 * 2);
2920	EXPECT_EQ(a.lane<5>(), 2 * 3);
2921	EXPECT_EQ(a.lane<6>(), 4 * 3);
2922	EXPECT_EQ(a.lane<7>(), 4 * 5);
2923}
2924
2925/** @brief Test vint8 bitwise invert. */
2926TEST(vint8, bit_invert)
2927{
2928	vint8 a(-1, 0, 1, 2, -1, 0, 1, 2);
2929	a = ~a;
2930	EXPECT_EQ(a.lane<0>(), ~-1);
2931	EXPECT_EQ(a.lane<1>(), ~0);
2932	EXPECT_EQ(a.lane<2>(), ~1);
2933	EXPECT_EQ(a.lane<3>(), ~2);
2934	EXPECT_EQ(a.lane<4>(), ~-1);
2935	EXPECT_EQ(a.lane<5>(), ~0);
2936	EXPECT_EQ(a.lane<6>(), ~1);
2937	EXPECT_EQ(a.lane<7>(), ~2);
2938}
2939
2940/** @brief Test vint8 bitwise or. */
2941TEST(vint8, bit_vor)
2942{
2943	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2944	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2945	a = a | b;
2946	EXPECT_EQ(a.lane<0>(), 3);
2947	EXPECT_EQ(a.lane<1>(), 3);
2948	EXPECT_EQ(a.lane<2>(), 7);
2949	EXPECT_EQ(a.lane<3>(), 5);
2950	EXPECT_EQ(a.lane<4>(), 3);
2951	EXPECT_EQ(a.lane<5>(), 3);
2952	EXPECT_EQ(a.lane<6>(), 7);
2953	EXPECT_EQ(a.lane<7>(), 5);
2954}
2955
2956/** @brief Test vint8 bitwise and. */
2957TEST(vint8, bit_vand)
2958{
2959	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2960	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2961	a = a & b;
2962	EXPECT_EQ(a.lane<0>(), 0);
2963	EXPECT_EQ(a.lane<1>(), 2);
2964	EXPECT_EQ(a.lane<2>(), 0);
2965	EXPECT_EQ(a.lane<3>(), 4);
2966	EXPECT_EQ(a.lane<4>(), 0);
2967	EXPECT_EQ(a.lane<5>(), 2);
2968	EXPECT_EQ(a.lane<6>(), 0);
2969	EXPECT_EQ(a.lane<7>(), 4);
2970}
2971
2972/** @brief Test vint8 bitwise xor. */
2973TEST(vint8, bit_vxor)
2974{
2975	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2976	vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2977	a = a ^ b;
2978	EXPECT_EQ(a.lane<0>(), 3);
2979	EXPECT_EQ(a.lane<1>(), 1);
2980	EXPECT_EQ(a.lane<2>(), 7);
2981	EXPECT_EQ(a.lane<3>(), 1);
2982	EXPECT_EQ(a.lane<4>(), 3);
2983	EXPECT_EQ(a.lane<5>(), 1);
2984	EXPECT_EQ(a.lane<6>(), 7);
2985	EXPECT_EQ(a.lane<7>(), 1);
2986}
2987
2988/** @brief Test vint8 ceq. */
2989TEST(vint8, ceq)
2990{
2991	vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
2992	vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
2993	vmask8 r1 = a1 == b1;
2994	EXPECT_EQ(0u, mask(r1));
2995	EXPECT_EQ(false, any(r1));
2996	EXPECT_EQ(false, all(r1));
2997
2998	vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
2999	vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
3000	vmask8 r2 = a2 == b2;
3001	EXPECT_EQ(0x11u, mask(r2));
3002	EXPECT_EQ(true, any(r2));
3003	EXPECT_EQ(false, all(r2));
3004
3005	vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
3006	vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
3007	vmask8 r3 = a3 == b3;
3008	EXPECT_EQ(0x55u, mask(r3));
3009	EXPECT_EQ(true, any(r3));
3010	EXPECT_EQ(false, all(r3));
3011
3012	vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
3013	vmask8 r4 = a4 == a4;
3014	EXPECT_EQ(0xFFu, mask(r4));
3015	EXPECT_EQ(true, any(r4));
3016	EXPECT_EQ(true, all(r4));
3017}
3018
3019/** @brief Test vint8 cne. */
3020TEST(vint8, cne)
3021{
3022	vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
3023	vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
3024	vmask8 r1 = a1 != b1;
3025	EXPECT_EQ(0xFFu, mask(r1));
3026	EXPECT_EQ(true, any(r1));
3027	EXPECT_EQ(true, all(r1));
3028
3029	vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
3030	vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
3031	vmask8 r2 = a2 != b2;
3032	EXPECT_EQ(0xEEu, mask(r2));
3033	EXPECT_EQ(true, any(r2));
3034	EXPECT_EQ(false, all(r2));
3035
3036	vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
3037	vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
3038	vmask8 r3 = a3 != b3;
3039	EXPECT_EQ(0xAAu, mask(r3));
3040	EXPECT_EQ(true, any(r3));
3041	EXPECT_EQ(false, all(r3));
3042
3043	vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
3044	vmask8 r4 = a4 != a4;
3045	EXPECT_EQ(0u, mask(r4));
3046	EXPECT_EQ(false, any(r4));
3047	EXPECT_EQ(false, all(r4));
3048}
3049
3050/** @brief Test vint8 clt. */
3051TEST(vint8, clt)
3052{
3053	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3054	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3055	vmask8 r = a < b;
3056	EXPECT_EQ(0xAAu, mask(r));
3057}
3058
3059/** @brief Test vint8 cgt. */
3060TEST(vint8, cgt)
3061{
3062	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3063	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3064	vmask8 r = a > b;
3065	EXPECT_EQ(0x11u, mask(r));
3066}
3067
3068/** @brief Test vint8 min. */
3069TEST(vint8, min)
3070{
3071	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3072	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3073	vint8 r = min(a, b);
3074	EXPECT_EQ(r.lane<0>(), 0);
3075	EXPECT_EQ(r.lane<1>(), 2);
3076	EXPECT_EQ(r.lane<2>(), 3);
3077	EXPECT_EQ(r.lane<3>(), 4);
3078	EXPECT_EQ(r.lane<4>(), 0);
3079	EXPECT_EQ(r.lane<5>(), 2);
3080	EXPECT_EQ(r.lane<6>(), 3);
3081	EXPECT_EQ(r.lane<7>(), 4);
3082}
3083
3084/** @brief Test vint8 max. */
3085TEST(vint8, max)
3086{
3087	vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
3088	vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
3089	vint8 r = max(a, b);
3090	EXPECT_EQ(r.lane<0>(), 1);
3091	EXPECT_EQ(r.lane<1>(), 3);
3092	EXPECT_EQ(r.lane<2>(), 3);
3093	EXPECT_EQ(r.lane<3>(), 5);
3094	EXPECT_EQ(r.lane<4>(), 1);
3095	EXPECT_EQ(r.lane<5>(), 3);
3096	EXPECT_EQ(r.lane<6>(), 3);
3097	EXPECT_EQ(r.lane<7>(), 5);
3098}
3099
3100/** @brief Test vint8 lsl. */
3101TEST(vint8, lsl)
3102{
3103	vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3104	a = lsl<0>(a);
3105	EXPECT_EQ(a.lane<0>(), 1);
3106	EXPECT_EQ(a.lane<1>(), 2);
3107	EXPECT_EQ(a.lane<2>(), 4);
3108	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC));
3109	EXPECT_EQ(a.lane<4>(), 1);
3110	EXPECT_EQ(a.lane<5>(), 2);
3111	EXPECT_EQ(a.lane<6>(), 4);
3112	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC));
3113
3114
3115	a = lsl<1>(a);
3116	EXPECT_EQ(a.lane<0>(), 2);
3117	EXPECT_EQ(a.lane<1>(), 4);
3118	EXPECT_EQ(a.lane<2>(), 8);
3119	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFF8));
3120	EXPECT_EQ(a.lane<4>(), 2);
3121	EXPECT_EQ(a.lane<5>(), 4);
3122	EXPECT_EQ(a.lane<6>(), 8);
3123	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFF8));
3124
3125	a = lsl<2>(a);
3126	EXPECT_EQ(a.lane<0>(), 8);
3127	EXPECT_EQ(a.lane<1>(), 16);
3128	EXPECT_EQ(a.lane<2>(), 32);
3129	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFE0));
3130	EXPECT_EQ(a.lane<4>(), 8);
3131	EXPECT_EQ(a.lane<5>(), 16);
3132	EXPECT_EQ(a.lane<6>(), 32);
3133	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFE0));
3134}
3135
3136/** @brief Test vint8 lsr. */
3137TEST(vint8, lsr)
3138{
3139	vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3140	a = lsr<0>(a);
3141	EXPECT_EQ(a.lane<0>(), 1);
3142	EXPECT_EQ(a.lane<1>(), 2);
3143	EXPECT_EQ(a.lane<2>(), 4);
3144	EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC));
3145	EXPECT_EQ(a.lane<4>(), 1);
3146	EXPECT_EQ(a.lane<5>(), 2);
3147	EXPECT_EQ(a.lane<6>(), 4);
3148	EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC));
3149
3150
3151	a = lsr<1>(a);
3152	EXPECT_EQ(a.lane<0>(), 0);
3153	EXPECT_EQ(a.lane<1>(), 1);
3154	EXPECT_EQ(a.lane<2>(), 2);
3155	EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE);
3156	EXPECT_EQ(a.lane<4>(), 0);
3157	EXPECT_EQ(a.lane<5>(), 1);
3158	EXPECT_EQ(a.lane<6>(), 2);
3159	EXPECT_EQ(a.lane<7>(), 0x7FFFFFFE);
3160
3161	a = lsr<2>(a);
3162	EXPECT_EQ(a.lane<0>(),  0);
3163	EXPECT_EQ(a.lane<1>(),  0);
3164	EXPECT_EQ(a.lane<2>(),  0);
3165	EXPECT_EQ(a.lane<3>(),  0x1FFFFFFF);
3166	EXPECT_EQ(a.lane<4>(),  0);
3167	EXPECT_EQ(a.lane<5>(),  0);
3168	EXPECT_EQ(a.lane<6>(),  0);
3169	EXPECT_EQ(a.lane<7>(),  0x1FFFFFFF);
3170}
3171
3172/** @brief Test vint8 asr. */
3173TEST(vint8, asr)
3174{
3175	vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3176	a = asr<0>(a);
3177	EXPECT_EQ(a.lane<0>(),  1);
3178	EXPECT_EQ(a.lane<1>(),  2);
3179	EXPECT_EQ(a.lane<2>(),  4);
3180	EXPECT_EQ(a.lane<3>(), -4);
3181	EXPECT_EQ(a.lane<4>(),  1);
3182	EXPECT_EQ(a.lane<5>(),  2);
3183	EXPECT_EQ(a.lane<6>(),  4);
3184	EXPECT_EQ(a.lane<7>(), -4);
3185
3186	a = asr<1>(a);
3187	EXPECT_EQ(a.lane<0>(),  0);
3188	EXPECT_EQ(a.lane<1>(),  1);
3189	EXPECT_EQ(a.lane<2>(),  2);
3190	EXPECT_EQ(a.lane<3>(), -2);
3191	EXPECT_EQ(a.lane<4>(),  0);
3192	EXPECT_EQ(a.lane<5>(),  1);
3193	EXPECT_EQ(a.lane<6>(),  2);
3194	EXPECT_EQ(a.lane<7>(), -2);
3195
3196	// Note - quirk of asr is that you will get "stuck" at -1
3197	a = asr<2>(a);
3198	EXPECT_EQ(a.lane<0>(),  0);
3199	EXPECT_EQ(a.lane<1>(),  0);
3200	EXPECT_EQ(a.lane<2>(),  0);
3201	EXPECT_EQ(a.lane<3>(), -1);
3202	EXPECT_EQ(a.lane<4>(),  0);
3203	EXPECT_EQ(a.lane<5>(),  0);
3204	EXPECT_EQ(a.lane<6>(),  0);
3205	EXPECT_EQ(a.lane<7>(), -1);
3206}
3207
3208/** @brief Test vint8 hmin. */
3209TEST(vint8, hmin)
3210{
3211	vint8 a1(1, 2, 1, 2, 1, 2, 1, 2);
3212	vint8 r1 = hmin(a1);
3213	EXPECT_EQ(r1.lane<0>(), 1);
3214	EXPECT_EQ(r1.lane<1>(), 1);
3215	EXPECT_EQ(r1.lane<2>(), 1);
3216	EXPECT_EQ(r1.lane<3>(), 1);
3217	EXPECT_EQ(r1.lane<4>(), 1);
3218	EXPECT_EQ(r1.lane<5>(), 1);
3219	EXPECT_EQ(r1.lane<6>(), 1);
3220	EXPECT_EQ(r1.lane<7>(), 1);
3221
3222	vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3223	vint8 r2 = hmin(a2);
3224	EXPECT_EQ(r2.lane<0>(), -1);
3225	EXPECT_EQ(r2.lane<1>(), -1);
3226	EXPECT_EQ(r2.lane<2>(), -1);
3227	EXPECT_EQ(r2.lane<3>(), -1);
3228	EXPECT_EQ(r2.lane<4>(), -1);
3229	EXPECT_EQ(r2.lane<5>(), -1);
3230	EXPECT_EQ(r2.lane<6>(), -1);
3231	EXPECT_EQ(r2.lane<7>(), -1);
3232}
3233
3234/** @brief Test vint8 hmax. */
3235TEST(vint8, hmax)
3236{
3237	vint8 a1(1, 2, 1, 2, 1, 3, 1, 2);
3238	vint8 r1 = hmax(a1);
3239	EXPECT_EQ(r1.lane<0>(), 3);
3240	EXPECT_EQ(r1.lane<1>(), 3);
3241	EXPECT_EQ(r1.lane<2>(), 3);
3242	EXPECT_EQ(r1.lane<3>(), 3);
3243	EXPECT_EQ(r1.lane<4>(), 3);
3244	EXPECT_EQ(r1.lane<5>(), 3);
3245	EXPECT_EQ(r1.lane<6>(), 3);
3246	EXPECT_EQ(r1.lane<7>(), 3);
3247
3248	vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3249	vint8 r2 = hmax(a2);
3250	EXPECT_EQ(r2.lane<0>(), 5);
3251	EXPECT_EQ(r2.lane<1>(), 5);
3252	EXPECT_EQ(r2.lane<2>(), 5);
3253	EXPECT_EQ(r2.lane<3>(), 5);
3254	EXPECT_EQ(r2.lane<4>(), 5);
3255	EXPECT_EQ(r2.lane<5>(), 5);
3256	EXPECT_EQ(r2.lane<6>(), 5);
3257	EXPECT_EQ(r2.lane<7>(), 5);
3258}
3259
3260/** @brief Test vint8 storea. */
3261TEST(vint8, storea)
3262{
3263	alignas(32) int out[8];
3264	vint8 a(s32_data);
3265	storea(a, out);
3266	EXPECT_EQ(out[0], 0);
3267	EXPECT_EQ(out[1], 1);
3268	EXPECT_EQ(out[2], 2);
3269	EXPECT_EQ(out[3], 3);
3270	EXPECT_EQ(out[4], 4);
3271	EXPECT_EQ(out[5], 5);
3272	EXPECT_EQ(out[6], 6);
3273	EXPECT_EQ(out[7], 7);
3274}
3275
3276/** @brief Test vint8 store. */
3277TEST(vint8, store)
3278{
3279	alignas(32) int out[9];
3280	vint8 a(s32_data);
3281	store(a, out + 1);
3282	EXPECT_EQ(out[1], 0);
3283	EXPECT_EQ(out[2], 1);
3284	EXPECT_EQ(out[3], 2);
3285	EXPECT_EQ(out[4], 3);
3286	EXPECT_EQ(out[5], 4);
3287	EXPECT_EQ(out[6], 5);
3288	EXPECT_EQ(out[7], 6);
3289	EXPECT_EQ(out[8], 7);
3290}
3291
3292/** @brief Test vint8 store_nbytes. */
3293TEST(vint8, store_nbytes)
3294{
3295	alignas(32) int out[2];
3296	vint8 a(42, 314, 75, 90, 42, 314, 75, 90);
3297	store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
3298	EXPECT_EQ(out[0], 42);
3299	EXPECT_EQ(out[1], 314);
3300}
3301
3302/** @brief Test vint8 store_lanes_masked. */
3303TEST(vint8, store_lanes_masked)
3304{
3305	uint8_t resulta[32] { 0 };
3306
3307	// Store nothing
3308	vmask8 mask1 = vint8(0) == vint8(1);
3309	vint8 data1 = vint8(1);
3310
3311	store_lanes_masked(resulta, data1, mask1);
3312	vint8 result1v = vint8::load(resulta);
3313	vint8 expect1v = vint8::zero();
3314	EXPECT_TRUE(all(result1v == expect1v));
3315
3316	// Store half
3317	vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
3318	vint8 data2 = vint8(2);
3319
3320	store_lanes_masked(resulta, data2, mask2);
3321	vint8 result2v = vint8::load(resulta);
3322	vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0);
3323	EXPECT_TRUE(all(result2v == expect2v));
3324
3325	// Store all
3326	vmask8 mask3 = vint8(1) == vint8(1);
3327	vint8 data3 = vint8(3);
3328
3329	store_lanes_masked(resulta, data3, mask3);
3330	vint8 result3v = vint8::load(resulta);
3331	vint8 expect3v = vint8(3);
3332	EXPECT_TRUE(all(result3v == expect3v));
3333}
3334
3335/** @brief Test vint8 store_lanes_masked to unaligned address. */
3336TEST(vint8, store_lanes_masked_unaligned)
3337{
3338	uint8_t resulta[33] { 0 };
3339
3340	// Store nothing
3341	vmask8 mask1 = vint8(0) == vint8(1);
3342	vint8 data1 = vint8(1);
3343
3344	store_lanes_masked(resulta + 1, data1, mask1);
3345	vint8 result1v = vint8::load(resulta + 1);
3346	vint8 expect1v = vint8::zero();
3347	EXPECT_TRUE(all(result1v == expect1v));
3348
3349	// Store half
3350	vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1);
3351	vint8 data2 = vint8(2);
3352
3353	store_lanes_masked(resulta + 1, data2, mask2);
3354	vint8 result2v = vint8::load(resulta + 1);
3355	vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0);
3356	EXPECT_TRUE(all(result2v == expect2v));
3357
3358	// Store all
3359	vmask8 mask3 = vint8(1) == vint8(1);
3360	vint8 data3 = vint8(3);
3361
3362	store_lanes_masked(resulta + 1, data3, mask3);
3363	vint8 result3v = vint8::load(resulta + 1);
3364	vint8 expect3v = vint8(3);
3365	EXPECT_TRUE(all(result3v == expect3v));
3366}
3367
3368/** @brief Test vint8 gatheri. */
3369TEST(vint8, gatheri)
3370{
3371	vint8 indices(0, 4, 3, 2, 7, 4, 3, 2);
3372	vint8 r = gatheri(s32_data, indices);
3373	EXPECT_EQ(r.lane<0>(), 0);
3374	EXPECT_EQ(r.lane<1>(), 4);
3375	EXPECT_EQ(r.lane<2>(), 3);
3376	EXPECT_EQ(r.lane<3>(), 2);
3377	EXPECT_EQ(r.lane<4>(), 7);
3378	EXPECT_EQ(r.lane<5>(), 4);
3379	EXPECT_EQ(r.lane<6>(), 3);
3380	EXPECT_EQ(r.lane<7>(), 2);
3381}
3382
3383/** @brief Test vint8 pack_low_bytes. */
3384TEST(vint8, pack_low_bytes)
3385{
3386	vint8 a(1, 2, 3, 4, 2, 3, 4, 5);
3387	vint8 r = pack_low_bytes(a);
3388	EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2  << 8) | (1 << 0));
3389	EXPECT_EQ(r.lane<1>(), (5 << 24) | (4 << 16) | (3  << 8) | (2 << 0));
3390}
3391
3392/** @brief Test vint8 select. */
3393TEST(vint8, select)
3394{
3395	vint8 m1(1, 1, 1, 1, 1, 1, 1, 1);
3396	vint8 m2(1, 2, 1, 2, 1, 2, 1, 2);
3397	vmask8 cond = m1 == m2;
3398
3399	vint8 a(1, 3, 3, 1, 1, 3, 3, 1);
3400	vint8 b(4, 2, 2, 4, 4, 2, 2, 4);
3401
3402	vint8 r1 = select(a, b, cond);
3403	EXPECT_EQ(r1.lane<0>(), 4);
3404	EXPECT_EQ(r1.lane<1>(), 3);
3405	EXPECT_EQ(r1.lane<2>(), 2);
3406	EXPECT_EQ(r1.lane<3>(), 1);
3407	EXPECT_EQ(r1.lane<4>(), 4);
3408	EXPECT_EQ(r1.lane<5>(), 3);
3409	EXPECT_EQ(r1.lane<6>(), 2);
3410	EXPECT_EQ(r1.lane<7>(), 1);
3411
3412	vint8 r2 = select(b, a, cond);
3413	EXPECT_EQ(r2.lane<0>(), 1);
3414	EXPECT_EQ(r2.lane<1>(), 2);
3415	EXPECT_EQ(r2.lane<2>(), 3);
3416	EXPECT_EQ(r2.lane<3>(), 4);
3417	EXPECT_EQ(r2.lane<4>(), 1);
3418	EXPECT_EQ(r2.lane<5>(), 2);
3419	EXPECT_EQ(r2.lane<6>(), 3);
3420	EXPECT_EQ(r2.lane<7>(), 4);
3421}
3422
3423// vmask8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3424
3425/** @brief Test vmask8 scalar literal constructor. */
3426TEST(vmask8, scalar_literal_construct)
3427{
3428	vfloat8 ma(0.0f);
3429	vfloat8 mb(1.0f);
3430
3431	vmask8 m1(true);
3432	vfloat8 r1 = select(ma, mb, m1);
3433	vmask8 rm1 = r1 == mb;
3434	EXPECT_EQ(all(rm1), true);
3435
3436	vmask8 m2(false);
3437	vfloat8 r2 = select(ma, mb, m2);
3438	vmask8 rm2 = r2 == mb;
3439	EXPECT_EQ(any(rm2), false);
3440}
3441
3442/** @brief Test vmask8 or. */
3443TEST(vmask8, or)
3444{
3445	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3446	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3447	vmask8 m1 = m1a == m1b;
3448
3449	vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3450	vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3451	vmask8 m2 = m2a == m2b;
3452
3453	vmask8 r = m1 | m2;
3454	EXPECT_EQ(mask(r), 0xBBu);
3455}
3456
3457/** @brief Test vmask8 and. */
3458TEST(vmask8, and)
3459{
3460	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3461	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3462	vmask8 m1 = m1a == m1b;
3463
3464	vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3465	vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3466	vmask8 m2 = m2a == m2b;
3467
3468	vmask8 r = m1 & m2;
3469	EXPECT_EQ(mask(r), 0x22u);
3470}
3471
3472/** @brief Test vmask8 xor. */
3473TEST(vmask8, xor)
3474{
3475	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3476	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3477	vmask8 m1 = m1a == m1b;
3478
3479	vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3480	vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3481	vmask8 m2 = m2a == m2b;
3482
3483	vmask8 r = m1 ^ m2;
3484	EXPECT_EQ(mask(r), 0x99u);
3485}
3486
3487/** @brief Test vmask8 not. */
3488TEST(vmask8, not)
3489{
3490	vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3491	vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3492	vmask8 m1 = m1a == m1b;
3493	vmask8 r = ~m1;
3494	EXPECT_EQ(mask(r), 0x55u);
3495}
3496
3497/** @brief Test vint8 table permute. */
3498TEST(vint8, vtable_8bt_32bi_32entry)
3499{
3500	vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
3501	vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
3502
3503	vint8 table0p, table1p;
3504	vtable_prepare(table0, table1, table0p, table1p);
3505
3506	vint8 index(0, 7, 4, 15, 16, 20, 23, 31);
3507
3508	vint8 result = vtable_8bt_32bi(table0p, table1p, index);
3509
3510	EXPECT_EQ(result.lane<0>(),  3);
3511	EXPECT_EQ(result.lane<1>(),  4);
3512	EXPECT_EQ(result.lane<2>(),  7);
3513	EXPECT_EQ(result.lane<3>(), 12);
3514	EXPECT_EQ(result.lane<4>(), 19);
3515	EXPECT_EQ(result.lane<5>(), 23);
3516	EXPECT_EQ(result.lane<6>(), 20);
3517	EXPECT_EQ(result.lane<7>(), 28);
3518}
3519
3520/** @brief Test vint4 table permute. */
3521TEST(vint8, vtable_8bt_32bi_64entry)
3522{
3523	vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
3524	vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);
3525	vint4 table2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f);
3526	vint4 table3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);
3527
3528	vint8 table0p, table1p, table2p, table3p;
3529	vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p);
3530
3531	vint8 index(0, 7, 4, 15, 16, 20, 38, 63);
3532
3533	vint8 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index);
3534
3535	EXPECT_EQ(result.lane<0>(),  3);
3536	EXPECT_EQ(result.lane<1>(),  4);
3537	EXPECT_EQ(result.lane<2>(),  7);
3538	EXPECT_EQ(result.lane<3>(), 12);
3539	EXPECT_EQ(result.lane<4>(), 19);
3540	EXPECT_EQ(result.lane<5>(), 23);
3541	EXPECT_EQ(result.lane<6>(), 37);
3542	EXPECT_EQ(result.lane<7>(), 60);
3543}
3544
3545#endif
3546
3547}
3548