1// SPDX-License-Identifier: Apache-2.0 2// ---------------------------------------------------------------------------- 3// Copyright 2020-2024 Arm Limited 4// 5// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6// use this file except in compliance with the License. You may obtain a copy 7// of the License at: 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14// License for the specific language governing permissions and limitations 15// under the License. 16// ---------------------------------------------------------------------------- 17 18/** 19 * @brief Unit tests for the vectorized SIMD functionality. 20 */ 21 22#include <limits> 23 24#include "gtest/gtest.h" 25 26#include "../astcenc_internal.h" 27#include "../astcenc_vecmathlib.h" 28 29namespace astcenc 30{ 31 32// Misc utility tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 33 34static unsigned int round_down(unsigned int x) 35{ 36 unsigned int remainder = x % ASTCENC_SIMD_WIDTH; 37 return x - remainder; 38} 39 40static unsigned int round_up(unsigned int x) 41{ 42 unsigned int remainder = x % ASTCENC_SIMD_WIDTH; 43 if (!remainder) 44 { 45 return x; 46 } 47 48 return x - remainder + ASTCENC_SIMD_WIDTH; 49} 50 51/** @brief Test VLA loop limit round down. */ 52TEST(misc, RoundDownVLA) 53{ 54 // Static ones which are valid for all VLA widths 55 EXPECT_EQ(round_down_to_simd_multiple_vla(0), 0u); 56 EXPECT_EQ(round_down_to_simd_multiple_vla(8), 8u); 57 EXPECT_EQ(round_down_to_simd_multiple_vla(16), 16u); 58 59 // Variable ones which depend on VLA width 60 EXPECT_EQ(round_down_to_simd_multiple_vla(3), round_down(3)); 61 EXPECT_EQ(round_down_to_simd_multiple_vla(5), round_down(5)); 62 EXPECT_EQ(round_down_to_simd_multiple_vla(7), round_down(7)); 63 EXPECT_EQ(round_down_to_simd_multiple_vla(231), round_down(231)); 64} 65 66/** @brief Test VLA loop limit round up. */ 67TEST(misc, RoundUpVLA) 68{ 69 // Static ones which are valid for all VLA widths 70 EXPECT_EQ(round_up_to_simd_multiple_vla(0), 0u); 71 EXPECT_EQ(round_up_to_simd_multiple_vla(8), 8u); 72 EXPECT_EQ(round_up_to_simd_multiple_vla(16), 16u); 73 74 // Variable ones which depend on VLA width 75 EXPECT_EQ(round_up_to_simd_multiple_vla(3), round_up(3)); 76 EXPECT_EQ(round_up_to_simd_multiple_vla(5), round_up(5)); 77 EXPECT_EQ(round_up_to_simd_multiple_vla(7), round_up(7)); 78 EXPECT_EQ(round_up_to_simd_multiple_vla(231), round_up(231)); 79} 80 81#if ASTCENC_SIMD_WIDTH == 1 82 83// VLA (1-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 84 85/** @brief Test VLA change_sign. */ 86TEST(vfloat, ChangeSign) 87{ 88 vfloat a0(-1.0f); 89 vfloat b0(-1.0f); 90 vfloat r0 = change_sign(a0, b0); 91 EXPECT_EQ(r0.lane<0>(), 1.0f); 92 93 vfloat a1( 1.0f); 94 vfloat b1(-1.0f); 95 vfloat r1 = change_sign(a1, b1); 96 EXPECT_EQ(r1.lane<0>(), -1.0f); 97 98 vfloat a2(-3.12f); 99 vfloat b2( 3.12f); 100 vfloat r2 = change_sign(a2, b2); 101 EXPECT_EQ(r2.lane<0>(), -3.12f); 102 103 vfloat a3( 3.12f); 104 vfloat b3( 3.12f); 105 vfloat r3 = change_sign(a3, b3); 106 EXPECT_EQ(r3.lane<0>(), 3.12f); 107} 108 109/** @brief Test VLA atan. */ 110TEST(vfloat, Atan) 111{ 112 vfloat a0(-0.15f); 113 vfloat r0 = atan(a0); 114 EXPECT_NEAR(r0.lane<0>(), -0.149061f, 0.005f); 115 116 vfloat a1(0.0f); 117 vfloat r1 = atan(a1); 118 EXPECT_NEAR(r1.lane<0>(), 0.000000f, 0.005f); 119 120 vfloat a2(0.9f); 121 vfloat r2 = atan(a2); 122 EXPECT_NEAR(r2.lane<0>(), 0.733616f, 0.005f); 123 124 vfloat a3(2.1f); 125 vfloat r3 = atan(a3); 126 EXPECT_NEAR(r3.lane<0>(), 1.123040f, 0.005f); 127} 128 129/** @brief Test VLA atan2. */ 130TEST(vfloat, Atan2) 131{ 132 vfloat a0(-0.15f); 133 vfloat b0( 1.15f); 134 vfloat r0 = atan2(a0, b0); 135 EXPECT_NEAR(r0.lane<0>(), -0.129816f, 0.005f); 136 137 vfloat a1( 0.0f); 138 vfloat b1(-3.0f); 139 vfloat r1 = atan2(a1, b1); 140 EXPECT_NEAR(r1.lane<0>(), 3.141592f, 0.005f); 141 142 vfloat a2( 0.9f); 143 vfloat b2(-0.9f); 144 vfloat r2 = atan2(a2, b2); 145 EXPECT_NEAR(r2.lane<0>(), 2.360342f, 0.005f); 146 147 vfloat a3( 2.1f); 148 vfloat b3( 1.1f); 149 vfloat r3 = atan2(a3, b3); 150 EXPECT_NEAR(r3.lane<0>(), 1.084357f, 0.005f); 151} 152 153#elif ASTCENC_SIMD_WIDTH == 4 154 155// VLA (4-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 156 157/** @brief Test VLA change_sign. */ 158TEST(vfloat, ChangeSign) 159{ 160 vfloat a(-1.0f, 1.0f, -3.12f, 3.12f); 161 vfloat b(-1.0f, -1.0f, 3.12f, 3.12f); 162 vfloat r = change_sign(a, b); 163 EXPECT_EQ(r.lane<0>(), 1.0f); 164 EXPECT_EQ(r.lane<1>(), -1.0f); 165 EXPECT_EQ(r.lane<2>(), -3.12f); 166 EXPECT_EQ(r.lane<3>(), 3.12f); 167} 168 169/** @brief Test VLA atan. */ 170TEST(vfloat, Atan) 171{ 172 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f); 173 vfloat r = atan(a); 174 EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f); 175 EXPECT_NEAR(r.lane<1>(), 0.000000f, 0.005f); 176 EXPECT_NEAR(r.lane<2>(), 0.733616f, 0.005f); 177 EXPECT_NEAR(r.lane<3>(), 1.123040f, 0.005f); 178} 179 180/** @brief Test VLA atan2. */ 181TEST(vfloat, Atan2) 182{ 183 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f); 184 vfloat b(1.15f, -3.0f, -0.9f, 1.1f); 185 vfloat r = atan2(a, b); 186 EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f); 187 EXPECT_NEAR(r.lane<1>(), 3.141592f, 0.005f); 188 EXPECT_NEAR(r.lane<2>(), 2.360342f, 0.005f); 189 EXPECT_NEAR(r.lane<3>(), 1.084357f, 0.005f); 190} 191 192#elif ASTCENC_SIMD_WIDTH == 8 193 194// VLA (8-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 195 196/** @brief Test VLA change_sign. */ 197TEST(vfloat, ChangeSign) 198{ 199 vfloat a(-1.0f, 1.0f, -3.12f, 3.12f, -1.0f, 1.0f, -3.12f, 3.12f); 200 vfloat b(-1.0f, -1.0f, 3.12f, 3.12f, -1.0f, -1.0f, 3.12f, 3.12f); 201 vfloat r = change_sign(a, b); 202 EXPECT_EQ(r.lane<0>(), 1.0f); 203 EXPECT_EQ(r.lane<1>(), -1.0f); 204 EXPECT_EQ(r.lane<2>(), -3.12f); 205 EXPECT_EQ(r.lane<3>(), 3.12f); 206 EXPECT_EQ(r.lane<4>(), 1.0f); 207 EXPECT_EQ(r.lane<5>(), -1.0f); 208 EXPECT_EQ(r.lane<6>(), -3.12f); 209 EXPECT_EQ(r.lane<7>(), 3.12f); 210} 211 212/** @brief Test VLA atan. */ 213TEST(vfloat, Atan) 214{ 215 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); 216 vfloat r = atan(a); 217 EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f); 218 EXPECT_NEAR(r.lane<1>(), 0.000000f, 0.005f); 219 EXPECT_NEAR(r.lane<2>(), 0.733616f, 0.005f); 220 EXPECT_NEAR(r.lane<3>(), 1.123040f, 0.005f); 221 EXPECT_NEAR(r.lane<4>(), -0.149061f, 0.005f); 222 EXPECT_NEAR(r.lane<5>(), 0.000000f, 0.005f); 223 EXPECT_NEAR(r.lane<6>(), 0.733616f, 0.005f); 224 EXPECT_NEAR(r.lane<7>(), 1.123040f, 0.005f); 225} 226 227/** @brief Test VLA atan2. */ 228TEST(vfloat, Atan2) 229{ 230 vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); 231 vfloat b(1.15f, -3.0f, -0.9f, 1.1f, 1.15f, -3.0f, -0.9f, 1.1f); 232 vfloat r = atan2(a, b); 233 EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f); 234 EXPECT_NEAR(r.lane<1>(), 3.141592f, 0.005f); 235 EXPECT_NEAR(r.lane<2>(), 2.360342f, 0.005f); 236 EXPECT_NEAR(r.lane<3>(), 1.084357f, 0.005f); 237 EXPECT_NEAR(r.lane<4>(), -0.129816f, 0.005f); 238 EXPECT_NEAR(r.lane<5>(), 3.141592f, 0.005f); 239 EXPECT_NEAR(r.lane<6>(), 2.360342f, 0.005f); 240 EXPECT_NEAR(r.lane<7>(), 1.084357f, 0.005f); 241} 242 243#endif 244 245static const float qnan = std::numeric_limits<float>::quiet_NaN(); 246 247alignas(32) static const float f32_data[9] { 248 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f 249}; 250 251alignas(32) static const int s32_data[9] { 252 0, 1, 2, 3, 4, 5 , 6, 7, 8 253}; 254 255alignas(32) static const uint8_t u8_data[9] { 256 0, 1, 2, 3, 4, 5 , 6, 7, 8 257}; 258 259// VFLOAT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 260 261/** @brief Test unaligned vfloat4 data load. */ 262TEST(vfloat4, UnalignedLoad) 263{ 264 vfloat4 a(&(f32_data[1])); 265 EXPECT_EQ(a.lane<0>(), 1.0f); 266 EXPECT_EQ(a.lane<1>(), 2.0f); 267 EXPECT_EQ(a.lane<2>(), 3.0f); 268 EXPECT_EQ(a.lane<3>(), 4.0f); 269} 270 271/** @brief Test scalar duplicated vfloat4 load. */ 272TEST(vfloat4, ScalarDupLoad) 273{ 274 vfloat4 a(1.1f); 275 EXPECT_EQ(a.lane<0>(), 1.1f); 276 EXPECT_EQ(a.lane<1>(), 1.1f); 277 EXPECT_EQ(a.lane<2>(), 1.1f); 278 EXPECT_EQ(a.lane<3>(), 1.1f); 279} 280 281/** @brief Test scalar vfloat4 load. */ 282TEST(vfloat4, ScalarLoad) 283{ 284 vfloat4 a(1.1f, 2.2f, 3.3f, 4.4f); 285 EXPECT_EQ(a.lane<0>(), 1.1f); 286 EXPECT_EQ(a.lane<1>(), 2.2f); 287 EXPECT_EQ(a.lane<2>(), 3.3f); 288 EXPECT_EQ(a.lane<3>(), 4.4f); 289} 290 291/** @brief Test copy vfloat4 load. */ 292TEST(vfloat4, CopyLoad) 293{ 294 vfloat4 s(1.1f, 2.2f, 3.3f, 4.4f); 295 vfloat4 a(s.m); 296 EXPECT_EQ(a.lane<0>(), 1.1f); 297 EXPECT_EQ(a.lane<1>(), 2.2f); 298 EXPECT_EQ(a.lane<2>(), 3.3f); 299 EXPECT_EQ(a.lane<3>(), 4.4f); 300} 301 302/** @brief Test vfloat4 scalar lane set. */ 303TEST(vfloat4, SetLane) 304{ 305 vfloat4 a(0.0f); 306 307 a.set_lane<0>(1.0f); 308 EXPECT_EQ(a.lane<0>(), 1.0f); 309 EXPECT_EQ(a.lane<1>(), 0.0f); 310 EXPECT_EQ(a.lane<2>(), 0.0f); 311 EXPECT_EQ(a.lane<3>(), 0.0f); 312 313 a.set_lane<1>(2.0f); 314 EXPECT_EQ(a.lane<0>(), 1.0f); 315 EXPECT_EQ(a.lane<1>(), 2.0f); 316 EXPECT_EQ(a.lane<2>(), 0.0f); 317 EXPECT_EQ(a.lane<3>(), 0.0f); 318 319 a.set_lane<2>(3.0f); 320 EXPECT_EQ(a.lane<0>(), 1.0f); 321 EXPECT_EQ(a.lane<1>(), 2.0f); 322 EXPECT_EQ(a.lane<2>(), 3.0f); 323 EXPECT_EQ(a.lane<3>(), 0.0f); 324 325 a.set_lane<3>(4.0f); 326 EXPECT_EQ(a.lane<0>(), 1.0f); 327 EXPECT_EQ(a.lane<1>(), 2.0f); 328 EXPECT_EQ(a.lane<2>(), 3.0f); 329 EXPECT_EQ(a.lane<3>(), 4.0f); 330} 331 332/** @brief Test vfloat4 zero. */ 333TEST(vfloat4, Zero) 334{ 335 vfloat4 a = vfloat4::zero(); 336 EXPECT_EQ(a.lane<0>(), 0.0f); 337 EXPECT_EQ(a.lane<1>(), 0.0f); 338 EXPECT_EQ(a.lane<2>(), 0.0f); 339 EXPECT_EQ(a.lane<3>(), 0.0f); 340} 341 342/** @brief Test vfloat4 load1. */ 343TEST(vfloat4, Load1) 344{ 345 float s = 3.14f; 346 vfloat4 a = vfloat4::load1(&s); 347 EXPECT_EQ(a.lane<0>(), 3.14f); 348 EXPECT_EQ(a.lane<1>(), 3.14f); 349 EXPECT_EQ(a.lane<2>(), 3.14f); 350 EXPECT_EQ(a.lane<3>(), 3.14f); 351} 352 353/** @brief Test vfloat4 loada. */ 354TEST(vfloat4, Loada) 355{ 356 vfloat4 a = vfloat4::loada(&(f32_data[0])); 357 EXPECT_EQ(a.lane<0>(), 0.0f); 358 EXPECT_EQ(a.lane<1>(), 1.0f); 359 EXPECT_EQ(a.lane<2>(), 2.0f); 360 EXPECT_EQ(a.lane<3>(), 3.0f); 361} 362 363/** @brief Test vfloat4 lane_id. */ 364TEST(vfloat4, LaneID) 365{ 366 vfloat4 a = vfloat4::lane_id(); 367 EXPECT_EQ(a.lane<0>(), 0.0f); 368 EXPECT_EQ(a.lane<1>(), 1.0f); 369 EXPECT_EQ(a.lane<2>(), 2.0f); 370 EXPECT_EQ(a.lane<3>(), 3.0f); 371} 372 373/** @brief Test vfloat4 swz to float4. */ 374TEST(vfloat4, swz4) 375{ 376 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 377 vfloat4 r = a.swz<0, 3, 2, 1>(); 378 EXPECT_EQ(r.lane<0>(), 1.0f); 379 EXPECT_EQ(r.lane<1>(), 4.0f); 380 EXPECT_EQ(r.lane<2>(), 3.0f); 381 EXPECT_EQ(r.lane<3>(), 2.0f); 382 383 r = a.swz<3, 1, 1, 0>(); 384 EXPECT_EQ(r.lane<0>(), 4.0f); 385 EXPECT_EQ(r.lane<1>(), 2.0f); 386 EXPECT_EQ(r.lane<2>(), 2.0f); 387 EXPECT_EQ(r.lane<3>(), 1.0f); 388} 389 390/** @brief Test vfloat4 swz to float3. */ 391TEST(vfloat4, swz3) 392{ 393 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 394 vfloat4 r = a.swz<0, 3, 2>(); 395 EXPECT_EQ(r.lane<0>(), 1.0f); 396 EXPECT_EQ(r.lane<1>(), 4.0f); 397 EXPECT_EQ(r.lane<2>(), 3.0f); 398 EXPECT_EQ(r.lane<3>(), 0.0f); 399 400 r = a.swz<3, 1, 1>(); 401 EXPECT_EQ(r.lane<0>(), 4.0f); 402 EXPECT_EQ(r.lane<1>(), 2.0f); 403 EXPECT_EQ(r.lane<2>(), 2.0f); 404 EXPECT_EQ(r.lane<3>(), 0.0f); 405} 406 407/** @brief Test vfloat4 swz to float2. */ 408TEST(vfloat4, swz2) 409{ 410 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 411 vfloat4 r = a.swz<0, 3>(); 412 EXPECT_EQ(r.lane<0>(), 1.0f); 413 EXPECT_EQ(r.lane<1>(), 4.0f); 414 415 r = a.swz<2, 1>(); 416 EXPECT_EQ(r.lane<0>(), 3.0f); 417 EXPECT_EQ(r.lane<1>(), 2.0f); 418} 419 420/** @brief Test vfloat4 add. */ 421TEST(vfloat4, vadd) 422{ 423 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 424 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f); 425 a = a + b; 426 EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f); 427 EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f); 428 EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f); 429 EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f); 430} 431 432/** @brief Test vfloat4 self-add. */ 433TEST(vfloat4, vselfadd1) 434{ 435 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 436 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f); 437 438 // Test increment by another variable 439 a += b; 440 EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f); 441 EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f); 442 EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f); 443 EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f); 444 445 // Test increment by an expression 446 a += b + b; 447 EXPECT_NEAR(a.lane<0>(), 1.0f + 0.3f, 0.001f); 448 EXPECT_NEAR(a.lane<1>(), 2.0f + 0.6f, 0.001f); 449 EXPECT_NEAR(a.lane<2>(), 3.0f + 0.9f, 0.001f); 450 EXPECT_NEAR(a.lane<3>(), 4.0f + 1.2f, 0.001f); 451} 452 453/** @brief Test vfloat4 sub. */ 454TEST(vfloat4, vsub) 455{ 456 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 457 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f); 458 a = a - b; 459 EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f); 460 EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f); 461 EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f); 462 EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f); 463} 464 465/** @brief Test vfloat4 mul. */ 466TEST(vfloat4, vmul) 467{ 468 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 469 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f); 470 a = a * b; 471 EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f); 472 EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f); 473 EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f); 474 EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f); 475} 476 477/** @brief Test vfloat4 mul. */ 478TEST(vfloat4, vsmul) 479{ 480 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 481 float b = 3.14f; 482 a = a * b; 483 EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f); 484 EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f); 485 EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f); 486 EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f); 487} 488 489/** @brief Test vfloat4 mul. */ 490TEST(vfloat4, svmul) 491{ 492 float a = 3.14f; 493 vfloat4 b(1.0f, 2.0f, 3.0f, 4.0f); 494 b = a * b; 495 EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f); 496 EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f); 497 EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f); 498 EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f); 499} 500 501/** @brief Test vfloat4 div. */ 502TEST(vfloat4, vdiv) 503{ 504 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 505 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f); 506 a = a / b; 507 EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f); 508 EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f); 509 EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f); 510 EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f); 511} 512 513/** @brief Test vfloat4 div. */ 514TEST(vfloat4, vsdiv) 515{ 516 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 517 float b = 0.3f; 518 a = a / b; 519 EXPECT_EQ(a.lane<0>(), 1.0f / 0.3f); 520 EXPECT_EQ(a.lane<1>(), 2.0f / 0.3f); 521 EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f); 522 EXPECT_EQ(a.lane<3>(), 4.0f / 0.3f); 523} 524 525/** @brief Test vfloat4 div. */ 526TEST(vfloat4, svdiv) 527{ 528 float a = 3.0f; 529 vfloat4 b(0.1f, 0.2f, 0.3f, 0.4f); 530 b = a / b; 531 EXPECT_EQ(b.lane<0>(), 3.0f / 0.1f); 532 EXPECT_EQ(b.lane<1>(), 3.0f / 0.2f); 533 EXPECT_EQ(b.lane<2>(), 3.0f / 0.3f); 534 EXPECT_EQ(b.lane<3>(), 3.0f / 0.4f); 535} 536 537/** @brief Test vfloat4 ceq. */ 538TEST(vfloat4, ceq) 539{ 540 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f); 541 vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f); 542 vmask4 r1 = a1 == b1; 543 EXPECT_EQ(0u, mask(r1)); 544 EXPECT_EQ(false, any(r1)); 545 EXPECT_EQ(false, all(r1)); 546 547 vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f); 548 vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f); 549 vmask4 r2 = a2 == b2; 550 EXPECT_EQ(0x1u, mask(r2)); 551 EXPECT_EQ(true, any(r2)); 552 EXPECT_EQ(false, all(r2)); 553 554 vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f); 555 vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f); 556 vmask4 r3 = a3 == b3; 557 EXPECT_EQ(0x5u, mask(r3)); 558 EXPECT_EQ(true, any(r3)); 559 EXPECT_EQ(false, all(r3)); 560 561 vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f); 562 vmask4 r4 = a4 == a4; 563 EXPECT_EQ(0xFu, mask(r4)); 564 EXPECT_EQ(true, any(r4)); 565 EXPECT_EQ(true, all(r4)); 566} 567 568/** @brief Test vfloat4 cne. */ 569TEST(vfloat4, cne) 570{ 571 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f); 572 vfloat4 b1(0.1f, 0.2f, 0.3f, 0.4f); 573 vmask4 r1 = a1 != b1; 574 EXPECT_EQ(0xFu, mask(r1)); 575 EXPECT_EQ(true, any(r1)); 576 EXPECT_EQ(true, all(r1)); 577 578 vfloat4 a2(1.0f, 2.0f, 3.0f, 4.0f); 579 vfloat4 b2(1.0f, 0.2f, 0.3f, 0.4f); 580 vmask4 r2 = a2 != b2; 581 EXPECT_EQ(0xEu, mask(r2)); 582 EXPECT_EQ(true, any(r2)); 583 EXPECT_EQ(false, all(r2)); 584 585 vfloat4 a3(1.0f, 2.0f, 3.0f, 4.0f); 586 vfloat4 b3(1.0f, 0.2f, 3.0f, 0.4f); 587 vmask4 r3 = a3 != b3; 588 EXPECT_EQ(0xAu, mask(r3)); 589 EXPECT_EQ(true, any(r3)); 590 EXPECT_EQ(false, all(r3)); 591 592 vfloat4 a4(1.0f, 2.0f, 3.0f, 4.0f); 593 vmask4 r4 = a4 != a4; 594 EXPECT_EQ(0u, mask(r4)); 595 EXPECT_EQ(false, any(r4)); 596 EXPECT_EQ(false, all(r4)); 597} 598 599/** @brief Test vfloat4 clt. */ 600TEST(vfloat4, clt) 601{ 602 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 603 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f); 604 vmask4 r = a < b; 605 EXPECT_EQ(0xAu, mask(r)); 606} 607 608/** @brief Test vfloat4 cle. */ 609TEST(vfloat4, cle) 610{ 611 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 612 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f); 613 vmask4 r = a <= b; 614 EXPECT_EQ(0xEu, mask(r)); 615} 616 617/** @brief Test vfloat4 cgt. */ 618TEST(vfloat4, cgt) 619{ 620 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 621 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f); 622 vmask4 r = a > b; 623 EXPECT_EQ(0x1u, mask(r)); 624} 625 626/** @brief Test vfloat4 cge. */ 627TEST(vfloat4, cge) 628{ 629 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 630 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f); 631 vmask4 r = a >= b; 632 EXPECT_EQ(0x5u, mask(r)); 633} 634 635/** @brief Test vfloat4 min. */ 636TEST(vfloat4, min) 637{ 638 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 639 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f); 640 vfloat4 r = min(a, b); 641 EXPECT_EQ(r.lane<0>(), 0.9f); 642 EXPECT_EQ(r.lane<1>(), 2.0f); 643 EXPECT_EQ(r.lane<2>(), 3.0f); 644 EXPECT_EQ(r.lane<3>(), 4.0f); 645 646 float c = 0.3f; 647 r = min(a, c); 648 EXPECT_EQ(r.lane<0>(), 0.3f); 649 EXPECT_EQ(r.lane<1>(), 0.3f); 650 EXPECT_EQ(r.lane<2>(), 0.3f); 651 EXPECT_EQ(r.lane<3>(), 0.3f); 652 653 float d = 1.5f; 654 r = min(a, d); 655 EXPECT_EQ(r.lane<0>(), 1.0f); 656 EXPECT_EQ(r.lane<1>(), 1.5f); 657 EXPECT_EQ(r.lane<2>(), 1.5f); 658 EXPECT_EQ(r.lane<3>(), 1.5f); 659} 660 661/** @brief Test vfloat4 max. */ 662TEST(vfloat4, max) 663{ 664 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 665 vfloat4 b(0.9f, 2.1f, 3.0f, 4.1f); 666 vfloat4 r = max(a, b); 667 EXPECT_EQ(r.lane<0>(), 1.0f); 668 EXPECT_EQ(r.lane<1>(), 2.1f); 669 EXPECT_EQ(r.lane<2>(), 3.0f); 670 EXPECT_EQ(r.lane<3>(), 4.1f); 671 672 float c = 4.3f; 673 r = max(a, c); 674 EXPECT_EQ(r.lane<0>(), 4.3f); 675 EXPECT_EQ(r.lane<1>(), 4.3f); 676 EXPECT_EQ(r.lane<2>(), 4.3f); 677 EXPECT_EQ(r.lane<3>(), 4.3f); 678 679 float d = 1.5f; 680 r = max(a, d); 681 EXPECT_EQ(r.lane<0>(), 1.5f); 682 EXPECT_EQ(r.lane<1>(), 2.0f); 683 EXPECT_EQ(r.lane<2>(), 3.0f); 684 EXPECT_EQ(r.lane<3>(), 4.0f); 685} 686 687/** @brief Test vfloat4 clamp. */ 688TEST(vfloat4, clamp) 689{ 690 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f); 691 vfloat4 r1 = clamp(2.1f, 3.0f, a1); 692 EXPECT_EQ(r1.lane<0>(), 2.1f); 693 EXPECT_EQ(r1.lane<1>(), 2.1f); 694 EXPECT_EQ(r1.lane<2>(), 3.0f); 695 EXPECT_EQ(r1.lane<3>(), 3.0f); 696 697 vfloat4 a2(1.0f, 2.0f, qnan, 4.0f); 698 vfloat4 r2 = clamp(2.1f, 3.0f, a2); 699 EXPECT_EQ(r2.lane<0>(), 2.1f); 700 EXPECT_EQ(r2.lane<1>(), 2.1f); 701 EXPECT_EQ(r2.lane<2>(), 2.1f); 702 EXPECT_EQ(r2.lane<3>(), 3.0f); 703} 704 705/** @brief Test vfloat4 clampz. */ 706TEST(vfloat4, clampz) 707{ 708 vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f); 709 vfloat4 r1 = clampz(3.0f, a1); 710 EXPECT_EQ(r1.lane<0>(), 0.0f); 711 EXPECT_EQ(r1.lane<1>(), 0.0f); 712 EXPECT_EQ(r1.lane<2>(), 0.1f); 713 EXPECT_EQ(r1.lane<3>(), 3.0f); 714 715 vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f); 716 vfloat4 r2 = clampz(3.0f, a2); 717 EXPECT_EQ(r2.lane<0>(), 0.0f); 718 EXPECT_EQ(r2.lane<1>(), 0.0f); 719 EXPECT_EQ(r2.lane<2>(), 0.0f); 720 EXPECT_EQ(r2.lane<3>(), 3.0f); 721} 722 723/** @brief Test vfloat4 clampz. */ 724TEST(vfloat4, clampzo) 725{ 726 vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f); 727 vfloat4 r1 = clampzo(a1); 728 EXPECT_EQ(r1.lane<0>(), 0.0f); 729 EXPECT_EQ(r1.lane<1>(), 0.0f); 730 EXPECT_EQ(r1.lane<2>(), 0.1f); 731 EXPECT_EQ(r1.lane<3>(), 1.0f); 732 733 vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f); 734 vfloat4 r2 = clampzo(a2); 735 EXPECT_EQ(r2.lane<0>(), 0.0f); 736 EXPECT_EQ(r2.lane<1>(), 0.0f); 737 EXPECT_EQ(r2.lane<2>(), 0.0f); 738 EXPECT_EQ(r2.lane<3>(), 1.0f); 739} 740 741/** @brief Test vfloat4 abs. */ 742TEST(vfloat4, abs) 743{ 744 vfloat4 a(-1.0f, 0.0f, 0.1f, 4.0f); 745 vfloat4 r = abs(a); 746 EXPECT_EQ(r.lane<0>(), 1.0f); 747 EXPECT_EQ(r.lane<1>(), 0.0f); 748 EXPECT_EQ(r.lane<2>(), 0.1f); 749 EXPECT_EQ(r.lane<3>(), 4.0f); 750} 751 752/** @brief Test vfloat4 round. */ 753TEST(vfloat4, round) 754{ 755 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f); 756 vfloat4 r1 = round(a1); 757 EXPECT_EQ(r1.lane<0>(), 1.0f); 758 EXPECT_EQ(r1.lane<1>(), 2.0f); 759 EXPECT_EQ(r1.lane<2>(), 2.0f); 760 EXPECT_EQ(r1.lane<3>(), 4.0f); 761 762 vfloat4 a2(-2.5f, -2.5f, -3.5f, -3.5f); 763 vfloat4 r2 = round(a2); 764 EXPECT_EQ(r2.lane<0>(), -2.0f); 765 EXPECT_EQ(r2.lane<2>(), -4.0f); 766} 767 768/** @brief Test vfloat4 hmin. */ 769TEST(vfloat4, hmin) 770{ 771 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f); 772 vfloat4 r1 = hmin(a1); 773 EXPECT_EQ(r1.lane<0>(), 1.1f); 774 EXPECT_EQ(r1.lane<1>(), 1.1f); 775 EXPECT_EQ(r1.lane<2>(), 1.1f); 776 EXPECT_EQ(r1.lane<3>(), 1.1f); 777 778 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f); 779 vfloat4 r2 = hmin(a2); 780 EXPECT_EQ(r2.lane<0>(), 0.2f); 781 EXPECT_EQ(r2.lane<1>(), 0.2f); 782 EXPECT_EQ(r2.lane<2>(), 0.2f); 783 EXPECT_EQ(r2.lane<3>(), 0.2f); 784} 785 786/** @brief Test vfloat4 hmin_s. */ 787TEST(vfloat4, hmin_s) 788{ 789 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f); 790 float r1 = hmin_s(a1); 791 EXPECT_EQ(r1, 1.1f); 792 793 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f); 794 float r2 = hmin_s(a2); 795 EXPECT_EQ(r2, 0.2f); 796} 797 798/** @brief Test vfloat4 hmin_rgb_s. */ 799TEST(vfloat4, hmin_rgb_s) 800{ 801 vfloat4 a1(1.1f, 1.5f, 1.6f, 0.2f); 802 float r1 = hmin_rgb_s(a1); 803 EXPECT_EQ(r1, 1.1f); 804 805 vfloat4 a2(1.5f, 0.9f, 1.6f, 1.2f); 806 float r2 = hmin_rgb_s(a2); 807 EXPECT_EQ(r2, 0.9f); 808} 809 810/** @brief Test vfloat4 hmax. */ 811TEST(vfloat4, hmax) 812{ 813 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f); 814 vfloat4 r1 = hmax(a1); 815 EXPECT_EQ(r1.lane<0>(), 4.0f); 816 EXPECT_EQ(r1.lane<1>(), 4.0f); 817 EXPECT_EQ(r1.lane<2>(), 4.0f); 818 EXPECT_EQ(r1.lane<3>(), 4.0f); 819 820 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f); 821 vfloat4 r2 = hmax(a2); 822 EXPECT_EQ(r2.lane<0>(), 1.6f); 823 EXPECT_EQ(r2.lane<1>(), 1.6f); 824 EXPECT_EQ(r2.lane<2>(), 1.6f); 825 EXPECT_EQ(r2.lane<3>(), 1.6f); 826} 827 828/** @brief Test vfloat4 hmax_s. */ 829TEST(vfloat4, hmax_s) 830{ 831 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f); 832 float r1 = hmax_s(a1); 833 EXPECT_EQ(r1, 4.0f); 834 835 vfloat4 a2(1.1f, 1.5f, 1.6f, 0.2f); 836 float r2 = hmax_s(a2); 837 EXPECT_EQ(r2, 1.6f); 838} 839 840/** @brief Test vfloat4 hadd_s. */ 841TEST(vfloat4, hadd_s) 842{ 843 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f); 844 float sum = 1.1f + 1.5f + 1.6f + 4.0f; 845 float r = hadd_s(a1); 846 EXPECT_NEAR(r, sum, 0.005f); 847} 848 849/** @brief Test vfloat4 hadd_rgb_s. */ 850TEST(vfloat4, hadd_rgb_s) 851{ 852 vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f); 853 float sum = 1.1f + 1.5f + 1.6f; 854 float r = hadd_rgb_s(a1); 855 EXPECT_NEAR(r, sum, 0.005f); 856} 857 858/** @brief Test vfloat4 sqrt. */ 859TEST(vfloat4, sqrt) 860{ 861 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 862 vfloat4 r = sqrt(a); 863 EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f)); 864 EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f)); 865 EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f)); 866 EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f)); 867} 868 869/** @brief Test vfloat4 select. */ 870TEST(vfloat4, select) 871{ 872 vfloat4 m1(1.0f, 1.0f, 1.0f, 1.0f); 873 vfloat4 m2(1.0f, 2.0f, 1.0f, 2.0f); 874 vmask4 cond = m1 == m2; 875 876 vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f); 877 vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f); 878 879 // Select in one direction 880 vfloat4 r1 = select(a, b, cond); 881 EXPECT_EQ(r1.lane<0>(), 4.0f); 882 EXPECT_EQ(r1.lane<1>(), 3.0f); 883 EXPECT_EQ(r1.lane<2>(), 2.0f); 884 EXPECT_EQ(r1.lane<3>(), 1.0f); 885 886 // Select in the other 887 vfloat4 r2 = select(b, a, cond); 888 EXPECT_EQ(r2.lane<0>(), 1.0f); 889 EXPECT_EQ(r2.lane<1>(), 2.0f); 890 EXPECT_EQ(r2.lane<2>(), 3.0f); 891 EXPECT_EQ(r2.lane<3>(), 4.0f); 892} 893 894/** @brief Test vfloat4 select MSB only. */ 895TEST(vfloat4, select_msb) 896{ 897 int msb_set = static_cast<int>(0x80000000); 898 vint4 msb(msb_set, 0, msb_set, 0); 899 vmask4 cond(msb.m); 900 901 vfloat4 a(1.0f, 3.0f, 3.0f, 1.0f); 902 vfloat4 b(4.0f, 2.0f, 2.0f, 4.0f); 903 904 // Select in one direction 905 vfloat4 r1 = select_msb(a, b, cond); 906 EXPECT_EQ(r1.lane<0>(), 4.0f); 907 EXPECT_EQ(r1.lane<1>(), 3.0f); 908 EXPECT_EQ(r1.lane<2>(), 2.0f); 909 EXPECT_EQ(r1.lane<3>(), 1.0f); 910 911 // Select in the other 912 vfloat4 r2 = select_msb(b, a, cond); 913 EXPECT_EQ(r2.lane<0>(), 1.0f); 914 EXPECT_EQ(r2.lane<1>(), 2.0f); 915 EXPECT_EQ(r2.lane<2>(), 3.0f); 916 EXPECT_EQ(r2.lane<3>(), 4.0f); 917} 918 919/** @brief Test vfloat4 gatherf. */ 920TEST(vfloat4, gatherf) 921{ 922 vint4 indices(0, 4, 3, 2); 923 vfloat4 r = gatherf(f32_data, indices); 924 EXPECT_EQ(r.lane<0>(), 0.0f); 925 EXPECT_EQ(r.lane<1>(), 4.0f); 926 EXPECT_EQ(r.lane<2>(), 3.0f); 927 EXPECT_EQ(r.lane<3>(), 2.0f); 928} 929 930/** @brief Test vfloat4 storea. */ 931TEST(vfloat4, storea) 932{ 933 ASTCENC_ALIGNAS float out[4]; 934 vfloat4 a(f32_data); 935 storea(a, out); 936 EXPECT_EQ(out[0], 0.0f); 937 EXPECT_EQ(out[1], 1.0f); 938 EXPECT_EQ(out[2], 2.0f); 939 EXPECT_EQ(out[3], 3.0f); 940} 941 942/** @brief Test vfloat4 store. */ 943TEST(vfloat4, store) 944{ 945 ASTCENC_ALIGNAS float out[5]; 946 vfloat4 a(f32_data); 947 store(a, &(out[1])); 948 EXPECT_EQ(out[1], 0.0f); 949 EXPECT_EQ(out[2], 1.0f); 950 EXPECT_EQ(out[3], 2.0f); 951 EXPECT_EQ(out[4], 3.0f); 952} 953 954/** @brief Test vfloat4 dot. */ 955TEST(vfloat4, dot) 956{ 957 vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f); 958 vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f); 959 vfloat4 r1 = dot(a1, b1); 960 EXPECT_EQ(r1.lane<0>(), 4.0f); 961 EXPECT_EQ(r1.lane<1>(), 4.0f); 962 EXPECT_EQ(r1.lane<2>(), 4.0f); 963 EXPECT_EQ(r1.lane<3>(), 4.0f); 964 965 // These values will fail to add to the same value if reassociated 966 float l0 = 141.2540435791015625f; 967 float l1 = 5345345.5000000000000000f; 968 float l2 = 234234.7031250000000000f; 969 float l3 = 124353454080.0000000000000000f; 970 971 vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f); 972 vfloat4 b2(l0, l1, l2, l3); 973 vfloat4 r2 = dot(a2, b2); 974 975 // Test that reassociation causes a failure with the numbers we chose 976 EXPECT_FALSE(any(r2 == vfloat4(l0 + l1 + l2 + l3))); 977 978 // Test that the sum works, for the association pattern we want used 979 EXPECT_TRUE(all(r2 == vfloat4((l0 + l2) + (l1 + l3)))); 980} 981 982/** @brief Test vfloat4 dot_s. */ 983TEST(vfloat4, dot_s) 984{ 985 vfloat4 a1(1.0f, 2.0f, 4.0f, 8.0f); 986 vfloat4 b1(1.0f, 0.5f, 0.25f, 0.125f); 987 float r1 = dot_s(a1, b1); 988 EXPECT_EQ(r1, 4.0f); 989 990 // These values will fail to add to the same value if reassociated 991 float l0 = 141.2540435791015625f; 992 float l1 = 5345345.5000000000000000f; 993 float l2 = 234234.7031250000000000f; 994 float l3 = 124353454080.0000000000000000f; 995 996 vfloat4 a2(1.0f, 1.0f, 1.0f, 1.0f); 997 vfloat4 b2(l0, l1, l2, l3); 998 float r2 = dot_s(a2, b2); 999 1000 // Test that reassociation causes a failure with the numbers we chose 1001 EXPECT_NE(r2, l0 + l1 + l2 + l3); 1002 1003 // Test that the sum works, for the association pattern we want used 1004 EXPECT_EQ(r2, (l0 + l2) + (l1 + l3)); 1005} 1006 1007/** @brief Test vfloat4 dot3. */ 1008TEST(vfloat4, dot3) 1009{ 1010 vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f); 1011 vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f); 1012 vfloat4 r = dot3(a, b); 1013 EXPECT_EQ(r.lane<0>(), 3.0f); 1014 EXPECT_EQ(r.lane<1>(), 3.0f); 1015 EXPECT_EQ(r.lane<2>(), 3.0f); 1016 EXPECT_EQ(r.lane<3>(), 0.0f); 1017} 1018 1019/** @brief Test vfloat4 dot3_s. */ 1020TEST(vfloat4, dot3_s) 1021{ 1022 vfloat4 a(1.0f, 2.0f, 4.0f, 8.0f); 1023 vfloat4 b(1.0f, 0.5f, 0.25f, 0.125f); 1024 float r = dot3_s(a, b); 1025 EXPECT_EQ(r, 3.0f); 1026} 1027 1028/** @brief Test vfloat4 normalize. */ 1029TEST(vfloat4, normalize) 1030{ 1031 vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f); 1032 vfloat4 r = normalize(a); 1033 EXPECT_NEAR(r.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f); 1034 EXPECT_NEAR(r.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f); 1035 EXPECT_NEAR(r.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f); 1036 EXPECT_NEAR(r.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f); 1037} 1038 1039/** @brief Test vfloat4 normalize_safe. */ 1040TEST(vfloat4, normalize_safe) 1041{ 1042 vfloat4 s(-1.0f, -1.0f, -1.0f, -1.0f); 1043 1044 vfloat4 a1(1.0f, 2.0f, 3.0f, 4.0f); 1045 vfloat4 r1 = normalize_safe(a1, s); 1046 EXPECT_NEAR(r1.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f); 1047 EXPECT_NEAR(r1.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f); 1048 EXPECT_NEAR(r1.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f); 1049 EXPECT_NEAR(r1.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f); 1050 1051 vfloat4 a2(0.0f, 0.0f, 0.0f, 0.0f); 1052 vfloat4 r2 = normalize_safe(a2, s); 1053 EXPECT_EQ(r2.lane<0>(), -1.0f); 1054 EXPECT_EQ(r2.lane<1>(), -1.0f); 1055 EXPECT_EQ(r2.lane<2>(), -1.0f); 1056 EXPECT_EQ(r2.lane<3>(), -1.0f); 1057} 1058 1059/** @brief Test vfloat4 float_to_int. */ 1060TEST(vfloat4, float_to_int) 1061{ 1062 vfloat4 a(1.1f, 1.5f, -1.6f, 4.0f); 1063 vint4 r = float_to_int(a); 1064 EXPECT_EQ(r.lane<0>(), 1); 1065 EXPECT_EQ(r.lane<1>(), 1); 1066 EXPECT_EQ(r.lane<2>(), -1); 1067 EXPECT_EQ(r.lane<3>(), 4); 1068} 1069 1070/** @brief Test vfloat4 round. */ 1071TEST(vfloat4, float_to_int_rtn) 1072{ 1073 vfloat4 a(1.1f, 1.5f, 1.6f, 4.0f); 1074 vint4 r = float_to_int_rtn(a); 1075 EXPECT_EQ(r.lane<0>(), 1); 1076 EXPECT_EQ(r.lane<1>(), 2); 1077 EXPECT_EQ(r.lane<2>(), 2); 1078 EXPECT_EQ(r.lane<3>(), 4); 1079} 1080 1081/** @brief Test vfloat4 round. */ 1082TEST(vfloat4, int_to_float) 1083{ 1084 vint4 a(1, 2, 3, 4); 1085 vfloat4 r = int_to_float(a); 1086 EXPECT_EQ(r.lane<0>(), 1.0f); 1087 EXPECT_EQ(r.lane<1>(), 2.0f); 1088 EXPECT_EQ(r.lane<2>(), 3.0f); 1089 EXPECT_EQ(r.lane<3>(), 4.0f); 1090} 1091 1092/** @brief Test vfloat4 float to fp16 conversion. */ 1093TEST(vfloat4, float_to_float16) 1094{ 1095 vfloat4 a(1.5, 234.5, 345345.0, qnan); 1096 vint4 r = float_to_float16(a); 1097 1098 // Normal numbers 1099 EXPECT_EQ(r.lane<0>(), 0x3E00); 1100 EXPECT_EQ(r.lane<1>(), 0x5B54); 1101 1102 // Large numbers convert to infinity 1103 EXPECT_EQ(r.lane<2>(), 0x7C00); 1104 1105 // NaN must convert to any valid NaN encoding 1106 EXPECT_EQ((r.lane<3>() >> 10) & 0x1F, 0x1F); // Exponent must be all 1s 1107 EXPECT_NE(r.lane<3>() & (0x3FF), 0); // Mantissa must be non-zero 1108} 1109 1110/** @brief Test float to fp16 conversion. */ 1111TEST(sfloat, float_to_float16) 1112{ 1113 int r = float_to_float16(234.5); 1114 EXPECT_EQ(r, 0x5B54); 1115} 1116 1117/** @brief Test vfloat4 fp16 to float conversion. */ 1118TEST(vfloat4, float16_to_float) 1119{ vint4 a(0x3E00, 0x5B54, 0x7C00, 0xFFFF); 1120 vfloat4 r = float16_to_float(a); 1121 1122 // Normal numbers 1123 EXPECT_EQ(r.lane<0>(), 1.5); 1124 EXPECT_EQ(r.lane<1>(), 234.5); 1125 1126 // Infinities must be preserved 1127 EXPECT_NE(std::isinf(r.lane<2>()), 0); 1128 1129 // NaNs must be preserved 1130 EXPECT_NE(std::isnan(r.lane<3>()), 0); 1131} 1132 1133/** @brief Test fp16 to float conversion. */ 1134TEST(sfloat, float16_to_float) 1135{ 1136 float r = float16_to_float(0x5B54); 1137 EXPECT_EQ(r, 234.5); 1138} 1139 1140// VINT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1141 1142/** @brief Test unaligned vint4 data load. */ 1143TEST(vint4, UnalignedLoad) 1144{ 1145 vint4 a(&(s32_data[1])); 1146 EXPECT_EQ(a.lane<0>(), 1); 1147 EXPECT_EQ(a.lane<1>(), 2); 1148 EXPECT_EQ(a.lane<2>(), 3); 1149 EXPECT_EQ(a.lane<3>(), 4); 1150} 1151 1152/** @brief Test unaligned vint4 data load. */ 1153TEST(vint4, UnalignedLoad8) 1154{ 1155 vint4 a(&(u8_data[1])); 1156 EXPECT_EQ(a.lane<0>(), 1); 1157 EXPECT_EQ(a.lane<1>(), 2); 1158 EXPECT_EQ(a.lane<2>(), 3); 1159 EXPECT_EQ(a.lane<3>(), 4); 1160} 1161 1162/** @brief Test scalar duplicated vint4 load. */ 1163TEST(vint4, ScalarDupLoad) 1164{ 1165 vint4 a(42); 1166 EXPECT_EQ(a.lane<0>(), 42); 1167 EXPECT_EQ(a.lane<1>(), 42); 1168 EXPECT_EQ(a.lane<2>(), 42); 1169 EXPECT_EQ(a.lane<3>(), 42); 1170} 1171 1172/** @brief Test scalar vint4 load. */ 1173TEST(vint4, ScalarLoad) 1174{ 1175 vint4 a(11, 22, 33, 44); 1176 EXPECT_EQ(a.lane<0>(), 11); 1177 EXPECT_EQ(a.lane<1>(), 22); 1178 EXPECT_EQ(a.lane<2>(), 33); 1179 EXPECT_EQ(a.lane<3>(), 44); 1180} 1181 1182/** @brief Test copy vint4 load. */ 1183TEST(vint4, CopyLoad) 1184{ 1185 vint4 s(11, 22, 33, 44); 1186 vint4 a(s.m); 1187 EXPECT_EQ(a.lane<0>(), 11); 1188 EXPECT_EQ(a.lane<1>(), 22); 1189 EXPECT_EQ(a.lane<2>(), 33); 1190 EXPECT_EQ(a.lane<3>(), 44); 1191} 1192 1193/** @brief Test vint4 scalar lane set. */ 1194TEST(int4, SetLane) 1195{ 1196 vint4 a(0); 1197 1198 a.set_lane<0>(1); 1199 EXPECT_EQ(a.lane<0>(), 1); 1200 EXPECT_EQ(a.lane<1>(), 0); 1201 EXPECT_EQ(a.lane<2>(), 0); 1202 EXPECT_EQ(a.lane<3>(), 0); 1203 1204 a.set_lane<1>(2); 1205 EXPECT_EQ(a.lane<0>(), 1); 1206 EXPECT_EQ(a.lane<1>(), 2); 1207 EXPECT_EQ(a.lane<2>(), 0); 1208 EXPECT_EQ(a.lane<3>(), 0); 1209 1210 a.set_lane<2>(3); 1211 EXPECT_EQ(a.lane<0>(), 1); 1212 EXPECT_EQ(a.lane<1>(), 2); 1213 EXPECT_EQ(a.lane<2>(), 3); 1214 EXPECT_EQ(a.lane<3>(), 0); 1215 1216 a.set_lane<3>(4); 1217 EXPECT_EQ(a.lane<0>(), 1); 1218 EXPECT_EQ(a.lane<1>(), 2); 1219 EXPECT_EQ(a.lane<2>(), 3); 1220 EXPECT_EQ(a.lane<3>(), 4); 1221} 1222 1223/** @brief Test vint4 zero. */ 1224TEST(vint4, Zero) 1225{ 1226 vint4 a = vint4::zero(); 1227 EXPECT_EQ(a.lane<0>(), 0); 1228 EXPECT_EQ(a.lane<1>(), 0); 1229 EXPECT_EQ(a.lane<2>(), 0); 1230 EXPECT_EQ(a.lane<3>(), 0); 1231} 1232 1233/** @brief Test vint4 load1. */ 1234TEST(vint4, Load1) 1235{ 1236 int s = 42; 1237 vint4 a = vint4::load1(&s); 1238 EXPECT_EQ(a.lane<0>(), 42); 1239 EXPECT_EQ(a.lane<1>(), 42); 1240 EXPECT_EQ(a.lane<2>(), 42); 1241 EXPECT_EQ(a.lane<3>(), 42); 1242} 1243 1244/** @brief Test vint4 loada. */ 1245TEST(vint4, Loada) 1246{ 1247 vint4 a = vint4::loada(&(s32_data[0])); 1248 EXPECT_EQ(a.lane<0>(), 0); 1249 EXPECT_EQ(a.lane<1>(), 1); 1250 EXPECT_EQ(a.lane<2>(), 2); 1251 EXPECT_EQ(a.lane<3>(), 3); 1252} 1253 1254/** @brief Test vint4 lane_id. */ 1255TEST(vint4, LaneID) 1256{ 1257 vint4 a = vint4::lane_id(); 1258 EXPECT_EQ(a.lane<0>(), 0); 1259 EXPECT_EQ(a.lane<1>(), 1); 1260 EXPECT_EQ(a.lane<2>(), 2); 1261 EXPECT_EQ(a.lane<3>(), 3); 1262} 1263 1264/** @brief Test vint4 add. */ 1265TEST(vint4, vadd) 1266{ 1267 vint4 a(1, 2, 3, 4); 1268 vint4 b(2, 3, 4, 5); 1269 a = a + b; 1270 EXPECT_EQ(a.lane<0>(), 1 + 2); 1271 EXPECT_EQ(a.lane<1>(), 2 + 3); 1272 EXPECT_EQ(a.lane<2>(), 3 + 4); 1273 EXPECT_EQ(a.lane<3>(), 4 + 5); 1274} 1275 1276/** @brief Test vint4 self-add. */ 1277TEST(vint4, vselfadd) 1278{ 1279 vint4 a(1, 2, 3, 4); 1280 vint4 b(2, 3, 4, 5); 1281 a += b; 1282 1283 EXPECT_EQ(a.lane<0>(), 1 + 2); 1284 EXPECT_EQ(a.lane<1>(), 2 + 3); 1285 EXPECT_EQ(a.lane<2>(), 3 + 4); 1286 EXPECT_EQ(a.lane<3>(), 4 + 5); 1287} 1288 1289/** @brief Test vint4 add. */ 1290TEST(vint4, vsadd) 1291{ 1292 vint4 a(1, 2, 3, 4); 1293 int b = 5; 1294 a = a + b; 1295 EXPECT_EQ(a.lane<0>(), 1 + 5); 1296 EXPECT_EQ(a.lane<1>(), 2 + 5); 1297 EXPECT_EQ(a.lane<2>(), 3 + 5); 1298 EXPECT_EQ(a.lane<3>(), 4 + 5); 1299} 1300 1301/** @brief Test vint4 sub. */ 1302TEST(vint4, vsub) 1303{ 1304 vint4 a(1, 2, 4, 4); 1305 vint4 b(2, 3, 3, 5); 1306 a = a - b; 1307 EXPECT_EQ(a.lane<0>(), 1 - 2); 1308 EXPECT_EQ(a.lane<1>(), 2 - 3); 1309 EXPECT_EQ(a.lane<2>(), 4 - 3); 1310 EXPECT_EQ(a.lane<3>(), 4 - 5); 1311} 1312 1313/** @brief Test vint4 sub. */ 1314TEST(vint4, vssub) 1315{ 1316 vint4 a(1, 2, 4, 4); 1317 int b = 5; 1318 a = a - b; 1319 EXPECT_EQ(a.lane<0>(), 1 - 5); 1320 EXPECT_EQ(a.lane<1>(), 2 - 5); 1321 EXPECT_EQ(a.lane<2>(), 4 - 5); 1322 EXPECT_EQ(a.lane<3>(), 4 - 5); 1323} 1324 1325/** @brief Test vint4 mul. */ 1326TEST(vint4, vmul) 1327{ 1328 vint4 a(1, 2, 4, 4); 1329 vint4 b(2, 3, 3, 5); 1330 a = a * b; 1331 EXPECT_EQ(a.lane<0>(), 1 * 2); 1332 EXPECT_EQ(a.lane<1>(), 2 * 3); 1333 EXPECT_EQ(a.lane<2>(), 4 * 3); 1334 EXPECT_EQ(a.lane<3>(), 4 * 5); 1335} 1336 1337/** @brief Test vint4 mul. */ 1338TEST(vint4, vsmul) 1339{ 1340 vint4 a(1, 2, 4, 4); 1341 a = a * 3; 1342 EXPECT_EQ(a.lane<0>(), 1 * 3); 1343 EXPECT_EQ(a.lane<1>(), 2 * 3); 1344 EXPECT_EQ(a.lane<2>(), 4 * 3); 1345 EXPECT_EQ(a.lane<3>(), 4 * 3); 1346 1347 vint4 b(1, 2, -4, 4); 1348 b = b * -3; 1349 EXPECT_EQ(b.lane<0>(), 1 * -3); 1350 EXPECT_EQ(b.lane<1>(), 2 * -3); 1351 EXPECT_EQ(b.lane<2>(), -4 * -3); 1352 EXPECT_EQ(b.lane<3>(), 4 * -3); 1353} 1354 1355/** @brief Test vint4 bitwise invert. */ 1356TEST(vint4, bit_invert) 1357{ 1358 vint4 a(-1, 0, 1, 2); 1359 a = ~a; 1360 EXPECT_EQ(a.lane<0>(), ~-1); 1361 EXPECT_EQ(a.lane<1>(), ~0); 1362 EXPECT_EQ(a.lane<2>(), ~1); 1363 EXPECT_EQ(a.lane<3>(), ~2); 1364} 1365 1366/** @brief Test vint4 bitwise or. */ 1367TEST(vint4, bit_vor) 1368{ 1369 vint4 a(1, 2, 3, 4); 1370 vint4 b(2, 3, 4, 5); 1371 a = a | b; 1372 EXPECT_EQ(a.lane<0>(), 3); 1373 EXPECT_EQ(a.lane<1>(), 3); 1374 EXPECT_EQ(a.lane<2>(), 7); 1375 EXPECT_EQ(a.lane<3>(), 5); 1376} 1377 1378TEST(vint4, bit_vsor) 1379{ 1380 vint4 a(1, 2, 3, 4); 1381 int b = 2; 1382 a = a | b; 1383 EXPECT_EQ(a.lane<0>(), 3); 1384 EXPECT_EQ(a.lane<1>(), 2); 1385 EXPECT_EQ(a.lane<2>(), 3); 1386 EXPECT_EQ(a.lane<3>(), 6); 1387} 1388 1389/** @brief Test vint4 bitwise and. */ 1390TEST(vint4, bit_vand) 1391{ 1392 vint4 a(1, 2, 3, 4); 1393 vint4 b(2, 3, 4, 5); 1394 a = a & b; 1395 EXPECT_EQ(a.lane<0>(), 0); 1396 EXPECT_EQ(a.lane<1>(), 2); 1397 EXPECT_EQ(a.lane<2>(), 0); 1398 EXPECT_EQ(a.lane<3>(), 4); 1399} 1400 1401/** @brief Test vint4 bitwise and. */ 1402TEST(vint4, bit_vsand) 1403{ 1404 vint4 a(1, 2, 3, 4); 1405 int b = 2; 1406 a = a & b; 1407 EXPECT_EQ(a.lane<0>(), 0); 1408 EXPECT_EQ(a.lane<1>(), 2); 1409 EXPECT_EQ(a.lane<2>(), 2); 1410 EXPECT_EQ(a.lane<3>(), 0); 1411} 1412 1413/** @brief Test vint4 bitwise xor. */ 1414TEST(vint4, bit_vxor) 1415{ 1416 vint4 a(1, 2, 3, 4); 1417 vint4 b(2, 3, 4, 5); 1418 a = a ^ b; 1419 EXPECT_EQ(a.lane<0>(), 3); 1420 EXPECT_EQ(a.lane<1>(), 1); 1421 EXPECT_EQ(a.lane<2>(), 7); 1422 EXPECT_EQ(a.lane<3>(), 1); 1423} 1424 1425/** @brief Test vint4 bitwise xor. */ 1426TEST(vint4, bit_vsxor) 1427{ 1428 vint4 a(1, 2, 3, 4); 1429 int b = 2; 1430 a = a ^ b; 1431 EXPECT_EQ(a.lane<0>(), 3); 1432 EXPECT_EQ(a.lane<1>(), 0); 1433 EXPECT_EQ(a.lane<2>(), 1); 1434 EXPECT_EQ(a.lane<3>(), 6); 1435} 1436 1437/** @brief Test vint4 ceq. */ 1438TEST(vint4, ceq) 1439{ 1440 vint4 a1(1, 2, 3, 4); 1441 vint4 b1(0, 1, 2, 3); 1442 vmask4 r1 = a1 == b1; 1443 EXPECT_EQ(0u, mask(r1)); 1444 EXPECT_EQ(false, any(r1)); 1445 EXPECT_EQ(false, all(r1)); 1446 1447 vint4 a2(1, 2, 3, 4); 1448 vint4 b2(1, 0, 0, 0); 1449 vmask4 r2 = a2 == b2; 1450 EXPECT_EQ(0x1u, mask(r2)); 1451 EXPECT_EQ(true, any(r2)); 1452 EXPECT_EQ(false, all(r2)); 1453 1454 vint4 a3(1, 2, 3, 4); 1455 vint4 b3(1, 0, 3, 0); 1456 vmask4 r3 = a3 == b3; 1457 EXPECT_EQ(0x5u, mask(r3)); 1458 EXPECT_EQ(true, any(r3)); 1459 EXPECT_EQ(false, all(r3)); 1460 1461 vint4 a4(1, 2, 3, 4); 1462 vmask4 r4 = a4 == a4; 1463 EXPECT_EQ(0xFu, mask(r4)); 1464 EXPECT_EQ(true, any(r4)); 1465 EXPECT_EQ(true, all(r4)); 1466} 1467 1468/** @brief Test vint4 cne. */ 1469TEST(vint4, cne) 1470{ 1471 vint4 a1(1, 2, 3, 4); 1472 vint4 b1(0, 1, 2, 3); 1473 vmask4 r1 = a1 != b1; 1474 EXPECT_EQ(0xFu, mask(r1)); 1475 EXPECT_EQ(true, any(r1)); 1476 EXPECT_EQ(true, all(r1)); 1477 1478 vint4 a2(1, 2, 3, 4); 1479 vint4 b2(1, 0, 0, 0); 1480 vmask4 r2 = a2 != b2; 1481 EXPECT_EQ(0xEu, mask(r2)); 1482 EXPECT_EQ(true, any(r2)); 1483 EXPECT_EQ(false, all(r2)); 1484 1485 vint4 a3(1, 2, 3, 4); 1486 vint4 b3(1, 0, 3, 0); 1487 vmask4 r3 = a3 != b3; 1488 EXPECT_EQ(0xAu, mask(r3)); 1489 EXPECT_EQ(true, any(r3)); 1490 EXPECT_EQ(false, all(r3)); 1491 1492 vint4 a4(1, 2, 3, 4); 1493 vmask4 r4 = a4 != a4; 1494 EXPECT_EQ(0u, mask(r4)); 1495 EXPECT_EQ(false, any(r4)); 1496 EXPECT_EQ(false, all(r4)); 1497} 1498 1499/** @brief Test vint4 clt. */ 1500TEST(vint4, clt) 1501{ 1502 vint4 a(1, 2, 3, 4); 1503 vint4 b(0, 3, 3, 5); 1504 vmask4 r = a < b; 1505 EXPECT_EQ(0xAu, mask(r)); 1506} 1507 1508/** @brief Test vint4 cgt. */ 1509TEST(vint4, cle) 1510{ 1511 vint4 a(1, 2, 3, 4); 1512 vint4 b(0, 3, 3, 5); 1513 vmask4 r = a > b; 1514 EXPECT_EQ(0x1u, mask(r)); 1515} 1516 1517/** @brief Test vint4 lsl. */ 1518TEST(vint4, lsl) 1519{ 1520 vint4 a(1, 2, 4, 4); 1521 a = lsl<0>(a); 1522 EXPECT_EQ(a.lane<0>(), 1); 1523 EXPECT_EQ(a.lane<1>(), 2); 1524 EXPECT_EQ(a.lane<2>(), 4); 1525 EXPECT_EQ(a.lane<3>(), 4); 1526 1527 a = lsl<1>(a); 1528 EXPECT_EQ(a.lane<0>(), 2); 1529 EXPECT_EQ(a.lane<1>(), 4); 1530 EXPECT_EQ(a.lane<2>(), 8); 1531 EXPECT_EQ(a.lane<3>(), 8); 1532 1533 a = lsl<2>(a); 1534 EXPECT_EQ(a.lane<0>(), 8); 1535 EXPECT_EQ(a.lane<1>(), 16); 1536 EXPECT_EQ(a.lane<2>(), 32); 1537 EXPECT_EQ(a.lane<3>(), 32); 1538} 1539 1540/** @brief Test vint4 lsr. */ 1541TEST(vint4, lsr) 1542{ 1543 vint4 a(1, 2, 4, -4); 1544 a = lsr<0>(a); 1545 EXPECT_EQ(a.lane<0>(), 1); 1546 EXPECT_EQ(a.lane<1>(), 2); 1547 EXPECT_EQ(a.lane<2>(), 4); 1548 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC)); 1549 1550 a = lsr<1>(a); 1551 EXPECT_EQ(a.lane<0>(), 0); 1552 EXPECT_EQ(a.lane<1>(), 1); 1553 EXPECT_EQ(a.lane<2>(), 2); 1554 EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE); 1555 1556 a = lsr<2>(a); 1557 EXPECT_EQ(a.lane<0>(), 0); 1558 EXPECT_EQ(a.lane<1>(), 0); 1559 EXPECT_EQ(a.lane<2>(), 0); 1560 EXPECT_EQ(a.lane<3>(), 0x1FFFFFFF); 1561} 1562 1563/** @brief Test vint4 asr. */ 1564TEST(vint4, asr) 1565{ 1566 vint4 a(1, 2, 4, -4); 1567 a = asr<0>(a); 1568 EXPECT_EQ(a.lane<0>(), 1); 1569 EXPECT_EQ(a.lane<1>(), 2); 1570 EXPECT_EQ(a.lane<2>(), 4); 1571 EXPECT_EQ(a.lane<3>(), -4); 1572 1573 a = asr<1>(a); 1574 EXPECT_EQ(a.lane<0>(), 0); 1575 EXPECT_EQ(a.lane<1>(), 1); 1576 EXPECT_EQ(a.lane<2>(), 2); 1577 EXPECT_EQ(a.lane<3>(), -2); 1578 1579 // Note - quirk of asr is that you will get "stuck" at -1 1580 a = asr<2>(a); 1581 EXPECT_EQ(a.lane<0>(), 0); 1582 EXPECT_EQ(a.lane<1>(), 0); 1583 EXPECT_EQ(a.lane<2>(), 0); 1584 EXPECT_EQ(a.lane<3>(), -1); 1585} 1586 1587/** @brief Test vint4 min. */ 1588TEST(vint4, min) 1589{ 1590 vint4 a(1, 2, 3, 4); 1591 vint4 b(0, 3, 3, 5); 1592 vint4 r = min(a, b); 1593 EXPECT_EQ(r.lane<0>(), 0); 1594 EXPECT_EQ(r.lane<1>(), 2); 1595 EXPECT_EQ(r.lane<2>(), 3); 1596 EXPECT_EQ(r.lane<3>(), 4); 1597} 1598 1599/** @brief Test vint4 max. */ 1600TEST(vint4, max) 1601{ 1602 vint4 a(1, 2, 3, 4); 1603 vint4 b(0, 3, 3, 5); 1604 vint4 r = max(a, b); 1605 EXPECT_EQ(r.lane<0>(), 1); 1606 EXPECT_EQ(r.lane<1>(), 3); 1607 EXPECT_EQ(r.lane<2>(), 3); 1608 EXPECT_EQ(r.lane<3>(), 5); 1609} 1610 1611/** @brief Test vint4 clamp. */ 1612TEST(vint4, clamp) 1613{ 1614 vint4 a(1, 2, 3, 4); 1615 vint4 r = clamp(2, 3, a); 1616 EXPECT_EQ(r.lane<0>(), 2); 1617 EXPECT_EQ(r.lane<1>(), 2); 1618 EXPECT_EQ(r.lane<2>(), 3); 1619 EXPECT_EQ(r.lane<3>(), 3); 1620} 1621 1622/** @brief Test vint4 hmin. */ 1623TEST(vint4, hmin) 1624{ 1625 vint4 a1(1, 2, 1, 2); 1626 vint4 r1 = hmin(a1); 1627 EXPECT_EQ(r1.lane<0>(), 1); 1628 EXPECT_EQ(r1.lane<1>(), 1); 1629 EXPECT_EQ(r1.lane<2>(), 1); 1630 EXPECT_EQ(r1.lane<3>(), 1); 1631 1632 vint4 a2(1, 2, -1, 5); 1633 vint4 r2 = hmin(a2); 1634 EXPECT_EQ(r2.lane<0>(), -1); 1635 EXPECT_EQ(r2.lane<1>(), -1); 1636 EXPECT_EQ(r2.lane<2>(), -1); 1637 EXPECT_EQ(r2.lane<3>(), -1); 1638} 1639 1640/** @brief Test vint4 hmax. */ 1641TEST(vint4, hmax) 1642{ 1643 vint4 a1(1, 3, 1, 2); 1644 vint4 r1 = hmax(a1); 1645 EXPECT_EQ(r1.lane<0>(), 3); 1646 EXPECT_EQ(r1.lane<1>(), 3); 1647 EXPECT_EQ(r1.lane<2>(), 3); 1648 EXPECT_EQ(r1.lane<3>(), 3); 1649 1650 vint4 a2(1, 2, -1, 5); 1651 vint4 r2 = hmax(a2); 1652 EXPECT_EQ(r2.lane<0>(), 5); 1653 EXPECT_EQ(r2.lane<1>(), 5); 1654 EXPECT_EQ(r2.lane<2>(), 5); 1655 EXPECT_EQ(r2.lane<3>(), 5); 1656} 1657 1658/** @brief Test vint4 hadd_s. */ 1659TEST(vint4, hadd_s) 1660{ 1661 vint4 a1(1, 3, 5, 7); 1662 int r1 = hadd_s(a1); 1663 EXPECT_EQ(r1, 16); 1664 1665 vint4 a2(1, 2, -1, 5); 1666 int r2 = hadd_s(a2); 1667 EXPECT_EQ(r2, 7); 1668} 1669 1670/** @brief Test vint4 hadd_rgb_s. */ 1671TEST(vint4, hadd_rgb_s) 1672{ 1673 vint4 a1(1, 3, 5, 7); 1674 int r1 = hadd_rgb_s(a1); 1675 EXPECT_EQ(r1, 9); 1676 1677 vint4 a2(1, 2, -1, 5); 1678 int r2 = hadd_rgb_s(a2); 1679 EXPECT_EQ(r2, 2); 1680} 1681 1682/** @brief Test vint4 clz. */ 1683TEST(vint4, clz) 1684{ 1685 int msb_set = static_cast<int>(0x80000000); 1686 vint4 a1(msb_set, 0x40000000, 0x20000000, 0x10000000); 1687 vint4 r1 = clz(a1); 1688 EXPECT_EQ(r1.lane<0>(), 0); 1689 EXPECT_EQ(r1.lane<1>(), 1); 1690 EXPECT_EQ(r1.lane<2>(), 2); 1691 EXPECT_EQ(r1.lane<3>(), 3); 1692 1693 vint4 a2(0x0, 0x1, 0x2, 0x4); 1694 vint4 r2 = clz(a2); 1695 EXPECT_EQ(r2.lane<0>(), 32); 1696 EXPECT_EQ(r2.lane<1>(), 31); 1697 EXPECT_EQ(r2.lane<2>(), 30); 1698 EXPECT_EQ(r2.lane<3>(), 29); 1699} 1700 1701/** @brief Test vint4 two_to_the_n. */ 1702TEST(vint4, two_to_the_n) 1703{ 1704 vint4 a1(0, 1, 2, 3); 1705 vint4 r1 = two_to_the_n(a1); 1706 EXPECT_EQ(r1.lane<0>(), 1 << 0); 1707 EXPECT_EQ(r1.lane<1>(), 1 << 1); 1708 EXPECT_EQ(r1.lane<2>(), 1 << 2); 1709 EXPECT_EQ(r1.lane<3>(), 1 << 3); 1710 1711 vint4 a2(27, 28, 29, 30); 1712 vint4 r2 = two_to_the_n(a2); 1713 EXPECT_EQ(r2.lane<0>(), 1 << 27); 1714 EXPECT_EQ(r2.lane<1>(), 1 << 28); 1715 EXPECT_EQ(r2.lane<2>(), 1 << 29); 1716 EXPECT_EQ(r2.lane<3>(), 1 << 30); 1717 1718 // Shifts higher than 30 are not allowed as it overflows the int type; 1719 // and results in implementation-defined behavior because of how we 1720 // generate the shifted result in two_to_the_n(). 1721 // - Shift by 31 shifts into sign bit 1722 // - Shift by 32 shifts off the end 1723} 1724 1725/** @brief Test vint4 storea. */ 1726TEST(vint4, storea) 1727{ 1728 ASTCENC_ALIGNAS int out[4]; 1729 vint4 a(s32_data); 1730 storea(a, out); 1731 EXPECT_EQ(out[0], 0); 1732 EXPECT_EQ(out[1], 1); 1733 EXPECT_EQ(out[2], 2); 1734 EXPECT_EQ(out[3], 3); 1735} 1736 1737/** @brief Test vint4 store. */ 1738TEST(vint4, store) 1739{ 1740 ASTCENC_ALIGNAS int out[5]; 1741 vint4 a(s32_data); 1742 store(a, &(out[1])); 1743 EXPECT_EQ(out[1], 0); 1744 EXPECT_EQ(out[2], 1); 1745 EXPECT_EQ(out[3], 2); 1746 EXPECT_EQ(out[4], 3); 1747} 1748 1749/** @brief Test vint4 store_nbytes. */ 1750TEST(vint4, store_nbytes) 1751{ 1752 ASTCENC_ALIGNAS int out; 1753 vint4 a(42, 314, 75, 90); 1754 store_nbytes(a, reinterpret_cast<uint8_t*>(&out)); 1755 EXPECT_EQ(out, 42); 1756} 1757 1758/** @brief Test vint4 store_lanes_masked. */ 1759TEST(vint4, store_lanes_masked) 1760{ 1761 uint8_t resulta[16] { 0 }; 1762 1763 // Store nothing 1764 vmask4 mask1 = vint4(0) == vint4(1); 1765 vint4 data1 = vint4(1); 1766 1767 store_lanes_masked(resulta, data1, mask1); 1768 vint4 result1v = vint4::load(resulta); 1769 vint4 expect1v = vint4::zero(); 1770 EXPECT_TRUE(all(result1v == expect1v)); 1771 1772 // Store half 1773 vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1); 1774 vint4 data2 = vint4(2); 1775 1776 store_lanes_masked(resulta, data2, mask2); 1777 vint4 result2v = vint4::load(resulta); 1778 vint4 expect2v = vint4(2, 2, 0, 0); 1779 EXPECT_TRUE(all(result2v == expect2v)); 1780 1781 // Store all 1782 vmask4 mask3 = vint4(1) == vint4(1); 1783 vint4 data3 = vint4(3); 1784 1785 store_lanes_masked(resulta, data3, mask3); 1786 vint4 result3v = vint4::load(resulta); 1787 vint4 expect3v = vint4(3); 1788 EXPECT_TRUE(all(result3v == expect3v)); 1789} 1790 1791/** @brief Test vint4 store_lanes_masked to unaligned address. */ 1792TEST(vint4, store_lanes_masked_unaligned) 1793{ 1794 uint8_t resulta[17] { 0 }; 1795 1796 // Store nothing 1797 vmask4 mask1 = vint4(0) == vint4(1); 1798 vint4 data1 = vint4(1); 1799 1800 store_lanes_masked(resulta + 1, data1, mask1); 1801 vint4 result1v = vint4::load(resulta + 1); 1802 vint4 expect1v = vint4::zero(); 1803 EXPECT_TRUE(all(result1v == expect1v)); 1804 1805 // Store half 1806 vmask4 mask2 = vint4(1, 1, 0, 0) == vint4(1); 1807 vint4 data2 = vint4(2); 1808 1809 store_lanes_masked(resulta + 1, data2, mask2); 1810 vint4 result2v = vint4::load(resulta + 1); 1811 vint4 expect2v = vint4(2, 2, 0, 0); 1812 EXPECT_TRUE(all(result2v == expect2v)); 1813 1814 // Store all 1815 vmask4 mask3 = vint4(1) == vint4(1); 1816 vint4 data3 = vint4(3); 1817 1818 store_lanes_masked(resulta + 1, data3, mask3); 1819 vint4 result3v = vint4::load(resulta + 1); 1820 vint4 expect3v = vint4(3); 1821 EXPECT_TRUE(all(result3v == expect3v)); 1822} 1823 1824/** @brief Test vint4 gatheri. */ 1825TEST(vint4, gatheri) 1826{ 1827 vint4 indices(0, 4, 3, 2); 1828 vint4 r = gatheri(s32_data, indices); 1829 EXPECT_EQ(r.lane<0>(), 0); 1830 EXPECT_EQ(r.lane<1>(), 4); 1831 EXPECT_EQ(r.lane<2>(), 3); 1832 EXPECT_EQ(r.lane<3>(), 2); 1833} 1834 1835/** @brief Test vint4 pack_low_bytes. */ 1836TEST(vint4, pack_low_bytes) 1837{ 1838 vint4 a(1, 2, 3, 4); 1839 vint4 r = pack_low_bytes(a); 1840 EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0)); 1841} 1842 1843/** @brief Test vint4 select. */ 1844TEST(vint4, select) 1845{ 1846 vint4 m1(1, 1, 1, 1); 1847 vint4 m2(1, 2, 1, 2); 1848 vmask4 cond = m1 == m2; 1849 1850 vint4 a(1, 3, 3, 1); 1851 vint4 b(4, 2, 2, 4); 1852 1853 vint4 r1 = select(a, b, cond); 1854 EXPECT_EQ(r1.lane<0>(), 4); 1855 EXPECT_EQ(r1.lane<1>(), 3); 1856 EXPECT_EQ(r1.lane<2>(), 2); 1857 EXPECT_EQ(r1.lane<3>(), 1); 1858 1859 vint4 r2 = select(b, a, cond); 1860 EXPECT_EQ(r2.lane<0>(), 1); 1861 EXPECT_EQ(r2.lane<1>(), 2); 1862 EXPECT_EQ(r2.lane<2>(), 3); 1863 EXPECT_EQ(r2.lane<3>(), 4); 1864} 1865 1866// VMASK4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1867/** @brief Test vmask4 scalar literal constructor. */ 1868TEST(vmask4, scalar_literal_construct) 1869{ 1870 vfloat4 m1a(0.0f, 0.0f, 0.0f, 0.0f); 1871 vfloat4 m1b(1.0f, 1.0f, 1.0f, 1.0f); 1872 vmask4 m1(true); 1873 1874 vfloat4 r = select(m1a, m1b, m1); 1875 1876 EXPECT_EQ(r.lane<0>(), 1.0f); 1877 EXPECT_EQ(r.lane<1>(), 1.0f); 1878 EXPECT_EQ(r.lane<2>(), 1.0f); 1879 EXPECT_EQ(r.lane<3>(), 1.0f); 1880 1881 r = select(m1b, m1a, m1); 1882 1883 EXPECT_EQ(r.lane<0>(), 0.0f); 1884 EXPECT_EQ(r.lane<1>(), 0.0f); 1885 EXPECT_EQ(r.lane<2>(), 0.0f); 1886 EXPECT_EQ(r.lane<3>(), 0.0f); 1887} 1888 1889/** @brief Test vmask4 literal constructor. */ 1890TEST(vmask4, literal_construct) 1891{ 1892 vfloat4 m1a(0.0f, 0.0f, 0.0f, 0.0f); 1893 vfloat4 m1b(1.0f, 1.0f, 1.0f, 1.0f); 1894 vmask4 m1(true, false, true, false); 1895 1896 vfloat4 r = select(m1a, m1b, m1); 1897 1898 EXPECT_EQ(r.lane<0>(), 1.0f); 1899 EXPECT_EQ(r.lane<1>(), 0.0f); 1900 EXPECT_EQ(r.lane<2>(), 1.0f); 1901 EXPECT_EQ(r.lane<3>(), 0.0f); 1902} 1903 1904/** @brief Test vmask4 or. */ 1905TEST(vmask4, or) 1906{ 1907 vfloat4 m1a(0, 1, 0, 1); 1908 vfloat4 m1b(1, 1, 1, 1); 1909 vmask4 m1 = m1a == m1b; 1910 1911 vfloat4 m2a(1, 1, 0, 0); 1912 vfloat4 m2b(1, 1, 1, 1); 1913 vmask4 m2 = m2a == m2b; 1914 1915 vmask4 r = m1 | m2; 1916 EXPECT_EQ(mask(r), 0xBu); 1917} 1918 1919/** @brief Test vmask4 and. */ 1920TEST(vmask4, and) 1921{ 1922 vfloat4 m1a(0, 1, 0, 1); 1923 vfloat4 m1b(1, 1, 1, 1); 1924 vmask4 m1 = m1a == m1b; 1925 1926 vfloat4 m2a(1, 1, 0, 0); 1927 vfloat4 m2b(1, 1, 1, 1); 1928 vmask4 m2 = m2a == m2b; 1929 1930 vmask4 r = m1 & m2; 1931 EXPECT_EQ(mask(r), 0x2u); 1932} 1933 1934/** @brief Test vmask4 xor. */ 1935TEST(vmask4, xor) 1936{ 1937 vfloat4 m1a(0, 1, 0, 1); 1938 vfloat4 m1b(1, 1, 1, 1); 1939 vmask4 m1 = m1a == m1b; 1940 1941 vfloat4 m2a(1, 1, 0, 0); 1942 vfloat4 m2b(1, 1, 1, 1); 1943 vmask4 m2 = m2a == m2b; 1944 1945 vmask4 r = m1 ^ m2; 1946 EXPECT_EQ(mask(r), 0x9u); 1947} 1948 1949/** @brief Test vmask4 not. */ 1950TEST(vmask4, not) 1951{ 1952 vfloat4 m1a(0, 1, 0, 1); 1953 vfloat4 m1b(1, 1, 1, 1); 1954 vmask4 m1 = m1a == m1b; 1955 vmask4 r = ~m1; 1956 EXPECT_EQ(mask(r), 0x5u); 1957} 1958 1959/** @brief Test vint4 table permute. */ 1960TEST(vint4, vtable_8bt_32bi_32entry) 1961{ 1962 vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); 1963 vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f); 1964 1965 vint4 table0p, table1p; 1966 vtable_prepare(table0, table1, table0p, table1p); 1967 1968 vint4 index(0, 7, 4, 31); 1969 1970 vint4 result = vtable_8bt_32bi(table0p, table1p, index); 1971 1972 EXPECT_EQ(result.lane<0>(), 3); 1973 EXPECT_EQ(result.lane<1>(), 4); 1974 EXPECT_EQ(result.lane<2>(), 7); 1975 EXPECT_EQ(result.lane<3>(), 28); 1976} 1977 1978/** @brief Test vint4 table permute. */ 1979TEST(vint4, vtable_8bt_32bi_64entry) 1980{ 1981 vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); 1982 vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f); 1983 vint4 table2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f); 1984 vint4 table3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f); 1985 1986 vint4 table0p, table1p, table2p, table3p; 1987 vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p); 1988 1989 vint4 index(0, 7, 38, 63); 1990 1991 vint4 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index); 1992 1993 EXPECT_EQ(result.lane<0>(), 3); 1994 EXPECT_EQ(result.lane<1>(), 4); 1995 EXPECT_EQ(result.lane<2>(), 37); 1996 EXPECT_EQ(result.lane<3>(), 60); 1997} 1998 1999/** @brief Test vint4 rgba byte interleave. */ 2000TEST(vint4, interleave_rgba8) 2001{ 2002 vint4 r(0x01, 0x11, 0x21, 0x31); 2003 vint4 g(0x02, 0x12, 0x22, 0x32); 2004 vint4 b(0x03, 0x13, 0x23, 0x33); 2005 vint4 a(0x04, 0x14, 0x24, 0x34); 2006 2007 vint4 result = interleave_rgba8(r, g, b, a); 2008 2009 EXPECT_EQ(result.lane<0>(), 0x04030201); 2010 EXPECT_EQ(result.lane<1>(), 0x14131211); 2011 EXPECT_EQ(result.lane<2>(), 0x24232221); 2012 EXPECT_EQ(result.lane<3>(), 0x34333231); 2013} 2014 2015# if ASTCENC_SIMD_WIDTH == 8 2016 2017// VFLOAT8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 2018 2019/** @brief Test unaligned vfloat8 data load. */ 2020TEST(vfloat8, UnalignedLoad) 2021{ 2022 vfloat8 a(&(f32_data[1])); 2023 EXPECT_EQ(a.lane<0>(), 1.0f); 2024 EXPECT_EQ(a.lane<1>(), 2.0f); 2025 EXPECT_EQ(a.lane<2>(), 3.0f); 2026 EXPECT_EQ(a.lane<3>(), 4.0f); 2027 EXPECT_EQ(a.lane<4>(), 5.0f); 2028 EXPECT_EQ(a.lane<5>(), 6.0f); 2029 EXPECT_EQ(a.lane<6>(), 7.0f); 2030 EXPECT_EQ(a.lane<7>(), 8.0f); 2031} 2032 2033/** @brief Test scalar duplicated vfloat8 load. */ 2034TEST(vfloat8, ScalarDupLoad) 2035{ 2036 vfloat8 a(1.1f); 2037 EXPECT_EQ(a.lane<0>(), 1.1f); 2038 EXPECT_EQ(a.lane<1>(), 1.1f); 2039 EXPECT_EQ(a.lane<2>(), 1.1f); 2040 EXPECT_EQ(a.lane<3>(), 1.1f); 2041 EXPECT_EQ(a.lane<4>(), 1.1f); 2042 EXPECT_EQ(a.lane<5>(), 1.1f); 2043 EXPECT_EQ(a.lane<6>(), 1.1f); 2044 EXPECT_EQ(a.lane<7>(), 1.1f); 2045} 2046 2047/** @brief Test scalar vfloat8 load. */ 2048TEST(vfloat8, ScalarLoad) 2049{ 2050 vfloat8 a(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); 2051 EXPECT_EQ(a.lane<0>(), 1.1f); 2052 EXPECT_EQ(a.lane<1>(), 2.2f); 2053 EXPECT_EQ(a.lane<2>(), 3.3f); 2054 EXPECT_EQ(a.lane<3>(), 4.4f); 2055 EXPECT_EQ(a.lane<4>(), 5.5f); 2056 EXPECT_EQ(a.lane<5>(), 6.6f); 2057 EXPECT_EQ(a.lane<6>(), 7.7f); 2058 EXPECT_EQ(a.lane<7>(), 8.8f); 2059} 2060 2061/** @brief Test copy vfloat8 load. */ 2062TEST(vfloat8, CopyLoad) 2063{ 2064 vfloat8 s(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); 2065 vfloat8 a(s.m); 2066 EXPECT_EQ(a.lane<0>(), 1.1f); 2067 EXPECT_EQ(a.lane<1>(), 2.2f); 2068 EXPECT_EQ(a.lane<2>(), 3.3f); 2069 EXPECT_EQ(a.lane<3>(), 4.4f); 2070 EXPECT_EQ(a.lane<4>(), 5.5f); 2071 EXPECT_EQ(a.lane<5>(), 6.6f); 2072 EXPECT_EQ(a.lane<6>(), 7.7f); 2073 EXPECT_EQ(a.lane<7>(), 8.8f); 2074} 2075 2076/** @brief Test vfloat8 zero. */ 2077TEST(vfloat8, Zero) 2078{ 2079 vfloat8 a = vfloat8::zero(); 2080 EXPECT_EQ(a.lane<0>(), 0.0f); 2081 EXPECT_EQ(a.lane<1>(), 0.0f); 2082 EXPECT_EQ(a.lane<2>(), 0.0f); 2083 EXPECT_EQ(a.lane<3>(), 0.0f); 2084 EXPECT_EQ(a.lane<4>(), 0.0f); 2085 EXPECT_EQ(a.lane<5>(), 0.0f); 2086 EXPECT_EQ(a.lane<6>(), 0.0f); 2087 EXPECT_EQ(a.lane<7>(), 0.0f); 2088} 2089 2090/** @brief Test vfloat8 load1. */ 2091TEST(vfloat8, Load1) 2092{ 2093 float s = 3.14f; 2094 vfloat8 a = vfloat8::load1(&s); 2095 EXPECT_EQ(a.lane<0>(), 3.14f); 2096 EXPECT_EQ(a.lane<1>(), 3.14f); 2097 EXPECT_EQ(a.lane<2>(), 3.14f); 2098 EXPECT_EQ(a.lane<3>(), 3.14f); 2099 EXPECT_EQ(a.lane<4>(), 3.14f); 2100 EXPECT_EQ(a.lane<5>(), 3.14f); 2101 EXPECT_EQ(a.lane<6>(), 3.14f); 2102 EXPECT_EQ(a.lane<7>(), 3.14f); 2103} 2104 2105/** @brief Test vfloat8 loada. */ 2106TEST(vfloat8, Loada) 2107{ 2108 vfloat8 a = vfloat8::loada(&(f32_data[0])); 2109 EXPECT_EQ(a.lane<0>(), 0.0f); 2110 EXPECT_EQ(a.lane<1>(), 1.0f); 2111 EXPECT_EQ(a.lane<2>(), 2.0f); 2112 EXPECT_EQ(a.lane<3>(), 3.0f); 2113 EXPECT_EQ(a.lane<4>(), 4.0f); 2114 EXPECT_EQ(a.lane<5>(), 5.0f); 2115 EXPECT_EQ(a.lane<6>(), 6.0f); 2116 EXPECT_EQ(a.lane<7>(), 7.0f); 2117} 2118 2119/** @brief Test vfloat8 lane_id. */ 2120TEST(vfloat8, LaneID) 2121{ 2122 vfloat8 a = vfloat8::lane_id(); 2123 EXPECT_EQ(a.lane<0>(), 0.0f); 2124 EXPECT_EQ(a.lane<1>(), 1.0f); 2125 EXPECT_EQ(a.lane<2>(), 2.0f); 2126 EXPECT_EQ(a.lane<3>(), 3.0f); 2127 EXPECT_EQ(a.lane<4>(), 4.0f); 2128 EXPECT_EQ(a.lane<5>(), 5.0f); 2129 EXPECT_EQ(a.lane<6>(), 6.0f); 2130 EXPECT_EQ(a.lane<7>(), 7.0f); 2131} 2132 2133/** @brief Test vfloat8 add. */ 2134TEST(vfloat8, vadd) 2135{ 2136 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2137 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2138 a = a + b; 2139 EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f); 2140 EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f); 2141 EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f); 2142 EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f); 2143 EXPECT_EQ(a.lane<4>(), 5.0f + 0.5f); 2144 EXPECT_EQ(a.lane<5>(), 6.0f + 0.6f); 2145 EXPECT_EQ(a.lane<6>(), 7.0f + 0.7f); 2146 EXPECT_EQ(a.lane<7>(), 8.0f + 0.8f); 2147} 2148 2149/** @brief Test vfloat8 sub. */ 2150TEST(vfloat8, vsub) 2151{ 2152 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2153 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2154 a = a - b; 2155 EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f); 2156 EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f); 2157 EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f); 2158 EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f); 2159 EXPECT_EQ(a.lane<4>(), 5.0f - 0.5f); 2160 EXPECT_EQ(a.lane<5>(), 6.0f - 0.6f); 2161 EXPECT_EQ(a.lane<6>(), 7.0f - 0.7f); 2162 EXPECT_EQ(a.lane<7>(), 8.0f - 0.8f); 2163} 2164 2165/** @brief Test vfloat8 mul. */ 2166TEST(vfloat8, vmul) 2167{ 2168 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2169 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2170 a = a * b; 2171 EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f); 2172 EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f); 2173 EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f); 2174 EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f); 2175 EXPECT_EQ(a.lane<4>(), 5.0f * 0.5f); 2176 EXPECT_EQ(a.lane<5>(), 6.0f * 0.6f); 2177 EXPECT_EQ(a.lane<6>(), 7.0f * 0.7f); 2178 EXPECT_EQ(a.lane<7>(), 8.0f * 0.8f); 2179} 2180 2181/** @brief Test vfloat8 mul. */ 2182TEST(vfloat8, vsmul) 2183{ 2184 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2185 float b = 3.14f; 2186 a = a * b; 2187 EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f); 2188 EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f); 2189 EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f); 2190 EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f); 2191 EXPECT_EQ(a.lane<4>(), 5.0f * 3.14f); 2192 EXPECT_EQ(a.lane<5>(), 6.0f * 3.14f); 2193 EXPECT_EQ(a.lane<6>(), 7.0f * 3.14f); 2194 EXPECT_EQ(a.lane<7>(), 8.0f * 3.14f); 2195} 2196 2197/** @brief Test vfloat8 mul. */ 2198TEST(vfloat8, svmul) 2199{ 2200 float a = 3.14f; 2201 vfloat8 b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2202 b = a * b; 2203 EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f); 2204 EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f); 2205 EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f); 2206 EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f); 2207 EXPECT_EQ(b.lane<4>(), 3.14f * 5.0f); 2208 EXPECT_EQ(b.lane<5>(), 3.14f * 6.0f); 2209 EXPECT_EQ(b.lane<6>(), 3.14f * 7.0f); 2210 EXPECT_EQ(b.lane<7>(), 3.14f * 8.0f); 2211} 2212 2213/** @brief Test vfloat8 div. */ 2214TEST(vfloat8, vdiv) 2215{ 2216 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2217 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2218 a = a / b; 2219 EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f); 2220 EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f); 2221 EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f); 2222 EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f); 2223 EXPECT_EQ(a.lane<4>(), 5.0f / 0.5f); 2224 EXPECT_EQ(a.lane<5>(), 6.0f / 0.6f); 2225 EXPECT_EQ(a.lane<6>(), 7.0f / 0.7f); 2226 EXPECT_EQ(a.lane<7>(), 8.0f / 0.8f); 2227} 2228 2229/** @brief Test vfloat8 div. */ 2230TEST(vfloat8, vsdiv) 2231{ 2232 vfloat8 a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2233 float b = 3.14f; 2234 vfloat8 r = a / b; 2235 2236 EXPECT_EQ(r.lane<0>(), 0.1f / 3.14f); 2237 EXPECT_EQ(r.lane<1>(), 0.2f / 3.14f); 2238 EXPECT_EQ(r.lane<2>(), 0.3f / 3.14f); 2239 EXPECT_EQ(r.lane<3>(), 0.4f / 3.14f); 2240 EXPECT_EQ(r.lane<4>(), 0.5f / 3.14f); 2241 EXPECT_EQ(r.lane<5>(), 0.6f / 3.14f); 2242 EXPECT_EQ(r.lane<6>(), 0.7f / 3.14f); 2243 EXPECT_EQ(r.lane<7>(), 0.8f / 3.14f); 2244} 2245 2246/** @brief Test vfloat8 div. */ 2247TEST(vfloat8, svdiv) 2248{ 2249 float a = 3.14f; 2250 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2251 vfloat8 r = a / b; 2252 2253 EXPECT_EQ(r.lane<0>(), 3.14f / 0.1f); 2254 EXPECT_EQ(r.lane<1>(), 3.14f / 0.2f); 2255 EXPECT_EQ(r.lane<2>(), 3.14f / 0.3f); 2256 EXPECT_EQ(r.lane<3>(), 3.14f / 0.4f); 2257 EXPECT_EQ(r.lane<4>(), 3.14f / 0.5f); 2258 EXPECT_EQ(r.lane<5>(), 3.14f / 0.6f); 2259 EXPECT_EQ(r.lane<6>(), 3.14f / 0.7f); 2260 EXPECT_EQ(r.lane<7>(), 3.14f / 0.8f); 2261} 2262 2263/** @brief Test vfloat8 ceq. */ 2264TEST(vfloat8, ceq) 2265{ 2266 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2267 vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2268 vmask8 r1 = a1 == b1; 2269 EXPECT_EQ(0u, mask(r1)); 2270 EXPECT_EQ(false, any(r1)); 2271 EXPECT_EQ(false, all(r1)); 2272 2273 vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2274 vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2275 vmask8 r2 = a2 == b2; 2276 EXPECT_EQ(0x1u, mask(r2)); 2277 EXPECT_EQ(true, any(r2)); 2278 EXPECT_EQ(false, all(r2)); 2279 2280 vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2281 vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2282 vmask8 r3 = a3 == b3; 2283 EXPECT_EQ(0x5u, mask(r3)); 2284 EXPECT_EQ(true, any(r3)); 2285 EXPECT_EQ(false, all(r3)); 2286 2287 vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2288 vmask8 r4 = a4 == a4; 2289 EXPECT_EQ(0xFFu, mask(r4)); 2290 EXPECT_EQ(true, any(r4)); 2291 EXPECT_EQ(true, all(r4)); 2292} 2293 2294/** @brief Test vfloat8 cne. */ 2295TEST(vfloat8, cne) 2296{ 2297 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2298 vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2299 vmask8 r1 = a1 != b1; 2300 EXPECT_EQ(0xFFu, mask(r1)); 2301 EXPECT_EQ(true, any(r1)); 2302 EXPECT_EQ(true, all(r1)); 2303 2304 vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2305 vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2306 vmask8 r2 = a2 != b2; 2307 EXPECT_EQ(0xFEu, mask(r2)); 2308 EXPECT_EQ(true, any(r2)); 2309 EXPECT_EQ(false, all(r2)); 2310 2311 vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2312 vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 2313 vmask8 r3 = a3 != b3; 2314 EXPECT_EQ(0xFAu, mask(r3)); 2315 EXPECT_EQ(true, any(r3)); 2316 EXPECT_EQ(false, all(r3)); 2317 2318 vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); 2319 vmask8 r4 = a4 != a4; 2320 EXPECT_EQ(0u, mask(r4)); 2321 EXPECT_EQ(false, any(r4)); 2322 EXPECT_EQ(false, all(r4)); 2323} 2324 2325/** @brief Test vfloat8 clt. */ 2326TEST(vfloat8, clt) 2327{ 2328 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2329 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); 2330 vmask8 r = a < b; 2331 EXPECT_EQ(0xAAu, mask(r)); 2332} 2333 2334/** @brief Test vfloat8 cle. */ 2335TEST(vfloat8, cle) 2336{ 2337 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2338 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); 2339 vmask8 r = a <= b; 2340 EXPECT_EQ(0xEEu, mask(r)); 2341} 2342 2343/** @brief Test vfloat8 cgt. */ 2344TEST(vfloat8, cgt) 2345{ 2346 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2347 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); 2348 vmask8 r = a > b; 2349 EXPECT_EQ(0x11u, mask(r)); 2350} 2351 2352/** @brief Test vfloat8 cge. */ 2353TEST(vfloat8, cge) 2354{ 2355 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2356 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); 2357 vmask8 r = a >= b; 2358 EXPECT_EQ(0x55u, mask(r)); 2359} 2360 2361/** @brief Test vfloat8 min. */ 2362TEST(vfloat8, min) 2363{ 2364 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2365 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); 2366 vfloat8 r = min(a, b); 2367 EXPECT_EQ(r.lane<0>(), 0.9f); 2368 EXPECT_EQ(r.lane<1>(), 2.0f); 2369 EXPECT_EQ(r.lane<2>(), 3.0f); 2370 EXPECT_EQ(r.lane<3>(), 4.0f); 2371 EXPECT_EQ(r.lane<4>(), 0.9f); 2372 EXPECT_EQ(r.lane<5>(), 2.0f); 2373 EXPECT_EQ(r.lane<6>(), 3.0f); 2374 EXPECT_EQ(r.lane<7>(), 4.0f); 2375} 2376 2377/** @brief Test vfloat8 max. */ 2378TEST(vfloat8, max) 2379{ 2380 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2381 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); 2382 vfloat8 r = max(a, b); 2383 EXPECT_EQ(r.lane<0>(), 1.0f); 2384 EXPECT_EQ(r.lane<1>(), 2.1f); 2385 EXPECT_EQ(r.lane<2>(), 3.0f); 2386 EXPECT_EQ(r.lane<3>(), 4.1f); 2387 EXPECT_EQ(r.lane<4>(), 1.0f); 2388 EXPECT_EQ(r.lane<5>(), 2.1f); 2389 EXPECT_EQ(r.lane<6>(), 3.0f); 2390 EXPECT_EQ(r.lane<7>(), 4.1f); 2391} 2392 2393/** @brief Test vfloat8 clamp. */ 2394TEST(vfloat8, clamp) 2395{ 2396 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2397 vfloat8 r1 = clamp(2.1f, 3.0f, a1); 2398 EXPECT_EQ(r1.lane<0>(), 2.1f); 2399 EXPECT_EQ(r1.lane<1>(), 2.1f); 2400 EXPECT_EQ(r1.lane<2>(), 3.0f); 2401 EXPECT_EQ(r1.lane<3>(), 3.0f); 2402 EXPECT_EQ(r1.lane<4>(), 2.1f); 2403 EXPECT_EQ(r1.lane<5>(), 2.1f); 2404 EXPECT_EQ(r1.lane<6>(), 3.0f); 2405 EXPECT_EQ(r1.lane<7>(), 3.0f); 2406 2407 vfloat8 a2(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f); 2408 vfloat8 r2 = clamp(2.1f, 3.0f, a2); 2409 EXPECT_EQ(r2.lane<0>(), 2.1f); 2410 EXPECT_EQ(r2.lane<1>(), 2.1f); 2411 EXPECT_EQ(r2.lane<2>(), 2.1f); 2412 EXPECT_EQ(r2.lane<3>(), 3.0f); 2413 EXPECT_EQ(r2.lane<4>(), 2.1f); 2414 EXPECT_EQ(r2.lane<5>(), 2.1f); 2415 EXPECT_EQ(r2.lane<6>(), 2.1f); 2416 EXPECT_EQ(r2.lane<7>(), 3.0f); 2417} 2418 2419/** @brief Test vfloat8 clampz. */ 2420TEST(vfloat8, clampz) 2421{ 2422 vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); 2423 vfloat8 r1 = clampz(3.0f, a1); 2424 EXPECT_EQ(r1.lane<0>(), 0.0f); 2425 EXPECT_EQ(r1.lane<1>(), 0.0f); 2426 EXPECT_EQ(r1.lane<2>(), 0.1f); 2427 EXPECT_EQ(r1.lane<3>(), 3.0f); 2428 EXPECT_EQ(r1.lane<4>(), 0.0f); 2429 EXPECT_EQ(r1.lane<5>(), 0.0f); 2430 EXPECT_EQ(r1.lane<6>(), 0.1f); 2431 EXPECT_EQ(r1.lane<7>(), 3.0f); 2432 2433 vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f); 2434 vfloat8 r2 = clampz(3.0f, a2); 2435 EXPECT_EQ(r2.lane<0>(), 0.0f); 2436 EXPECT_EQ(r2.lane<1>(), 0.0f); 2437 EXPECT_EQ(r2.lane<2>(), 0.0f); 2438 EXPECT_EQ(r2.lane<3>(), 3.0f); 2439 EXPECT_EQ(r2.lane<4>(), 0.0f); 2440 EXPECT_EQ(r2.lane<5>(), 0.0f); 2441 EXPECT_EQ(r2.lane<6>(), 0.0f); 2442 EXPECT_EQ(r2.lane<7>(), 3.0f); 2443} 2444 2445/** @brief Test vfloat8 clampz. */ 2446TEST(vfloat8, clampzo) 2447{ 2448 vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); 2449 vfloat8 r1 = clampzo(a1); 2450 EXPECT_EQ(r1.lane<0>(), 0.0f); 2451 EXPECT_EQ(r1.lane<1>(), 0.0f); 2452 EXPECT_EQ(r1.lane<2>(), 0.1f); 2453 EXPECT_EQ(r1.lane<3>(), 1.0f); 2454 EXPECT_EQ(r1.lane<4>(), 0.0f); 2455 EXPECT_EQ(r1.lane<5>(), 0.0f); 2456 EXPECT_EQ(r1.lane<6>(), 0.1f); 2457 EXPECT_EQ(r1.lane<7>(), 1.0f); 2458 2459 vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f); 2460 vfloat8 r2 = clampzo(a2); 2461 EXPECT_EQ(r2.lane<0>(), 0.0f); 2462 EXPECT_EQ(r2.lane<1>(), 0.0f); 2463 EXPECT_EQ(r2.lane<2>(), 0.0f); 2464 EXPECT_EQ(r2.lane<3>(), 1.0f); 2465 EXPECT_EQ(r2.lane<4>(), 0.0f); 2466 EXPECT_EQ(r2.lane<5>(), 0.0f); 2467 EXPECT_EQ(r2.lane<6>(), 0.0f); 2468 EXPECT_EQ(r2.lane<7>(), 1.0f); 2469} 2470 2471/** @brief Test vfloat8 abs. */ 2472TEST(vfloat8, abs) 2473{ 2474 vfloat8 a(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); 2475 vfloat8 r = abs(a); 2476 EXPECT_EQ(r.lane<0>(), 1.0f); 2477 EXPECT_EQ(r.lane<1>(), 0.0f); 2478 EXPECT_EQ(r.lane<2>(), 0.1f); 2479 EXPECT_EQ(r.lane<3>(), 4.0f); 2480 EXPECT_EQ(r.lane<4>(), 1.0f); 2481 EXPECT_EQ(r.lane<5>(), 0.0f); 2482 EXPECT_EQ(r.lane<6>(), 0.1f); 2483 EXPECT_EQ(r.lane<7>(), 4.0f); 2484} 2485 2486/** @brief Test vfloat8 round. */ 2487TEST(vfloat8, round) 2488{ 2489 vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); 2490 vfloat8 r = round(a); 2491 EXPECT_EQ(r.lane<0>(), 1.0f); 2492 EXPECT_EQ(r.lane<1>(), 2.0f); 2493 EXPECT_EQ(r.lane<2>(), 2.0f); 2494 EXPECT_EQ(r.lane<3>(), 4.0f); 2495 EXPECT_EQ(r.lane<4>(), 1.0f); 2496 EXPECT_EQ(r.lane<5>(), 2.0f); 2497 EXPECT_EQ(r.lane<6>(), 2.0f); 2498 EXPECT_EQ(r.lane<7>(), 4.0f); 2499} 2500 2501/** @brief Test vfloat8 hmin. */ 2502TEST(vfloat8, hmin) 2503{ 2504 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); 2505 vfloat8 r1 = hmin(a1); 2506 EXPECT_EQ(r1.lane<0>(), 1.1f); 2507 EXPECT_EQ(r1.lane<1>(), 1.1f); 2508 EXPECT_EQ(r1.lane<2>(), 1.1f); 2509 EXPECT_EQ(r1.lane<3>(), 1.1f); 2510 EXPECT_EQ(r1.lane<4>(), 1.1f); 2511 EXPECT_EQ(r1.lane<5>(), 1.1f); 2512 EXPECT_EQ(r1.lane<6>(), 1.1f); 2513 EXPECT_EQ(r1.lane<7>(), 1.1f); 2514 2515 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); 2516 vfloat8 r2 = hmin(a2); 2517 EXPECT_EQ(r2.lane<0>(), 0.2f); 2518 EXPECT_EQ(r2.lane<1>(), 0.2f); 2519 EXPECT_EQ(r2.lane<2>(), 0.2f); 2520 EXPECT_EQ(r2.lane<3>(), 0.2f); 2521 EXPECT_EQ(r2.lane<4>(), 0.2f); 2522 EXPECT_EQ(r2.lane<5>(), 0.2f); 2523 EXPECT_EQ(r2.lane<6>(), 0.2f); 2524 EXPECT_EQ(r2.lane<7>(), 0.2f); 2525} 2526 2527/** @brief Test vfloat8 hmin_s. */ 2528TEST(vfloat8, hmin_s) 2529{ 2530 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); 2531 float r1 = hmin_s(a1); 2532 EXPECT_EQ(r1, 1.1f); 2533 2534 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); 2535 float r2 = hmin_s(a2); 2536 EXPECT_EQ(r2, 0.2f); 2537} 2538 2539/** @brief Test vfloat8 hmax. */ 2540TEST(vfloat8, hmax) 2541{ 2542 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); 2543 vfloat8 r1 = hmax(a1); 2544 EXPECT_EQ(r1.lane<0>(), 4.0f); 2545 EXPECT_EQ(r1.lane<1>(), 4.0f); 2546 EXPECT_EQ(r1.lane<2>(), 4.0f); 2547 EXPECT_EQ(r1.lane<3>(), 4.0f); 2548 EXPECT_EQ(r1.lane<4>(), 4.0f); 2549 EXPECT_EQ(r1.lane<5>(), 4.0f); 2550 EXPECT_EQ(r1.lane<6>(), 4.0f); 2551 EXPECT_EQ(r1.lane<7>(), 4.0f); 2552 2553 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); 2554 vfloat8 r2 = hmax(a2); 2555 EXPECT_EQ(r2.lane<0>(), 1.6f); 2556 EXPECT_EQ(r2.lane<1>(), 1.6f); 2557 EXPECT_EQ(r2.lane<2>(), 1.6f); 2558 EXPECT_EQ(r2.lane<3>(), 1.6f); 2559 EXPECT_EQ(r2.lane<4>(), 1.6f); 2560 EXPECT_EQ(r2.lane<5>(), 1.6f); 2561 EXPECT_EQ(r2.lane<6>(), 1.6f); 2562 EXPECT_EQ(r2.lane<7>(), 1.6f); 2563} 2564 2565/** @brief Test vfloat8 hmax_s. */ 2566TEST(vfloat8, hmax_s) 2567{ 2568 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); 2569 float r1 = hmax_s(a1); 2570 EXPECT_EQ(r1, 4.0f); 2571 2572 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); 2573 float r2 = hmax_s(a2); 2574 EXPECT_EQ(r2, 1.6f); 2575} 2576 2577/** @brief Test vfloat8 hadd_s. */ 2578TEST(vfloat8, hadd_s) 2579{ 2580 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); 2581 float sum = 1.1f + 1.5f + 1.6f + 4.0f + 1.1f + 1.5f + 1.6f + 4.0f; 2582 float r = hadd_s(a1); 2583 EXPECT_NEAR(r, sum, 0.005f); 2584} 2585 2586/** @brief Test vfloat8 sqrt. */ 2587TEST(vfloat8, sqrt) 2588{ 2589 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); 2590 vfloat8 r = sqrt(a); 2591 EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f)); 2592 EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f)); 2593 EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f)); 2594 EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f)); 2595 EXPECT_EQ(r.lane<4>(), std::sqrt(1.0f)); 2596 EXPECT_EQ(r.lane<5>(), std::sqrt(2.0f)); 2597 EXPECT_EQ(r.lane<6>(), std::sqrt(3.0f)); 2598 EXPECT_EQ(r.lane<7>(), std::sqrt(4.0f)); 2599} 2600 2601/** @brief Test vfloat8 select. */ 2602TEST(vfloat8, select) 2603{ 2604 vfloat8 m1(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f); 2605 vfloat8 m2(1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f); 2606 vmask8 cond = m1 == m2; 2607 2608 vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0); 2609 vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0); 2610 2611 // Select in one direction 2612 vfloat8 r1 = select(a, b, cond); 2613 EXPECT_EQ(r1.lane<0>(), 4.0f); 2614 EXPECT_EQ(r1.lane<1>(), 3.0f); 2615 EXPECT_EQ(r1.lane<2>(), 2.0f); 2616 EXPECT_EQ(r1.lane<3>(), 1.0f); 2617 EXPECT_EQ(r1.lane<4>(), 4.0f); 2618 EXPECT_EQ(r1.lane<5>(), 3.0f); 2619 EXPECT_EQ(r1.lane<6>(), 2.0f); 2620 EXPECT_EQ(r1.lane<7>(), 1.0f); 2621 2622 // Select in the other 2623 vfloat8 r2 = select(b, a, cond); 2624 EXPECT_EQ(r2.lane<0>(), 1.0f); 2625 EXPECT_EQ(r2.lane<1>(), 2.0f); 2626 EXPECT_EQ(r2.lane<2>(), 3.0f); 2627 EXPECT_EQ(r2.lane<3>(), 4.0f); 2628 EXPECT_EQ(r2.lane<4>(), 1.0f); 2629 EXPECT_EQ(r2.lane<5>(), 2.0f); 2630 EXPECT_EQ(r2.lane<6>(), 3.0f); 2631 EXPECT_EQ(r2.lane<7>(), 4.0f); 2632} 2633 2634/** @brief Test vfloat8 select MSB only. */ 2635TEST(vfloat8, select_msb) 2636{ 2637 int msb_set = static_cast<int>(0x80000000); 2638 vint8 msb(msb_set, 0, msb_set, 0, msb_set, 0, msb_set, 0); 2639 vmask8 cond(msb.m); 2640 2641 vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f); 2642 vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f); 2643 2644 // Select in one direction 2645 vfloat8 r1 = select(a, b, cond); 2646 EXPECT_EQ(r1.lane<0>(), 4.0f); 2647 EXPECT_EQ(r1.lane<1>(), 3.0f); 2648 EXPECT_EQ(r1.lane<2>(), 2.0f); 2649 EXPECT_EQ(r1.lane<3>(), 1.0f); 2650 EXPECT_EQ(r1.lane<4>(), 4.0f); 2651 EXPECT_EQ(r1.lane<5>(), 3.0f); 2652 EXPECT_EQ(r1.lane<6>(), 2.0f); 2653 EXPECT_EQ(r1.lane<7>(), 1.0f); 2654 2655 // Select in the other 2656 vfloat8 r2 = select(b, a, cond); 2657 EXPECT_EQ(r2.lane<0>(), 1.0f); 2658 EXPECT_EQ(r2.lane<1>(), 2.0f); 2659 EXPECT_EQ(r2.lane<2>(), 3.0f); 2660 EXPECT_EQ(r2.lane<3>(), 4.0f); 2661 EXPECT_EQ(r2.lane<4>(), 1.0f); 2662 EXPECT_EQ(r2.lane<5>(), 2.0f); 2663 EXPECT_EQ(r2.lane<6>(), 3.0f); 2664 EXPECT_EQ(r2.lane<7>(), 4.0f); 2665} 2666 2667/** @brief Test vfloat8 gatherf. */ 2668TEST(vfloat8, gatherf) 2669{ 2670 vint8 indices(0, 4, 3, 2, 7, 4, 3, 2); 2671 vfloat8 r = gatherf(f32_data, indices); 2672 EXPECT_EQ(r.lane<0>(), 0.0f); 2673 EXPECT_EQ(r.lane<1>(), 4.0f); 2674 EXPECT_EQ(r.lane<2>(), 3.0f); 2675 EXPECT_EQ(r.lane<3>(), 2.0f); 2676 EXPECT_EQ(r.lane<4>(), 7.0f); 2677 EXPECT_EQ(r.lane<5>(), 4.0f); 2678 EXPECT_EQ(r.lane<6>(), 3.0f); 2679 EXPECT_EQ(r.lane<7>(), 2.0f); 2680} 2681 2682/** @brief Test vfloat8 store. */ 2683TEST(vfloat8, store) 2684{ 2685 alignas(32) float out[9]; 2686 vfloat8 a(f32_data); 2687 store(a, &(out[1])); 2688 EXPECT_EQ(out[1], 0.0f); 2689 EXPECT_EQ(out[2], 1.0f); 2690 EXPECT_EQ(out[3], 2.0f); 2691 EXPECT_EQ(out[4], 3.0f); 2692 EXPECT_EQ(out[5], 4.0f); 2693 EXPECT_EQ(out[6], 5.0f); 2694 EXPECT_EQ(out[7], 6.0f); 2695 EXPECT_EQ(out[8], 7.0f); 2696} 2697 2698/** @brief Test vfloat8 storea. */ 2699TEST(vfloat8, storea) 2700{ 2701 alignas(32) float out[9]; 2702 vfloat8 a(f32_data); 2703 store(a, out); 2704 EXPECT_EQ(out[0], 0.0f); 2705 EXPECT_EQ(out[1], 1.0f); 2706 EXPECT_EQ(out[2], 2.0f); 2707 EXPECT_EQ(out[3], 3.0f); 2708 EXPECT_EQ(out[4], 4.0f); 2709 EXPECT_EQ(out[5], 5.0f); 2710 EXPECT_EQ(out[6], 6.0f); 2711 EXPECT_EQ(out[7], 7.0f); 2712} 2713 2714/** @brief Test vfloat8 float_to_int. */ 2715TEST(vfloat8, float_to_int) 2716{ 2717 vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); 2718 vint8 r = float_to_int(a); 2719 EXPECT_EQ(r.lane<0>(), 1); 2720 EXPECT_EQ(r.lane<1>(), 1); 2721 EXPECT_EQ(r.lane<2>(), 1); 2722 EXPECT_EQ(r.lane<3>(), 4); 2723 EXPECT_EQ(r.lane<4>(), 1); 2724 EXPECT_EQ(r.lane<5>(), 1); 2725 EXPECT_EQ(r.lane<6>(), 1); 2726 EXPECT_EQ(r.lane<7>(), 4); 2727} 2728 2729// vint8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 2730 2731/** @brief Test unaligned vint8 data load. */ 2732TEST(vint8, UnalignedLoad) 2733{ 2734 vint8 a(&(s32_data[1])); 2735 EXPECT_EQ(a.lane<0>(), 1); 2736 EXPECT_EQ(a.lane<1>(), 2); 2737 EXPECT_EQ(a.lane<2>(), 3); 2738 EXPECT_EQ(a.lane<3>(), 4); 2739 EXPECT_EQ(a.lane<4>(), 5); 2740 EXPECT_EQ(a.lane<5>(), 6); 2741 EXPECT_EQ(a.lane<6>(), 7); 2742 EXPECT_EQ(a.lane<7>(), 8); 2743} 2744 2745/** @brief Test unaligned vint8 data load. */ 2746TEST(vint8, UnalignedLoad8) 2747{ 2748 vint8 a(&(u8_data[1])); 2749 EXPECT_EQ(a.lane<0>(), 1); 2750 EXPECT_EQ(a.lane<1>(), 2); 2751 EXPECT_EQ(a.lane<2>(), 3); 2752 EXPECT_EQ(a.lane<3>(), 4); 2753 EXPECT_EQ(a.lane<4>(), 5); 2754 EXPECT_EQ(a.lane<5>(), 6); 2755 EXPECT_EQ(a.lane<6>(), 7); 2756 EXPECT_EQ(a.lane<7>(), 8); 2757} 2758 2759/** @brief Test scalar duplicated vint8 load. */ 2760TEST(vint8, ScalarDupLoad) 2761{ 2762 vint8 a(42); 2763 EXPECT_EQ(a.lane<0>(), 42); 2764 EXPECT_EQ(a.lane<1>(), 42); 2765 EXPECT_EQ(a.lane<2>(), 42); 2766 EXPECT_EQ(a.lane<3>(), 42); 2767 EXPECT_EQ(a.lane<4>(), 42); 2768 EXPECT_EQ(a.lane<5>(), 42); 2769 EXPECT_EQ(a.lane<6>(), 42); 2770 EXPECT_EQ(a.lane<7>(), 42); 2771} 2772 2773/** @brief Test scalar vint8 load. */ 2774TEST(vint8, ScalarLoad) 2775{ 2776 vint8 a(11, 22, 33, 44, 55, 66, 77, 88); 2777 EXPECT_EQ(a.lane<0>(), 11); 2778 EXPECT_EQ(a.lane<1>(), 22); 2779 EXPECT_EQ(a.lane<2>(), 33); 2780 EXPECT_EQ(a.lane<3>(), 44); 2781 EXPECT_EQ(a.lane<4>(), 55); 2782 EXPECT_EQ(a.lane<5>(), 66); 2783 EXPECT_EQ(a.lane<6>(), 77); 2784 EXPECT_EQ(a.lane<7>(), 88); 2785} 2786 2787/** @brief Test copy vint8 load. */ 2788TEST(vint8, CopyLoad) 2789{ 2790 vint8 s(11, 22, 33, 44, 55, 66, 77, 88); 2791 vint8 a(s.m); 2792 EXPECT_EQ(a.lane<0>(), 11); 2793 EXPECT_EQ(a.lane<1>(), 22); 2794 EXPECT_EQ(a.lane<2>(), 33); 2795 EXPECT_EQ(a.lane<3>(), 44); 2796 EXPECT_EQ(a.lane<4>(), 55); 2797 EXPECT_EQ(a.lane<5>(), 66); 2798 EXPECT_EQ(a.lane<6>(), 77); 2799 EXPECT_EQ(a.lane<7>(), 88); 2800} 2801 2802/** @brief Test vint8 zero. */ 2803TEST(vint8, Zero) 2804{ 2805 vint8 a = vint8::zero(); 2806 EXPECT_EQ(a.lane<0>(), 0); 2807 EXPECT_EQ(a.lane<1>(), 0); 2808 EXPECT_EQ(a.lane<2>(), 0); 2809 EXPECT_EQ(a.lane<3>(), 0); 2810 EXPECT_EQ(a.lane<4>(), 0); 2811 EXPECT_EQ(a.lane<5>(), 0); 2812 EXPECT_EQ(a.lane<6>(), 0); 2813 EXPECT_EQ(a.lane<7>(), 0); 2814} 2815 2816/** @brief Test vint8 load1. */ 2817TEST(vint8, Load1) 2818{ 2819 int s = 42; 2820 vint8 a = vint8::load1(&s); 2821 EXPECT_EQ(a.lane<0>(), 42); 2822 EXPECT_EQ(a.lane<1>(), 42); 2823 EXPECT_EQ(a.lane<2>(), 42); 2824 EXPECT_EQ(a.lane<3>(), 42); 2825 EXPECT_EQ(a.lane<4>(), 42); 2826 EXPECT_EQ(a.lane<5>(), 42); 2827 EXPECT_EQ(a.lane<6>(), 42); 2828 EXPECT_EQ(a.lane<7>(), 42); 2829} 2830 2831/** @brief Test vint8 loada. */ 2832TEST(vint8, Loada) 2833{ 2834 vint8 a = vint8::loada(&(s32_data[0])); 2835 EXPECT_EQ(a.lane<0>(), 0); 2836 EXPECT_EQ(a.lane<1>(), 1); 2837 EXPECT_EQ(a.lane<2>(), 2); 2838 EXPECT_EQ(a.lane<3>(), 3); 2839 EXPECT_EQ(a.lane<4>(), 4); 2840 EXPECT_EQ(a.lane<5>(), 5); 2841 EXPECT_EQ(a.lane<6>(), 6); 2842 EXPECT_EQ(a.lane<7>(), 7); 2843} 2844 2845/** @brief Test vint8 lane_id. */ 2846TEST(vint8, LaneID) 2847{ 2848 vint8 a = vint8::lane_id(); 2849 EXPECT_EQ(a.lane<0>(), 0); 2850 EXPECT_EQ(a.lane<1>(), 1); 2851 EXPECT_EQ(a.lane<2>(), 2); 2852 EXPECT_EQ(a.lane<3>(), 3); 2853 EXPECT_EQ(a.lane<4>(), 4); 2854 EXPECT_EQ(a.lane<5>(), 5); 2855 EXPECT_EQ(a.lane<6>(), 6); 2856 EXPECT_EQ(a.lane<7>(), 7); 2857} 2858 2859/** @brief Test vint8 add. */ 2860TEST(vint8, vadd) 2861{ 2862 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 2863 vint8 b(2, 3, 4, 5, 2, 3, 4, 5); 2864 a = a + b; 2865 EXPECT_EQ(a.lane<0>(), 1 + 2); 2866 EXPECT_EQ(a.lane<1>(), 2 + 3); 2867 EXPECT_EQ(a.lane<2>(), 3 + 4); 2868 EXPECT_EQ(a.lane<3>(), 4 + 5); 2869 EXPECT_EQ(a.lane<4>(), 1 + 2); 2870 EXPECT_EQ(a.lane<5>(), 2 + 3); 2871 EXPECT_EQ(a.lane<6>(), 3 + 4); 2872 EXPECT_EQ(a.lane<7>(), 4 + 5); 2873} 2874 2875 2876/** @brief Test vint8 self-add. */ 2877TEST(vint8, vselfadd1) 2878{ 2879 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 2880 vint8 b(2, 3, 4, 5, 2, 3, 4, 5); 2881 a += b; 2882 2883 EXPECT_EQ(a.lane<0>(), 1 + 2); 2884 EXPECT_EQ(a.lane<1>(), 2 + 3); 2885 EXPECT_EQ(a.lane<2>(), 3 + 4); 2886 EXPECT_EQ(a.lane<3>(), 4 + 5); 2887 EXPECT_EQ(a.lane<4>(), 1 + 2); 2888 EXPECT_EQ(a.lane<5>(), 2 + 3); 2889 EXPECT_EQ(a.lane<6>(), 3 + 4); 2890 EXPECT_EQ(a.lane<7>(), 4 + 5); 2891} 2892 2893/** @brief Test vint8 sub. */ 2894TEST(vint8, vsub) 2895{ 2896 vint8 a(1, 2, 4, 4, 1, 2, 4, 4); 2897 vint8 b(2, 3, 3, 5, 2, 3, 3, 5); 2898 a = a - b; 2899 EXPECT_EQ(a.lane<0>(), 1 - 2); 2900 EXPECT_EQ(a.lane<1>(), 2 - 3); 2901 EXPECT_EQ(a.lane<2>(), 4 - 3); 2902 EXPECT_EQ(a.lane<3>(), 4 - 5); 2903 EXPECT_EQ(a.lane<4>(), 1 - 2); 2904 EXPECT_EQ(a.lane<5>(), 2 - 3); 2905 EXPECT_EQ(a.lane<6>(), 4 - 3); 2906 EXPECT_EQ(a.lane<7>(), 4 - 5); 2907} 2908 2909/** @brief Test vint8 mul. */ 2910TEST(vint8, vmul) 2911{ 2912 vint8 a(1, 2, 4, 4, 1, 2, 4, 4); 2913 vint8 b(2, 3, 3, 5, 2, 3, 3, 5); 2914 a = a * b; 2915 EXPECT_EQ(a.lane<0>(), 1 * 2); 2916 EXPECT_EQ(a.lane<1>(), 2 * 3); 2917 EXPECT_EQ(a.lane<2>(), 4 * 3); 2918 EXPECT_EQ(a.lane<3>(), 4 * 5); 2919 EXPECT_EQ(a.lane<4>(), 1 * 2); 2920 EXPECT_EQ(a.lane<5>(), 2 * 3); 2921 EXPECT_EQ(a.lane<6>(), 4 * 3); 2922 EXPECT_EQ(a.lane<7>(), 4 * 5); 2923} 2924 2925/** @brief Test vint8 bitwise invert. */ 2926TEST(vint8, bit_invert) 2927{ 2928 vint8 a(-1, 0, 1, 2, -1, 0, 1, 2); 2929 a = ~a; 2930 EXPECT_EQ(a.lane<0>(), ~-1); 2931 EXPECT_EQ(a.lane<1>(), ~0); 2932 EXPECT_EQ(a.lane<2>(), ~1); 2933 EXPECT_EQ(a.lane<3>(), ~2); 2934 EXPECT_EQ(a.lane<4>(), ~-1); 2935 EXPECT_EQ(a.lane<5>(), ~0); 2936 EXPECT_EQ(a.lane<6>(), ~1); 2937 EXPECT_EQ(a.lane<7>(), ~2); 2938} 2939 2940/** @brief Test vint8 bitwise or. */ 2941TEST(vint8, bit_vor) 2942{ 2943 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 2944 vint8 b(2, 3, 4, 5, 2, 3, 4, 5); 2945 a = a | b; 2946 EXPECT_EQ(a.lane<0>(), 3); 2947 EXPECT_EQ(a.lane<1>(), 3); 2948 EXPECT_EQ(a.lane<2>(), 7); 2949 EXPECT_EQ(a.lane<3>(), 5); 2950 EXPECT_EQ(a.lane<4>(), 3); 2951 EXPECT_EQ(a.lane<5>(), 3); 2952 EXPECT_EQ(a.lane<6>(), 7); 2953 EXPECT_EQ(a.lane<7>(), 5); 2954} 2955 2956/** @brief Test vint8 bitwise and. */ 2957TEST(vint8, bit_vand) 2958{ 2959 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 2960 vint8 b(2, 3, 4, 5, 2, 3, 4, 5); 2961 a = a & b; 2962 EXPECT_EQ(a.lane<0>(), 0); 2963 EXPECT_EQ(a.lane<1>(), 2); 2964 EXPECT_EQ(a.lane<2>(), 0); 2965 EXPECT_EQ(a.lane<3>(), 4); 2966 EXPECT_EQ(a.lane<4>(), 0); 2967 EXPECT_EQ(a.lane<5>(), 2); 2968 EXPECT_EQ(a.lane<6>(), 0); 2969 EXPECT_EQ(a.lane<7>(), 4); 2970} 2971 2972/** @brief Test vint8 bitwise xor. */ 2973TEST(vint8, bit_vxor) 2974{ 2975 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 2976 vint8 b(2, 3, 4, 5, 2, 3, 4, 5); 2977 a = a ^ b; 2978 EXPECT_EQ(a.lane<0>(), 3); 2979 EXPECT_EQ(a.lane<1>(), 1); 2980 EXPECT_EQ(a.lane<2>(), 7); 2981 EXPECT_EQ(a.lane<3>(), 1); 2982 EXPECT_EQ(a.lane<4>(), 3); 2983 EXPECT_EQ(a.lane<5>(), 1); 2984 EXPECT_EQ(a.lane<6>(), 7); 2985 EXPECT_EQ(a.lane<7>(), 1); 2986} 2987 2988/** @brief Test vint8 ceq. */ 2989TEST(vint8, ceq) 2990{ 2991 vint8 a1(1, 2, 3, 4, 1, 2, 3, 4); 2992 vint8 b1(0, 1, 2, 3, 0, 1, 2, 3); 2993 vmask8 r1 = a1 == b1; 2994 EXPECT_EQ(0u, mask(r1)); 2995 EXPECT_EQ(false, any(r1)); 2996 EXPECT_EQ(false, all(r1)); 2997 2998 vint8 a2(1, 2, 3, 4, 1, 2, 3, 4); 2999 vint8 b2(1, 0, 0, 0, 1, 0, 0, 0); 3000 vmask8 r2 = a2 == b2; 3001 EXPECT_EQ(0x11u, mask(r2)); 3002 EXPECT_EQ(true, any(r2)); 3003 EXPECT_EQ(false, all(r2)); 3004 3005 vint8 a3(1, 2, 3, 4, 1, 2, 3, 4); 3006 vint8 b3(1, 0, 3, 0, 1, 0, 3, 0); 3007 vmask8 r3 = a3 == b3; 3008 EXPECT_EQ(0x55u, mask(r3)); 3009 EXPECT_EQ(true, any(r3)); 3010 EXPECT_EQ(false, all(r3)); 3011 3012 vint8 a4(1, 2, 3, 4, 1, 2, 3, 4); 3013 vmask8 r4 = a4 == a4; 3014 EXPECT_EQ(0xFFu, mask(r4)); 3015 EXPECT_EQ(true, any(r4)); 3016 EXPECT_EQ(true, all(r4)); 3017} 3018 3019/** @brief Test vint8 cne. */ 3020TEST(vint8, cne) 3021{ 3022 vint8 a1(1, 2, 3, 4, 1, 2, 3, 4); 3023 vint8 b1(0, 1, 2, 3, 0, 1, 2, 3); 3024 vmask8 r1 = a1 != b1; 3025 EXPECT_EQ(0xFFu, mask(r1)); 3026 EXPECT_EQ(true, any(r1)); 3027 EXPECT_EQ(true, all(r1)); 3028 3029 vint8 a2(1, 2, 3, 4, 1, 2, 3, 4); 3030 vint8 b2(1, 0, 0, 0, 1, 0, 0, 0); 3031 vmask8 r2 = a2 != b2; 3032 EXPECT_EQ(0xEEu, mask(r2)); 3033 EXPECT_EQ(true, any(r2)); 3034 EXPECT_EQ(false, all(r2)); 3035 3036 vint8 a3(1, 2, 3, 4, 1, 2, 3, 4); 3037 vint8 b3(1, 0, 3, 0, 1, 0, 3, 0); 3038 vmask8 r3 = a3 != b3; 3039 EXPECT_EQ(0xAAu, mask(r3)); 3040 EXPECT_EQ(true, any(r3)); 3041 EXPECT_EQ(false, all(r3)); 3042 3043 vint8 a4(1, 2, 3, 4, 1, 2, 3, 4); 3044 vmask8 r4 = a4 != a4; 3045 EXPECT_EQ(0u, mask(r4)); 3046 EXPECT_EQ(false, any(r4)); 3047 EXPECT_EQ(false, all(r4)); 3048} 3049 3050/** @brief Test vint8 clt. */ 3051TEST(vint8, clt) 3052{ 3053 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 3054 vint8 b(0, 3, 3, 5, 0, 3, 3, 5); 3055 vmask8 r = a < b; 3056 EXPECT_EQ(0xAAu, mask(r)); 3057} 3058 3059/** @brief Test vint8 cgt. */ 3060TEST(vint8, cgt) 3061{ 3062 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 3063 vint8 b(0, 3, 3, 5, 0, 3, 3, 5); 3064 vmask8 r = a > b; 3065 EXPECT_EQ(0x11u, mask(r)); 3066} 3067 3068/** @brief Test vint8 min. */ 3069TEST(vint8, min) 3070{ 3071 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 3072 vint8 b(0, 3, 3, 5, 0, 3, 3, 5); 3073 vint8 r = min(a, b); 3074 EXPECT_EQ(r.lane<0>(), 0); 3075 EXPECT_EQ(r.lane<1>(), 2); 3076 EXPECT_EQ(r.lane<2>(), 3); 3077 EXPECT_EQ(r.lane<3>(), 4); 3078 EXPECT_EQ(r.lane<4>(), 0); 3079 EXPECT_EQ(r.lane<5>(), 2); 3080 EXPECT_EQ(r.lane<6>(), 3); 3081 EXPECT_EQ(r.lane<7>(), 4); 3082} 3083 3084/** @brief Test vint8 max. */ 3085TEST(vint8, max) 3086{ 3087 vint8 a(1, 2, 3, 4, 1, 2, 3, 4); 3088 vint8 b(0, 3, 3, 5, 0, 3, 3, 5); 3089 vint8 r = max(a, b); 3090 EXPECT_EQ(r.lane<0>(), 1); 3091 EXPECT_EQ(r.lane<1>(), 3); 3092 EXPECT_EQ(r.lane<2>(), 3); 3093 EXPECT_EQ(r.lane<3>(), 5); 3094 EXPECT_EQ(r.lane<4>(), 1); 3095 EXPECT_EQ(r.lane<5>(), 3); 3096 EXPECT_EQ(r.lane<6>(), 3); 3097 EXPECT_EQ(r.lane<7>(), 5); 3098} 3099 3100/** @brief Test vint8 lsl. */ 3101TEST(vint8, lsl) 3102{ 3103 vint8 a(1, 2, 4, -4, 1, 2, 4, -4); 3104 a = lsl<0>(a); 3105 EXPECT_EQ(a.lane<0>(), 1); 3106 EXPECT_EQ(a.lane<1>(), 2); 3107 EXPECT_EQ(a.lane<2>(), 4); 3108 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC)); 3109 EXPECT_EQ(a.lane<4>(), 1); 3110 EXPECT_EQ(a.lane<5>(), 2); 3111 EXPECT_EQ(a.lane<6>(), 4); 3112 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC)); 3113 3114 3115 a = lsl<1>(a); 3116 EXPECT_EQ(a.lane<0>(), 2); 3117 EXPECT_EQ(a.lane<1>(), 4); 3118 EXPECT_EQ(a.lane<2>(), 8); 3119 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFF8)); 3120 EXPECT_EQ(a.lane<4>(), 2); 3121 EXPECT_EQ(a.lane<5>(), 4); 3122 EXPECT_EQ(a.lane<6>(), 8); 3123 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFF8)); 3124 3125 a = lsl<2>(a); 3126 EXPECT_EQ(a.lane<0>(), 8); 3127 EXPECT_EQ(a.lane<1>(), 16); 3128 EXPECT_EQ(a.lane<2>(), 32); 3129 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFE0)); 3130 EXPECT_EQ(a.lane<4>(), 8); 3131 EXPECT_EQ(a.lane<5>(), 16); 3132 EXPECT_EQ(a.lane<6>(), 32); 3133 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFE0)); 3134} 3135 3136/** @brief Test vint8 lsr. */ 3137TEST(vint8, lsr) 3138{ 3139 vint8 a(1, 2, 4, -4, 1, 2, 4, -4); 3140 a = lsr<0>(a); 3141 EXPECT_EQ(a.lane<0>(), 1); 3142 EXPECT_EQ(a.lane<1>(), 2); 3143 EXPECT_EQ(a.lane<2>(), 4); 3144 EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC)); 3145 EXPECT_EQ(a.lane<4>(), 1); 3146 EXPECT_EQ(a.lane<5>(), 2); 3147 EXPECT_EQ(a.lane<6>(), 4); 3148 EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC)); 3149 3150 3151 a = lsr<1>(a); 3152 EXPECT_EQ(a.lane<0>(), 0); 3153 EXPECT_EQ(a.lane<1>(), 1); 3154 EXPECT_EQ(a.lane<2>(), 2); 3155 EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE); 3156 EXPECT_EQ(a.lane<4>(), 0); 3157 EXPECT_EQ(a.lane<5>(), 1); 3158 EXPECT_EQ(a.lane<6>(), 2); 3159 EXPECT_EQ(a.lane<7>(), 0x7FFFFFFE); 3160 3161 a = lsr<2>(a); 3162 EXPECT_EQ(a.lane<0>(), 0); 3163 EXPECT_EQ(a.lane<1>(), 0); 3164 EXPECT_EQ(a.lane<2>(), 0); 3165 EXPECT_EQ(a.lane<3>(), 0x1FFFFFFF); 3166 EXPECT_EQ(a.lane<4>(), 0); 3167 EXPECT_EQ(a.lane<5>(), 0); 3168 EXPECT_EQ(a.lane<6>(), 0); 3169 EXPECT_EQ(a.lane<7>(), 0x1FFFFFFF); 3170} 3171 3172/** @brief Test vint8 asr. */ 3173TEST(vint8, asr) 3174{ 3175 vint8 a(1, 2, 4, -4, 1, 2, 4, -4); 3176 a = asr<0>(a); 3177 EXPECT_EQ(a.lane<0>(), 1); 3178 EXPECT_EQ(a.lane<1>(), 2); 3179 EXPECT_EQ(a.lane<2>(), 4); 3180 EXPECT_EQ(a.lane<3>(), -4); 3181 EXPECT_EQ(a.lane<4>(), 1); 3182 EXPECT_EQ(a.lane<5>(), 2); 3183 EXPECT_EQ(a.lane<6>(), 4); 3184 EXPECT_EQ(a.lane<7>(), -4); 3185 3186 a = asr<1>(a); 3187 EXPECT_EQ(a.lane<0>(), 0); 3188 EXPECT_EQ(a.lane<1>(), 1); 3189 EXPECT_EQ(a.lane<2>(), 2); 3190 EXPECT_EQ(a.lane<3>(), -2); 3191 EXPECT_EQ(a.lane<4>(), 0); 3192 EXPECT_EQ(a.lane<5>(), 1); 3193 EXPECT_EQ(a.lane<6>(), 2); 3194 EXPECT_EQ(a.lane<7>(), -2); 3195 3196 // Note - quirk of asr is that you will get "stuck" at -1 3197 a = asr<2>(a); 3198 EXPECT_EQ(a.lane<0>(), 0); 3199 EXPECT_EQ(a.lane<1>(), 0); 3200 EXPECT_EQ(a.lane<2>(), 0); 3201 EXPECT_EQ(a.lane<3>(), -1); 3202 EXPECT_EQ(a.lane<4>(), 0); 3203 EXPECT_EQ(a.lane<5>(), 0); 3204 EXPECT_EQ(a.lane<6>(), 0); 3205 EXPECT_EQ(a.lane<7>(), -1); 3206} 3207 3208/** @brief Test vint8 hmin. */ 3209TEST(vint8, hmin) 3210{ 3211 vint8 a1(1, 2, 1, 2, 1, 2, 1, 2); 3212 vint8 r1 = hmin(a1); 3213 EXPECT_EQ(r1.lane<0>(), 1); 3214 EXPECT_EQ(r1.lane<1>(), 1); 3215 EXPECT_EQ(r1.lane<2>(), 1); 3216 EXPECT_EQ(r1.lane<3>(), 1); 3217 EXPECT_EQ(r1.lane<4>(), 1); 3218 EXPECT_EQ(r1.lane<5>(), 1); 3219 EXPECT_EQ(r1.lane<6>(), 1); 3220 EXPECT_EQ(r1.lane<7>(), 1); 3221 3222 vint8 a2(1, 2, -1, 5, 1, 2, -1, 5); 3223 vint8 r2 = hmin(a2); 3224 EXPECT_EQ(r2.lane<0>(), -1); 3225 EXPECT_EQ(r2.lane<1>(), -1); 3226 EXPECT_EQ(r2.lane<2>(), -1); 3227 EXPECT_EQ(r2.lane<3>(), -1); 3228 EXPECT_EQ(r2.lane<4>(), -1); 3229 EXPECT_EQ(r2.lane<5>(), -1); 3230 EXPECT_EQ(r2.lane<6>(), -1); 3231 EXPECT_EQ(r2.lane<7>(), -1); 3232} 3233 3234/** @brief Test vint8 hmax. */ 3235TEST(vint8, hmax) 3236{ 3237 vint8 a1(1, 2, 1, 2, 1, 3, 1, 2); 3238 vint8 r1 = hmax(a1); 3239 EXPECT_EQ(r1.lane<0>(), 3); 3240 EXPECT_EQ(r1.lane<1>(), 3); 3241 EXPECT_EQ(r1.lane<2>(), 3); 3242 EXPECT_EQ(r1.lane<3>(), 3); 3243 EXPECT_EQ(r1.lane<4>(), 3); 3244 EXPECT_EQ(r1.lane<5>(), 3); 3245 EXPECT_EQ(r1.lane<6>(), 3); 3246 EXPECT_EQ(r1.lane<7>(), 3); 3247 3248 vint8 a2(1, 2, -1, 5, 1, 2, -1, 5); 3249 vint8 r2 = hmax(a2); 3250 EXPECT_EQ(r2.lane<0>(), 5); 3251 EXPECT_EQ(r2.lane<1>(), 5); 3252 EXPECT_EQ(r2.lane<2>(), 5); 3253 EXPECT_EQ(r2.lane<3>(), 5); 3254 EXPECT_EQ(r2.lane<4>(), 5); 3255 EXPECT_EQ(r2.lane<5>(), 5); 3256 EXPECT_EQ(r2.lane<6>(), 5); 3257 EXPECT_EQ(r2.lane<7>(), 5); 3258} 3259 3260/** @brief Test vint8 storea. */ 3261TEST(vint8, storea) 3262{ 3263 alignas(32) int out[8]; 3264 vint8 a(s32_data); 3265 storea(a, out); 3266 EXPECT_EQ(out[0], 0); 3267 EXPECT_EQ(out[1], 1); 3268 EXPECT_EQ(out[2], 2); 3269 EXPECT_EQ(out[3], 3); 3270 EXPECT_EQ(out[4], 4); 3271 EXPECT_EQ(out[5], 5); 3272 EXPECT_EQ(out[6], 6); 3273 EXPECT_EQ(out[7], 7); 3274} 3275 3276/** @brief Test vint8 store. */ 3277TEST(vint8, store) 3278{ 3279 alignas(32) int out[9]; 3280 vint8 a(s32_data); 3281 store(a, out + 1); 3282 EXPECT_EQ(out[1], 0); 3283 EXPECT_EQ(out[2], 1); 3284 EXPECT_EQ(out[3], 2); 3285 EXPECT_EQ(out[4], 3); 3286 EXPECT_EQ(out[5], 4); 3287 EXPECT_EQ(out[6], 5); 3288 EXPECT_EQ(out[7], 6); 3289 EXPECT_EQ(out[8], 7); 3290} 3291 3292/** @brief Test vint8 store_nbytes. */ 3293TEST(vint8, store_nbytes) 3294{ 3295 alignas(32) int out[2]; 3296 vint8 a(42, 314, 75, 90, 42, 314, 75, 90); 3297 store_nbytes(a, reinterpret_cast<uint8_t*>(&out)); 3298 EXPECT_EQ(out[0], 42); 3299 EXPECT_EQ(out[1], 314); 3300} 3301 3302/** @brief Test vint8 store_lanes_masked. */ 3303TEST(vint8, store_lanes_masked) 3304{ 3305 uint8_t resulta[32] { 0 }; 3306 3307 // Store nothing 3308 vmask8 mask1 = vint8(0) == vint8(1); 3309 vint8 data1 = vint8(1); 3310 3311 store_lanes_masked(resulta, data1, mask1); 3312 vint8 result1v = vint8::load(resulta); 3313 vint8 expect1v = vint8::zero(); 3314 EXPECT_TRUE(all(result1v == expect1v)); 3315 3316 // Store half 3317 vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1); 3318 vint8 data2 = vint8(2); 3319 3320 store_lanes_masked(resulta, data2, mask2); 3321 vint8 result2v = vint8::load(resulta); 3322 vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0); 3323 EXPECT_TRUE(all(result2v == expect2v)); 3324 3325 // Store all 3326 vmask8 mask3 = vint8(1) == vint8(1); 3327 vint8 data3 = vint8(3); 3328 3329 store_lanes_masked(resulta, data3, mask3); 3330 vint8 result3v = vint8::load(resulta); 3331 vint8 expect3v = vint8(3); 3332 EXPECT_TRUE(all(result3v == expect3v)); 3333} 3334 3335/** @brief Test vint8 store_lanes_masked to unaligned address. */ 3336TEST(vint8, store_lanes_masked_unaligned) 3337{ 3338 uint8_t resulta[33] { 0 }; 3339 3340 // Store nothing 3341 vmask8 mask1 = vint8(0) == vint8(1); 3342 vint8 data1 = vint8(1); 3343 3344 store_lanes_masked(resulta + 1, data1, mask1); 3345 vint8 result1v = vint8::load(resulta + 1); 3346 vint8 expect1v = vint8::zero(); 3347 EXPECT_TRUE(all(result1v == expect1v)); 3348 3349 // Store half 3350 vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1); 3351 vint8 data2 = vint8(2); 3352 3353 store_lanes_masked(resulta + 1, data2, mask2); 3354 vint8 result2v = vint8::load(resulta + 1); 3355 vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0); 3356 EXPECT_TRUE(all(result2v == expect2v)); 3357 3358 // Store all 3359 vmask8 mask3 = vint8(1) == vint8(1); 3360 vint8 data3 = vint8(3); 3361 3362 store_lanes_masked(resulta + 1, data3, mask3); 3363 vint8 result3v = vint8::load(resulta + 1); 3364 vint8 expect3v = vint8(3); 3365 EXPECT_TRUE(all(result3v == expect3v)); 3366} 3367 3368/** @brief Test vint8 gatheri. */ 3369TEST(vint8, gatheri) 3370{ 3371 vint8 indices(0, 4, 3, 2, 7, 4, 3, 2); 3372 vint8 r = gatheri(s32_data, indices); 3373 EXPECT_EQ(r.lane<0>(), 0); 3374 EXPECT_EQ(r.lane<1>(), 4); 3375 EXPECT_EQ(r.lane<2>(), 3); 3376 EXPECT_EQ(r.lane<3>(), 2); 3377 EXPECT_EQ(r.lane<4>(), 7); 3378 EXPECT_EQ(r.lane<5>(), 4); 3379 EXPECT_EQ(r.lane<6>(), 3); 3380 EXPECT_EQ(r.lane<7>(), 2); 3381} 3382 3383/** @brief Test vint8 pack_low_bytes. */ 3384TEST(vint8, pack_low_bytes) 3385{ 3386 vint8 a(1, 2, 3, 4, 2, 3, 4, 5); 3387 vint8 r = pack_low_bytes(a); 3388 EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0)); 3389 EXPECT_EQ(r.lane<1>(), (5 << 24) | (4 << 16) | (3 << 8) | (2 << 0)); 3390} 3391 3392/** @brief Test vint8 select. */ 3393TEST(vint8, select) 3394{ 3395 vint8 m1(1, 1, 1, 1, 1, 1, 1, 1); 3396 vint8 m2(1, 2, 1, 2, 1, 2, 1, 2); 3397 vmask8 cond = m1 == m2; 3398 3399 vint8 a(1, 3, 3, 1, 1, 3, 3, 1); 3400 vint8 b(4, 2, 2, 4, 4, 2, 2, 4); 3401 3402 vint8 r1 = select(a, b, cond); 3403 EXPECT_EQ(r1.lane<0>(), 4); 3404 EXPECT_EQ(r1.lane<1>(), 3); 3405 EXPECT_EQ(r1.lane<2>(), 2); 3406 EXPECT_EQ(r1.lane<3>(), 1); 3407 EXPECT_EQ(r1.lane<4>(), 4); 3408 EXPECT_EQ(r1.lane<5>(), 3); 3409 EXPECT_EQ(r1.lane<6>(), 2); 3410 EXPECT_EQ(r1.lane<7>(), 1); 3411 3412 vint8 r2 = select(b, a, cond); 3413 EXPECT_EQ(r2.lane<0>(), 1); 3414 EXPECT_EQ(r2.lane<1>(), 2); 3415 EXPECT_EQ(r2.lane<2>(), 3); 3416 EXPECT_EQ(r2.lane<3>(), 4); 3417 EXPECT_EQ(r2.lane<4>(), 1); 3418 EXPECT_EQ(r2.lane<5>(), 2); 3419 EXPECT_EQ(r2.lane<6>(), 3); 3420 EXPECT_EQ(r2.lane<7>(), 4); 3421} 3422 3423// vmask8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 3424 3425/** @brief Test vmask8 scalar literal constructor. */ 3426TEST(vmask8, scalar_literal_construct) 3427{ 3428 vfloat8 ma(0.0f); 3429 vfloat8 mb(1.0f); 3430 3431 vmask8 m1(true); 3432 vfloat8 r1 = select(ma, mb, m1); 3433 vmask8 rm1 = r1 == mb; 3434 EXPECT_EQ(all(rm1), true); 3435 3436 vmask8 m2(false); 3437 vfloat8 r2 = select(ma, mb, m2); 3438 vmask8 rm2 = r2 == mb; 3439 EXPECT_EQ(any(rm2), false); 3440} 3441 3442/** @brief Test vmask8 or. */ 3443TEST(vmask8, or) 3444{ 3445 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); 3446 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); 3447 vmask8 m1 = m1a == m1b; 3448 3449 vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0); 3450 vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1); 3451 vmask8 m2 = m2a == m2b; 3452 3453 vmask8 r = m1 | m2; 3454 EXPECT_EQ(mask(r), 0xBBu); 3455} 3456 3457/** @brief Test vmask8 and. */ 3458TEST(vmask8, and) 3459{ 3460 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); 3461 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); 3462 vmask8 m1 = m1a == m1b; 3463 3464 vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0); 3465 vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1); 3466 vmask8 m2 = m2a == m2b; 3467 3468 vmask8 r = m1 & m2; 3469 EXPECT_EQ(mask(r), 0x22u); 3470} 3471 3472/** @brief Test vmask8 xor. */ 3473TEST(vmask8, xor) 3474{ 3475 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); 3476 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); 3477 vmask8 m1 = m1a == m1b; 3478 3479 vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0); 3480 vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1); 3481 vmask8 m2 = m2a == m2b; 3482 3483 vmask8 r = m1 ^ m2; 3484 EXPECT_EQ(mask(r), 0x99u); 3485} 3486 3487/** @brief Test vmask8 not. */ 3488TEST(vmask8, not) 3489{ 3490 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); 3491 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); 3492 vmask8 m1 = m1a == m1b; 3493 vmask8 r = ~m1; 3494 EXPECT_EQ(mask(r), 0x55u); 3495} 3496 3497/** @brief Test vint8 table permute. */ 3498TEST(vint8, vtable_8bt_32bi_32entry) 3499{ 3500 vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); 3501 vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f); 3502 3503 vint8 table0p, table1p; 3504 vtable_prepare(table0, table1, table0p, table1p); 3505 3506 vint8 index(0, 7, 4, 15, 16, 20, 23, 31); 3507 3508 vint8 result = vtable_8bt_32bi(table0p, table1p, index); 3509 3510 EXPECT_EQ(result.lane<0>(), 3); 3511 EXPECT_EQ(result.lane<1>(), 4); 3512 EXPECT_EQ(result.lane<2>(), 7); 3513 EXPECT_EQ(result.lane<3>(), 12); 3514 EXPECT_EQ(result.lane<4>(), 19); 3515 EXPECT_EQ(result.lane<5>(), 23); 3516 EXPECT_EQ(result.lane<6>(), 20); 3517 EXPECT_EQ(result.lane<7>(), 28); 3518} 3519 3520/** @brief Test vint4 table permute. */ 3521TEST(vint8, vtable_8bt_32bi_64entry) 3522{ 3523 vint4 table0(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); 3524 vint4 table1(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f); 3525 vint4 table2(0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f); 3526 vint4 table3(0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f); 3527 3528 vint8 table0p, table1p, table2p, table3p; 3529 vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p); 3530 3531 vint8 index(0, 7, 4, 15, 16, 20, 38, 63); 3532 3533 vint8 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index); 3534 3535 EXPECT_EQ(result.lane<0>(), 3); 3536 EXPECT_EQ(result.lane<1>(), 4); 3537 EXPECT_EQ(result.lane<2>(), 7); 3538 EXPECT_EQ(result.lane<3>(), 12); 3539 EXPECT_EQ(result.lane<4>(), 19); 3540 EXPECT_EQ(result.lane<5>(), 23); 3541 EXPECT_EQ(result.lane<6>(), 37); 3542 EXPECT_EQ(result.lane<7>(), 60); 3543} 3544 3545#endif 3546 3547} 3548