1/* 2 * Copyright 2021 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include <cassert> 9#include <cstdio> 10#include <cstdint> 11#include "experimental/lowp-basic/QMath.h" 12 13// Compile for x86_64 + ssse3 with: 14// c++ -O3 --std=c++17 -mssse3 experimental/lowp-basic/lowp_experiments.cpp -o lowp 15// 16// Compile for aarch64 with (Mac os): 17// c++ -O3 --std=c++17 -arch arm64 experimental/lowp-basic/lowp_experiments.cpp -o lowp 18// 19// View assembly: 20// dumpobj -d lowp | less 21 22template <int N, typename T> using V = T __attribute__((ext_vector_type(N))); 23 24#if !defined(__clang__) 25 static_assert(false, "This only works on clang."); 26#endif 27 28#if defined(__SSSE3__) 29 #include <immintrin.h> 30#endif 31 32#if defined(__ARM_NEON) 33 // From section 5.5.5 of the ARM C Language Extensions (ACLE) 34 #include <arm_neon.h> 35#endif 36 37using Q15 = V<8, uint16_t>; 38 39#if defined(__SSSE3__) 40static void test_mm_mulhrs_epi16_simulation() { 41 for (int i = -32768; i < 32768; i++) { 42 for (int j = -32768; j < 32768; j++) { 43 Q15 a(i); 44 Q15 b(j); 45 Q15 simResult = simulate_ssse3_mm_mulhrs_epi16(a, b); 46 Q15 intrinsicResult = _mm_mulhrs_epi16(a, b); 47 for (int i = 0; i < 8; i++) { 48 if (simResult[i] != intrinsicResult[i]) { 49 printf("simulate_ssse3_mm_mulhrs_epi16 broken\n"); 50 printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n", 51 i, a[i], b[i], intrinsicResult[i], simResult[i]); 52 exit(1); 53 } 54 } 55 } 56 } 57} 58 59// Use ssse3 to simulate saturating multiply on arm. 60static Q15 ssse3_vqrdmulhq_s16(Q15 a, Q15 b) { 61 constexpr Q15 limit(0x8000); 62 const Q15 product = _mm_mulhrs_epi16(a, b); 63 const Q15 eq = _mm_cmpeq_epi16(product, limit); 64 return _mm_xor_si128(eq, product); 65} 66 67static void test_ssse3_vqrdmulhq_s16() { 68 for (int i = -32768; i < 32768; i++) { 69 for (int j = -32768; j < 32768; j++) { 70 Q15 a(i); 71 Q15 b(j); 72 Q15 simResult = ssse3_vqrdmulhq_s16(a, b); 73 Q15 realVqrdmulhqS16 = simulate_neon_vqrdmulhq_s16(a, b); 74 for (int i = 0; i < 8; i++) { 75 if (simResult[i] != realVqrdmulhqS16[i]) { 76 printf("simulating vqrdmulhq_s16 with ssse3 broken\n"); 77 printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n", 78 i, a[i], b[i], realVqrdmulhqS16[i], simResult[i]); 79 exit(1); 80 } 81 } 82 } 83 } 84} 85 86#endif 87 88#if defined(__ARM_NEON) 89static void test_neon_vqrdmulhq_s16_simulation() { 90 for (int i = -32768; i < 32768; i++) { 91 for (int j = -32768; j < 32768; j++) { 92 Q15 a(i); 93 Q15 b(j); 94 Q15 simResult = simulate_neon_vqrdmulhq_s16(a, b); 95 Q15 intrinsicResult = vqrdmulhq_s16(a, b); 96 for (int i = 0; i < 8; i++) { 97 if (simResult[i] != intrinsicResult[i]) { 98 printf("simulate_neon_vqrdmulhq_s16 broken\n"); 99 printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n", 100 i, a[i], b[i], intrinsicResult[i], simResult[i]); 101 exit(1); 102 } 103 } 104 } 105 } 106} 107#endif 108 109int main() { 110 #if defined(__SSSE3__) 111 //test_mm_mulhrs_epi16_simulation(); 112 test_ssse3_vqrdmulhq_s16(); 113 #endif 114 #if defined(__ARM_NEON) 115 test_neon_vqrdmulhq_s16_simulation(); 116 #endif 117 printf("Done.\n"); 118 return 0; 119} 120