1/*
2 * Copyright 2021 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include "experimental/lowp-basic/QMath.h"
12
13// Compile for x86_64 + ssse3 with:
14//     c++ -O3 --std=c++17 -mssse3 experimental/lowp-basic/lowp_experiments.cpp -o lowp
15//
// Compile for aarch64 with (macOS):
17//    c++ -O3 --std=c++17 -arch arm64 experimental/lowp-basic/lowp_experiments.cpp  -o lowp
18//
19// View assembly:
//    objdump -d lowp | less
21
22template <int N, typename T> using V = T __attribute__((ext_vector_type(N)));
23
24#if !defined(__clang__)
25    static_assert(false, "This only works on clang.");
26#endif
27
28#if defined(__SSSE3__)
29    #include <immintrin.h>
30#endif
31
32#if defined(__ARM_NEON)
33    // From section 5.5.5 of the ARM C Language Extensions (ACLE)
34    #include <arm_neon.h>
35#endif
36
// Eight unsigned 16-bit lanes (128 bits total). The name suggests Q15
// fixed-point data; presumably the signed 16-bit intrinsics used in this file
// reinterpret the lanes as signed values -- confirm against QMath.h.
using Q15 = V<8, uint16_t>;
38
39#if defined(__SSSE3__)
40static void test_mm_mulhrs_epi16_simulation() {
41    for (int i = -32768; i < 32768; i++) {
42        for (int j = -32768; j < 32768; j++) {
43            Q15 a(i);
44            Q15 b(j);
45            Q15 simResult = simulate_ssse3_mm_mulhrs_epi16(a, b);
46            Q15 intrinsicResult = _mm_mulhrs_epi16(a, b);
47            for (int i = 0; i < 8; i++) {
48                if (simResult[i] != intrinsicResult[i]) {
49                    printf("simulate_ssse3_mm_mulhrs_epi16 broken\n");
50                    printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n",
51                           i, a[i], b[i], intrinsicResult[i], simResult[i]);
52                    exit(1);
53                }
54            }
55        }
56    }
57}
58
59// Use ssse3 to simulate saturating multiply on arm.
60static Q15 ssse3_vqrdmulhq_s16(Q15 a, Q15 b) {
61    constexpr Q15 limit(0x8000);
62    const Q15 product = _mm_mulhrs_epi16(a, b);
63    const Q15 eq = _mm_cmpeq_epi16(product, limit);
64    return _mm_xor_si128(eq, product);
65}
66
67static void test_ssse3_vqrdmulhq_s16() {
68    for (int i = -32768; i < 32768; i++) {
69        for (int j = -32768; j < 32768; j++) {
70            Q15 a(i);
71            Q15 b(j);
72            Q15 simResult = ssse3_vqrdmulhq_s16(a, b);
73            Q15 realVqrdmulhqS16 = simulate_neon_vqrdmulhq_s16(a, b);
74            for (int i = 0; i < 8; i++) {
75                if (simResult[i] != realVqrdmulhqS16[i]) {
76                    printf("simulating vqrdmulhq_s16 with ssse3 broken\n");
77                    printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n",
78                           i, a[i], b[i], realVqrdmulhqS16[i], simResult[i]);
79                    exit(1);
80                }
81            }
82        }
83    }
84}
85
86#endif
87
88#if defined(__ARM_NEON)
89static void test_neon_vqrdmulhq_s16_simulation() {
90    for (int i = -32768; i < 32768; i++) {
91        for (int j = -32768; j < 32768; j++) {
92            Q15 a(i);
93            Q15 b(j);
94            Q15 simResult = simulate_neon_vqrdmulhq_s16(a, b);
95            Q15 intrinsicResult = vqrdmulhq_s16(a, b);
96            for (int i = 0; i < 8; i++) {
97                if (simResult[i] != intrinsicResult[i]) {
98                    printf("simulate_neon_vqrdmulhq_s16 broken\n");
99                    printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n",
100                           i, a[i], b[i], intrinsicResult[i], simResult[i]);
101                    exit(1);
102                }
103            }
104        }
105    }
106}
107#endif
108
// Runs the exhaustive checks available for the target architecture. A failing
// check terminates the process itself with exit(1), so "Done." is printed only
// when every enabled check has passed.
int main() {
#ifdef __SSSE3__
    // Currently disabled:
    // test_mm_mulhrs_epi16_simulation();
    test_ssse3_vqrdmulhq_s16();
#endif
#ifdef __ARM_NEON
    test_neon_vqrdmulhq_s16_simulation();
#endif
    printf("Done.\n");
    return 0;
}
120