1/* 2 * Copyright 2021 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include <algorithm> 9#include <cmath> 10#include <cstdio> 11#include <cstdint> 12 13#include "experimental/lowp-basic/QMath.h" 14 15struct Stats { 16 int diff_8_bits = 0; 17 int max_diff = 0; 18 int min_diff = 0; 19 int64_t total = 0; 20 21 void log(int16_t golden, int16_t candidate) { 22 int diff = candidate - golden; 23 max_diff = std::max(max_diff, diff); 24 min_diff = std::min(min_diff, diff); 25 diff_8_bits += candidate != golden; 26 total++; 27 } 28 29 void print() const { 30 printf("8-bit diff: %d - %g%%\n", diff_8_bits, 100.0 * diff_8_bits / total); 31 printf("differences min: %d max: %d\n", min_diff, max_diff); 32 printf("total: %lld\n", total); 33 } 34}; 35 36static float golden_lerp(float t, int16_t a, int16_t b) { 37 return (1.0f - t) * a + t * b; 38} 39 40template <int logPixelScale> 41static int16_t saturating_lerp(float t, int16_t a, int16_t b) { 42 const int16_t half = 1 << (logPixelScale - 1); 43 Q15 qt(floor(t * 32768.f + 0.5f)); 44 Q15 qa(a << logPixelScale); 45 Q15 qb(b << logPixelScale); 46 47 Q15 answer = simulate_neon_vqrdmulhq_s16(qt, qb - qa) + qa; 48 return (answer[0] + half) >> logPixelScale; 49} 50 51template <int logPixelScale> 52static int16_t ssse3_lerp(float t, int16_t a, int16_t b) { 53 const int16_t half = 1 << (logPixelScale - 1); 54 Q15 qt(floor(t * 32768.f + 0.5f)); 55 Q15 qa(a << logPixelScale); 56 Q15 qb(b << logPixelScale); 57 58 Q15 answer = simulate_ssse3_mm_mulhrs_epi16(qt, qb - qa) + qa; 59 return (answer[0] + half) >> logPixelScale; 60} 61 62static int16_t full_res_lerp(float t, int16_t a, int16_t b) { 63 int32_t ft(floor(t * 65536.0f + 0.5f)); 64 65 int32_t temp = ft * (b - a) + a * 65536; 66 int32_t rounded = temp + 32768; 67 return rounded >> 16; 68} 69 70// Change of parameters on t from [0, 1) to [-1, 1). This cuts the number if differences in half. 71template <int logPixelScale> 72static int16_t balanced_lerp(float t, int16_t a, int16_t b) { 73 const int16_t half = 1 << logPixelScale; 74 // t on [-1, 1). 75 Q15 qt (floor(t * 65536.0f - 32768.0f + 0.5f)); 76 // need to pick logPixelScale to scale by addition 1/2. 77 Q15 qw ((b - a) << logPixelScale); 78 Q15 qm ((a + b) << logPixelScale); 79 Q15 answer = simulate_ssse3_mm_mulhrs_epi16(qt, qw) + qm; 80 // Extra shift to divide by 2. 81 return (answer[0] + half) >> (logPixelScale + 1); 82} 83 84template <typename Lerp> 85static Stats check_lerp(Lerp lerp) { 86 Stats stats; 87 for (float t = 0; t < 1.0f - 1.0f / 65536.0f ; t += 1.0f/65536.0f) 88 for (int a = 255; a >= 0; a--) 89 for (int b = 255; b >= 0; b--) { 90 float l = golden_lerp(t, a, b); 91 int16_t golden = floor(l + 0.5f); 92 int16_t candidate = lerp(t, a, b); 93 stats.log(golden, candidate); 94 } 95 return stats; 96} 97 98int main() { 99 Stats stats; 100 101 printf("\nUsing full_res_lerp...\n"); 102 stats = check_lerp(full_res_lerp); 103 stats.print(); 104 105 printf("\nUsing vqrdmulhq_s16...\n"); 106 stats = check_lerp(saturating_lerp<7>); 107 stats.print(); 108 109 printf("\nUsing mm_mulhrs_epi16...\n"); 110 stats = check_lerp(ssse3_lerp<7>); 111 stats.print(); 112 113 printf("\nInterval [-1, 1) mm_mulhrs_epi16...\n"); 114 stats = check_lerp(balanced_lerp<7>); 115 stats.print(); 116 117 printf("Done."); 118 return 0; 119} 120 121