1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2021 Google LLC 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include <algorithm> 9cb93a386Sopenharmony_ci#include <cmath> 10cb93a386Sopenharmony_ci#include <cstdio> 11cb93a386Sopenharmony_ci#include <cstdint> 12cb93a386Sopenharmony_ci 13cb93a386Sopenharmony_ci#include "experimental/lowp-basic/QMath.h" 14cb93a386Sopenharmony_ci 15cb93a386Sopenharmony_cistruct Stats { 16cb93a386Sopenharmony_ci int64_t diff_8_bits = 0; 17cb93a386Sopenharmony_ci int64_t max_diff = 0; 18cb93a386Sopenharmony_ci int64_t min_diff = 0; 19cb93a386Sopenharmony_ci int64_t total = 0; 20cb93a386Sopenharmony_ci 21cb93a386Sopenharmony_ci void log(int16_t golden, int16_t candidate) { 22cb93a386Sopenharmony_ci int64_t diff = candidate - golden; 23cb93a386Sopenharmony_ci max_diff = std::max(max_diff, diff); 24cb93a386Sopenharmony_ci min_diff = std::min(min_diff, diff); 25cb93a386Sopenharmony_ci diff_8_bits += candidate != golden; 26cb93a386Sopenharmony_ci total++; 27cb93a386Sopenharmony_ci } 28cb93a386Sopenharmony_ci 29cb93a386Sopenharmony_ci void print() const { 30cb93a386Sopenharmony_ci printf("8-bit diff: %lld - %g%%\n", diff_8_bits, 100.0 * diff_8_bits / total); 31cb93a386Sopenharmony_ci printf("differences min: %lld max: %lld\n", min_diff, max_diff); 32cb93a386Sopenharmony_ci printf("total: %lld\n", total); 33cb93a386Sopenharmony_ci } 34cb93a386Sopenharmony_ci}; 35cb93a386Sopenharmony_ci 36cb93a386Sopenharmony_ci// This has all kinds of rounding issues. 37cb93a386Sopenharmony_ci// TODO(herb): figure out rounding problems with this code. 38cb93a386Sopenharmony_cistatic float golden_bilerp(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) { 39cb93a386Sopenharmony_ci return (1.0f-tx) * (1.0f-ty) * p00 40cb93a386Sopenharmony_ci + (1.0f-tx) * ty * p01 41cb93a386Sopenharmony_ci + (1.0f-ty) * tx * p10 42cb93a386Sopenharmony_ci + tx * ty * p11; 43cb93a386Sopenharmony_ci} 44cb93a386Sopenharmony_ci 45cb93a386Sopenharmony_cistatic double golden_bilerp2( 46cb93a386Sopenharmony_ci float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) { 47cb93a386Sopenharmony_ci // Double is needed to avoid rounding of lower bits. 48cb93a386Sopenharmony_ci double dtx(tx), dty(ty); 49cb93a386Sopenharmony_ci 50cb93a386Sopenharmony_ci double top = (1.0 - dtx) * p00 + dtx * p10; 51cb93a386Sopenharmony_ci double bottom = (1.0 - dtx) * p01 + dtx * p11; 52cb93a386Sopenharmony_ci 53cb93a386Sopenharmony_ci return (1.0 - dty) * top + dty * bottom; 54cb93a386Sopenharmony_ci} 55cb93a386Sopenharmony_ci 56cb93a386Sopenharmony_cistatic int16_t full_res_bilerp( 57cb93a386Sopenharmony_ci float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) { 58cb93a386Sopenharmony_ci int32_t ftx(floor(tx * 65536.0f + 0.5f)); 59cb93a386Sopenharmony_ci int64_t top = ftx * (p10 - p00) + 65536 * p00; 60cb93a386Sopenharmony_ci int64_t bottom = ftx * (p11 - p01) + 65536 * p01; 61cb93a386Sopenharmony_ci 62cb93a386Sopenharmony_ci int64_t fty(floor(ty * 65536.0f + 0.5f)); 63cb93a386Sopenharmony_ci int64_t temp = fty * (bottom - top) + top * 65536LL; 64cb93a386Sopenharmony_ci int64_t rounded = temp + (1LL << 31); 65cb93a386Sopenharmony_ci return rounded >> 32; 66cb93a386Sopenharmony_ci} 67cb93a386Sopenharmony_ci 68cb93a386Sopenharmony_ci 69cb93a386Sopenharmony_cistatic int16_t bilerp_1(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) { 70cb93a386Sopenharmony_ci const int logPixelScale = 7; 71cb93a386Sopenharmony_ci const int16_t half = 1 << logPixelScale; 72cb93a386Sopenharmony_ci I16 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f); 73cb93a386Sopenharmony_ci I16 qw = (p10 - p00) << logPixelScale; 74cb93a386Sopenharmony_ci U16 qm = (p10 + p00) << logPixelScale; 75cb93a386Sopenharmony_ci I16 top = (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1); 76cb93a386Sopenharmony_ci 77cb93a386Sopenharmony_ci qw = (p11 - p01) << logPixelScale; 78cb93a386Sopenharmony_ci qm = (p11 + p01) << logPixelScale; 79cb93a386Sopenharmony_ci I16 bottom = 80cb93a386Sopenharmony_ci (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1); 81cb93a386Sopenharmony_ci 82cb93a386Sopenharmony_ci I16 qty = floor(ty * 65536.0f - 32768.0f + 0.5f); 83cb93a386Sopenharmony_ci 84cb93a386Sopenharmony_ci qw = bottom - top; 85cb93a386Sopenharmony_ci qm = (U16)bottom + (U16)top; 86cb93a386Sopenharmony_ci U16 scaledAnswer = constrained_add(simulate_ssse3_mm_mulhrs_epi16(qty, qw), qm); 87cb93a386Sopenharmony_ci 88cb93a386Sopenharmony_ci return (scaledAnswer[0] + half) >> (logPixelScale + 1); 89cb93a386Sopenharmony_ci} 90cb93a386Sopenharmony_ci 91cb93a386Sopenharmony_citemplate <typename Bilerp> 92cb93a386Sopenharmony_cistatic Stats check_bilerp(Bilerp bilerp) { 93cb93a386Sopenharmony_ci Stats stats; 94cb93a386Sopenharmony_ci const int step = 1; 95cb93a386Sopenharmony_ci auto interesting = {0, 1, 2, 3, 4, 5, 6, 7, 8, 60, 61, 62, 63, 64, 65, 66, 67, 68, 124, 125, 96cb93a386Sopenharmony_ci 126, 127, 128, 129, 130, 131, 132, 188, 189, 190, 191, 192, 193, 194, 97cb93a386Sopenharmony_ci 195, 196, 248, 249, 250, 251, 252, 253, 254, 255}; 98cb93a386Sopenharmony_ci for (float tx : {0.0f, 0.25f, 0.5f, 0.75f, 1.0f - 1.0f/65536.0f}) 99cb93a386Sopenharmony_ci for (float ty : {0.0f, 0.25f, 0.5f, 0.75f, 1.0f - 1.0f/65536.0f}) 100cb93a386Sopenharmony_ci for (int p00 : interesting) 101cb93a386Sopenharmony_ci for (int p01 : interesting) 102cb93a386Sopenharmony_ci for (int p10 : interesting) 103cb93a386Sopenharmony_ci for (int p11 : interesting) { 104cb93a386Sopenharmony_ci // Having this be double causes the proper rounding. 105cb93a386Sopenharmony_ci double l = golden_bilerp2(tx, ty, p00, p10, p01, p11); 106cb93a386Sopenharmony_ci int16_t golden = floor(l + 0.5); 107cb93a386Sopenharmony_ci //l = golden_bilerp(tx, ty, p00, p10, p01, p11); 108cb93a386Sopenharmony_ci //int16_t golden2 = floor(l + 0.5f); 109cb93a386Sopenharmony_ci int16_t candidate = bilerp(tx, ty, p00, p10, p01, p11); 110cb93a386Sopenharmony_ci stats.log(golden, candidate); 111cb93a386Sopenharmony_ci } 112cb93a386Sopenharmony_ci return stats; 113cb93a386Sopenharmony_ci} 114cb93a386Sopenharmony_ci 115cb93a386Sopenharmony_ciint main() { 116cb93a386Sopenharmony_ci Stats stats; 117cb93a386Sopenharmony_ci 118cb93a386Sopenharmony_ci printf("\nUsing trunc_bilerp...\n"); 119cb93a386Sopenharmony_ci stats = check_bilerp(bilerp_1); 120cb93a386Sopenharmony_ci stats.print(); 121cb93a386Sopenharmony_ci 122cb93a386Sopenharmony_ci printf("\nUsing full_res_bilerp...\n"); 123cb93a386Sopenharmony_ci stats = check_bilerp(full_res_bilerp); 124cb93a386Sopenharmony_ci stats.print(); 125cb93a386Sopenharmony_ci 126cb93a386Sopenharmony_ci printf("Done.\n"); 127cb93a386Sopenharmony_ci return 0; 128cb93a386Sopenharmony_ci} 129