1cb93a386Sopenharmony_ci/*
2cb93a386Sopenharmony_ci * Copyright 2021 Google LLC
3cb93a386Sopenharmony_ci *
4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be
5cb93a386Sopenharmony_ci * found in the LICENSE file.
6cb93a386Sopenharmony_ci */
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include <algorithm>
9cb93a386Sopenharmony_ci#include <cmath>
10cb93a386Sopenharmony_ci#include <cstdio>
11cb93a386Sopenharmony_ci#include <cstdint>
12cb93a386Sopenharmony_ci
13cb93a386Sopenharmony_ci#include "experimental/lowp-basic/QMath.h"
14cb93a386Sopenharmony_ci
15cb93a386Sopenharmony_cistruct Stats {
16cb93a386Sopenharmony_ci    int64_t diff_8_bits = 0;
17cb93a386Sopenharmony_ci    int64_t max_diff = 0;
18cb93a386Sopenharmony_ci    int64_t min_diff = 0;
19cb93a386Sopenharmony_ci    int64_t total = 0;
20cb93a386Sopenharmony_ci
21cb93a386Sopenharmony_ci    void log(int16_t golden, int16_t candidate) {
22cb93a386Sopenharmony_ci        int64_t diff = candidate - golden;
23cb93a386Sopenharmony_ci        max_diff = std::max(max_diff, diff);
24cb93a386Sopenharmony_ci        min_diff = std::min(min_diff, diff);
25cb93a386Sopenharmony_ci        diff_8_bits += candidate != golden;
26cb93a386Sopenharmony_ci        total++;
27cb93a386Sopenharmony_ci    }
28cb93a386Sopenharmony_ci
29cb93a386Sopenharmony_ci    void print() const {
30cb93a386Sopenharmony_ci        printf("8-bit diff: %lld - %g%%\n", diff_8_bits, 100.0 * diff_8_bits / total);
31cb93a386Sopenharmony_ci        printf("differences min: %lld max: %lld\n", min_diff, max_diff);
32cb93a386Sopenharmony_ci        printf("total: %lld\n", total);
33cb93a386Sopenharmony_ci    }
34cb93a386Sopenharmony_ci};
35cb93a386Sopenharmony_ci
36cb93a386Sopenharmony_ci// This has all kinds of rounding issues.
37cb93a386Sopenharmony_ci// TODO(herb): figure out rounding problems with this code.
38cb93a386Sopenharmony_cistatic float golden_bilerp(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
39cb93a386Sopenharmony_ci    return (1.0f-tx) * (1.0f-ty) * p00
40cb93a386Sopenharmony_ci         + (1.0f-tx) * ty * p01
41cb93a386Sopenharmony_ci         + (1.0f-ty) * tx * p10
42cb93a386Sopenharmony_ci         + tx * ty * p11;
43cb93a386Sopenharmony_ci}
44cb93a386Sopenharmony_ci
45cb93a386Sopenharmony_cistatic double golden_bilerp2(
46cb93a386Sopenharmony_ci        float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
47cb93a386Sopenharmony_ci    // Double is needed to avoid rounding of lower bits.
48cb93a386Sopenharmony_ci    double dtx(tx), dty(ty);
49cb93a386Sopenharmony_ci
50cb93a386Sopenharmony_ci    double top = (1.0 - dtx) * p00 + dtx * p10;
51cb93a386Sopenharmony_ci    double bottom = (1.0 - dtx) * p01 + dtx * p11;
52cb93a386Sopenharmony_ci
53cb93a386Sopenharmony_ci    return (1.0 - dty) * top + dty * bottom;
54cb93a386Sopenharmony_ci}
55cb93a386Sopenharmony_ci
56cb93a386Sopenharmony_cistatic int16_t full_res_bilerp(
57cb93a386Sopenharmony_ci        float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
58cb93a386Sopenharmony_ci    int32_t ftx(floor(tx * 65536.0f + 0.5f));
59cb93a386Sopenharmony_ci    int64_t top = ftx * (p10 - p00) + 65536 * p00;
60cb93a386Sopenharmony_ci    int64_t bottom = ftx * (p11 - p01) + 65536 * p01;
61cb93a386Sopenharmony_ci
62cb93a386Sopenharmony_ci    int64_t fty(floor(ty * 65536.0f + 0.5f));
63cb93a386Sopenharmony_ci    int64_t temp = fty * (bottom - top) + top * 65536LL;
64cb93a386Sopenharmony_ci    int64_t rounded = temp + (1LL << 31);
65cb93a386Sopenharmony_ci    return rounded >> 32;
66cb93a386Sopenharmony_ci}
67cb93a386Sopenharmony_ci
68cb93a386Sopenharmony_ci
69cb93a386Sopenharmony_cistatic int16_t bilerp_1(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
70cb93a386Sopenharmony_ci    const int logPixelScale = 7;
71cb93a386Sopenharmony_ci    const int16_t half = 1 << logPixelScale;
72cb93a386Sopenharmony_ci    I16 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f);
73cb93a386Sopenharmony_ci    I16 qw = (p10 - p00) << logPixelScale;
74cb93a386Sopenharmony_ci    U16 qm = (p10 + p00) << logPixelScale;
75cb93a386Sopenharmony_ci    I16 top = (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);
76cb93a386Sopenharmony_ci
77cb93a386Sopenharmony_ci    qw = (p11 - p01) << logPixelScale;
78cb93a386Sopenharmony_ci    qm = (p11 + p01) << logPixelScale;
79cb93a386Sopenharmony_ci    I16 bottom =
80cb93a386Sopenharmony_ci            (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);
81cb93a386Sopenharmony_ci
82cb93a386Sopenharmony_ci    I16 qty = floor(ty * 65536.0f - 32768.0f + 0.5f);
83cb93a386Sopenharmony_ci
84cb93a386Sopenharmony_ci    qw = bottom - top;
85cb93a386Sopenharmony_ci    qm = (U16)bottom + (U16)top;
86cb93a386Sopenharmony_ci    U16 scaledAnswer = constrained_add(simulate_ssse3_mm_mulhrs_epi16(qty, qw), qm);
87cb93a386Sopenharmony_ci
88cb93a386Sopenharmony_ci    return (scaledAnswer[0] + half) >> (logPixelScale + 1);
89cb93a386Sopenharmony_ci}
90cb93a386Sopenharmony_ci
91cb93a386Sopenharmony_citemplate <typename Bilerp>
92cb93a386Sopenharmony_cistatic Stats check_bilerp(Bilerp bilerp) {
93cb93a386Sopenharmony_ci    Stats stats;
94cb93a386Sopenharmony_ci    const int step = 1;
95cb93a386Sopenharmony_ci    auto interesting = {0, 1, 2, 3, 4, 5, 6, 7, 8, 60, 61, 62, 63, 64, 65, 66, 67, 68, 124, 125,
96cb93a386Sopenharmony_ci                        126, 127, 128, 129, 130, 131, 132, 188, 189, 190, 191, 192, 193, 194,
97cb93a386Sopenharmony_ci                        195, 196, 248, 249, 250, 251, 252, 253, 254, 255};
98cb93a386Sopenharmony_ci    for (float tx : {0.0f, 0.25f, 0.5f, 0.75f, 1.0f - 1.0f/65536.0f})
99cb93a386Sopenharmony_ci    for (float ty : {0.0f, 0.25f, 0.5f, 0.75f, 1.0f - 1.0f/65536.0f})
100cb93a386Sopenharmony_ci    for (int p00 : interesting)
101cb93a386Sopenharmony_ci    for (int p01 : interesting)
102cb93a386Sopenharmony_ci    for (int p10 : interesting)
103cb93a386Sopenharmony_ci    for (int p11 : interesting) {
104cb93a386Sopenharmony_ci        // Having this be double causes the proper rounding.
105cb93a386Sopenharmony_ci        double l = golden_bilerp2(tx, ty, p00, p10, p01, p11);
106cb93a386Sopenharmony_ci        int16_t golden = floor(l + 0.5);
107cb93a386Sopenharmony_ci        //l = golden_bilerp(tx, ty, p00, p10, p01, p11);
108cb93a386Sopenharmony_ci        //int16_t golden2 = floor(l + 0.5f);
109cb93a386Sopenharmony_ci        int16_t candidate = bilerp(tx, ty, p00, p10, p01, p11);
110cb93a386Sopenharmony_ci        stats.log(golden, candidate);
111cb93a386Sopenharmony_ci    }
112cb93a386Sopenharmony_ci    return stats;
113cb93a386Sopenharmony_ci}
114cb93a386Sopenharmony_ci
115cb93a386Sopenharmony_ciint main() {
116cb93a386Sopenharmony_ci    Stats stats;
117cb93a386Sopenharmony_ci
118cb93a386Sopenharmony_ci    printf("\nUsing trunc_bilerp...\n");
119cb93a386Sopenharmony_ci    stats = check_bilerp(bilerp_1);
120cb93a386Sopenharmony_ci    stats.print();
121cb93a386Sopenharmony_ci
122cb93a386Sopenharmony_ci    printf("\nUsing full_res_bilerp...\n");
123cb93a386Sopenharmony_ci    stats = check_bilerp(full_res_bilerp);
124cb93a386Sopenharmony_ci    stats.print();
125cb93a386Sopenharmony_ci
126cb93a386Sopenharmony_ci    printf("Done.\n");
127cb93a386Sopenharmony_ci    return 0;
128cb93a386Sopenharmony_ci}
129