1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0
2cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
3cc1dc7a3Sopenharmony_ci// Copyright 2023 Arm Limited
4cc1dc7a3Sopenharmony_ci//
5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy
7cc1dc7a3Sopenharmony_ci// of the License at:
8cc1dc7a3Sopenharmony_ci//
9cc1dc7a3Sopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
10cc1dc7a3Sopenharmony_ci//
11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software
12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations
15cc1dc7a3Sopenharmony_ci// under the License.
16cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
17cc1dc7a3Sopenharmony_ci
// astcenc doesn't use the top 8 integer bits directly for sRGB RGB components
// or when using the decode_unorm8 decode mode. An alternative is used which
// allows a common code path to be used. This test program shows that the two
// produce equivalent output once rounded to a decode_unorm8 output.
22cc1dc7a3Sopenharmony_ci
23cc1dc7a3Sopenharmony_ci// Compile with e.g. clang++ astcenc_u8_test_bench.cpp -o astcenc_u8_test_bench -mavx2 -mf16c
24cc1dc7a3Sopenharmony_ci
25cc1dc7a3Sopenharmony_ci#define ASTCENC_AVX 2
26cc1dc7a3Sopenharmony_ci#define ASTCENC_F16C 1
27cc1dc7a3Sopenharmony_ci#define ASTCENC_SSE 41
28cc1dc7a3Sopenharmony_ci
29cc1dc7a3Sopenharmony_ci#include "../Source/astcenc_mathlib.cpp"
30cc1dc7a3Sopenharmony_ci#include "../Source/astcenc_color_unquantize.cpp"
31cc1dc7a3Sopenharmony_ci#include "../Source/astcenc_decompress_symbolic.cpp"
32cc1dc7a3Sopenharmony_ci
33cc1dc7a3Sopenharmony_ciint main()
34cc1dc7a3Sopenharmony_ci{
35cc1dc7a3Sopenharmony_ci    printf("Decode mode test bench\n");
36cc1dc7a3Sopenharmony_ci
37cc1dc7a3Sopenharmony_ci    for (int ep0 = 0; ep0 < 256; ep0++)
38cc1dc7a3Sopenharmony_ci    {
39cc1dc7a3Sopenharmony_ci        for (int ep1 = 0; ep1 < 256; ep1++)
40cc1dc7a3Sopenharmony_ci        {
41cc1dc7a3Sopenharmony_ci            for (int wt1 = 0; wt1 < 65; wt1++)
42cc1dc7a3Sopenharmony_ci            {
43cc1dc7a3Sopenharmony_ci                // Validate linear data with decode_unorm8 mode
44cc1dc7a3Sopenharmony_ci                {
45cc1dc7a3Sopenharmony_ci                    // Expand 8 bit to 16 bit
46cc1dc7a3Sopenharmony_ci                    vint4 weights(wt1);
47cc1dc7a3Sopenharmony_ci                    int ep0_v0 = ep0 * 257;
48cc1dc7a3Sopenharmony_ci                    int ep1_v0 = ep1 * 257;
49cc1dc7a3Sopenharmony_ci
50cc1dc7a3Sopenharmony_ci                    // Linear with decode_u8 handling
51cc1dc7a3Sopenharmony_ci                    vmask4 decode_u8_v0(true, true, true, true);
52cc1dc7a3Sopenharmony_ci                    vint4 ep0v0(ep0_v0, ep0_v0, ep0_v0, ep0_v0);
53cc1dc7a3Sopenharmony_ci                    vint4 ep1v0(ep1_v0, ep1_v0, ep1_v0, ep1_v0);
54cc1dc7a3Sopenharmony_ci
55cc1dc7a3Sopenharmony_ci                    // Linear without decode_u8 handling
56cc1dc7a3Sopenharmony_ci                    vmask4 decode_u8_v1(false, false, false, false);
57cc1dc7a3Sopenharmony_ci                    vint4 ep0v1(ep0_v0, ep0_v0, ep0_v0, ep0_v0);
58cc1dc7a3Sopenharmony_ci                    vint4 ep1v1(ep1_v0, ep1_v0, ep1_v0, ep1_v0);
59cc1dc7a3Sopenharmony_ci
60cc1dc7a3Sopenharmony_ci                    // Lerp both styles
61cc1dc7a3Sopenharmony_ci                    vint4 colorv0 = lerp_color_int(decode_u8_v0, ep0v0, ep1v0, weights);
62cc1dc7a3Sopenharmony_ci                    vint4 colorv1 = lerp_color_int(decode_u8_v1, ep0v1, ep1v1, weights);
63cc1dc7a3Sopenharmony_ci
64cc1dc7a3Sopenharmony_ci                    // Validate top 8 integer bits match in both cases
65cc1dc7a3Sopenharmony_ci                    //  - Shows that astcenc-style U8 doesn't differ from Khronos-style U8
66cc1dc7a3Sopenharmony_ci                    vint4 cs0 = lsr<8>(colorv0);
67cc1dc7a3Sopenharmony_ci                    vint4 cs1 = lsr<8>(colorv1);
68cc1dc7a3Sopenharmony_ci                    assert(cs0.lane<0>() == cs1.lane<0>());
69cc1dc7a3Sopenharmony_ci                    assert(cs0.lane<3>() == cs1.lane<3>());
70cc1dc7a3Sopenharmony_ci
71cc1dc7a3Sopenharmony_ci                    // Validate that astcenc output matches the top 8 integer bits
72cc1dc7a3Sopenharmony_ci                    vfloat4 colorv0f = decode_texel(colorv0, vmask4(false));
73cc1dc7a3Sopenharmony_ci                    vint4 colorv0_out = float_to_int_rtn(colorv0f * 255.0f);
74cc1dc7a3Sopenharmony_ci                    assert(colorv0_out.lane<0>() == cs0.lane<0>());
75cc1dc7a3Sopenharmony_ci                }
76cc1dc7a3Sopenharmony_ci
77cc1dc7a3Sopenharmony_ci                // Validate sRGB data with decode_unorm8 mode
78cc1dc7a3Sopenharmony_ci                {
79cc1dc7a3Sopenharmony_ci                    // Expand 8 bit to 16 bit
80cc1dc7a3Sopenharmony_ci                    vint4 weights(wt1);
81cc1dc7a3Sopenharmony_ci                    int ep0_v0s = (ep0 << 8) | 0x80;
82cc1dc7a3Sopenharmony_ci                    int ep1_v0s = (ep1 << 8) | 0x80;
83cc1dc7a3Sopenharmony_ci                    int ep0_v0 = ep0 * 257;
84cc1dc7a3Sopenharmony_ci                    int ep1_v0 = ep1 * 257;
85cc1dc7a3Sopenharmony_ci
86cc1dc7a3Sopenharmony_ci                    // sRGB RGB and linear A with decode_u8 handling
87cc1dc7a3Sopenharmony_ci                    vmask4 decode_u8_v0(true, true, true, true);
88cc1dc7a3Sopenharmony_ci                    vint4 ep0v0(ep0_v0s, ep0_v0s, ep0_v0s, ep0_v0);
89cc1dc7a3Sopenharmony_ci                    vint4 ep1v0(ep1_v0s, ep1_v0s, ep1_v0s, ep1_v0);
90cc1dc7a3Sopenharmony_ci
91cc1dc7a3Sopenharmony_ci                    // sRGB RGB and linear A without decode_u8 handling
92cc1dc7a3Sopenharmony_ci                    vmask4 decode_u8_v1(false, false, false, false);
93cc1dc7a3Sopenharmony_ci                    vint4 ep0v1(ep0_v0s, ep0_v0s, ep0_v0s, ep0_v0);
94cc1dc7a3Sopenharmony_ci                    vint4 ep1v1(ep1_v0s, ep1_v0s, ep1_v0s, ep1_v0);
95cc1dc7a3Sopenharmony_ci
96cc1dc7a3Sopenharmony_ci                    // Lerp both styles
97cc1dc7a3Sopenharmony_ci                    vint4 colorv0 = lerp_color_int(decode_u8_v0, ep0v0, ep1v0, weights);
98cc1dc7a3Sopenharmony_ci                    vint4 colorv1 = lerp_color_int(decode_u8_v1, ep0v1, ep1v1, weights);
99cc1dc7a3Sopenharmony_ci
100cc1dc7a3Sopenharmony_ci                    // Validate top 8 integer bits match in both cases
101cc1dc7a3Sopenharmony_ci                    //  - Shows that astcenc-style U8 doesn't differ from Khronos-style U8
102cc1dc7a3Sopenharmony_ci                    vint4 cs0 = lsr<8>(colorv0);
103cc1dc7a3Sopenharmony_ci                    vint4 cs1 = lsr<8>(colorv1);
104cc1dc7a3Sopenharmony_ci                    assert(cs0.lane<0>() == cs1.lane<0>());
105cc1dc7a3Sopenharmony_ci                    assert(cs0.lane<3>() == cs1.lane<3>());
106cc1dc7a3Sopenharmony_ci
107cc1dc7a3Sopenharmony_ci                    // Validate that astcenc output matches the top 8 integer bits
108cc1dc7a3Sopenharmony_ci                    vfloat4 colorv0f = decode_texel(colorv0, vmask4(false));
109cc1dc7a3Sopenharmony_ci                    vint4 colorv0_out = float_to_int_rtn(colorv0f * 255.0f);
110cc1dc7a3Sopenharmony_ci                    assert(colorv0_out.lane<0>() == cs0.lane<0>());
111cc1dc7a3Sopenharmony_ci                }
112cc1dc7a3Sopenharmony_ci            }
113cc1dc7a3Sopenharmony_ci        }
114cc1dc7a3Sopenharmony_ci    }
115cc1dc7a3Sopenharmony_ci
116cc1dc7a3Sopenharmony_ci    return 0;
117cc1dc7a3Sopenharmony_ci}
118