// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2023 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------

// astcenc doesn't use the top 8 integer bits directly for sRGB RGB components
// or when using the decode_unorm8 decode mode. An alternative is used which
// allows a common code path to be used. This test program shows that the two
// produce equivalent output once rounded to a decode_unorm8 output.

// Compile with e.g.
clang++ astcenc_u8_test_bench.cpp -o astcenc_u8_test_bench -mavx2 -mf16c 24cc1dc7a3Sopenharmony_ci 25cc1dc7a3Sopenharmony_ci#define ASTCENC_AVX 2 26cc1dc7a3Sopenharmony_ci#define ASTCENC_F16C 1 27cc1dc7a3Sopenharmony_ci#define ASTCENC_SSE 41 28cc1dc7a3Sopenharmony_ci 29cc1dc7a3Sopenharmony_ci#include "../Source/astcenc_mathlib.cpp" 30cc1dc7a3Sopenharmony_ci#include "../Source/astcenc_color_unquantize.cpp" 31cc1dc7a3Sopenharmony_ci#include "../Source/astcenc_decompress_symbolic.cpp" 32cc1dc7a3Sopenharmony_ci 33cc1dc7a3Sopenharmony_ciint main() 34cc1dc7a3Sopenharmony_ci{ 35cc1dc7a3Sopenharmony_ci printf("Decode mode test bench\n"); 36cc1dc7a3Sopenharmony_ci 37cc1dc7a3Sopenharmony_ci for (int ep0 = 0; ep0 < 256; ep0++) 38cc1dc7a3Sopenharmony_ci { 39cc1dc7a3Sopenharmony_ci for (int ep1 = 0; ep1 < 256; ep1++) 40cc1dc7a3Sopenharmony_ci { 41cc1dc7a3Sopenharmony_ci for (int wt1 = 0; wt1 < 65; wt1++) 42cc1dc7a3Sopenharmony_ci { 43cc1dc7a3Sopenharmony_ci // Validate linear data with decode_unorm8 mode 44cc1dc7a3Sopenharmony_ci { 45cc1dc7a3Sopenharmony_ci // Expand 8 bit to 16 bit 46cc1dc7a3Sopenharmony_ci vint4 weights(wt1); 47cc1dc7a3Sopenharmony_ci int ep0_v0 = ep0 * 257; 48cc1dc7a3Sopenharmony_ci int ep1_v0 = ep1 * 257; 49cc1dc7a3Sopenharmony_ci 50cc1dc7a3Sopenharmony_ci // Linear with decode_u8 handling 51cc1dc7a3Sopenharmony_ci vmask4 decode_u8_v0(true, true, true, true); 52cc1dc7a3Sopenharmony_ci vint4 ep0v0(ep0_v0, ep0_v0, ep0_v0, ep0_v0); 53cc1dc7a3Sopenharmony_ci vint4 ep1v0(ep1_v0, ep1_v0, ep1_v0, ep1_v0); 54cc1dc7a3Sopenharmony_ci 55cc1dc7a3Sopenharmony_ci // Linear without decode_u8 handling 56cc1dc7a3Sopenharmony_ci vmask4 decode_u8_v1(false, false, false, false); 57cc1dc7a3Sopenharmony_ci vint4 ep0v1(ep0_v0, ep0_v0, ep0_v0, ep0_v0); 58cc1dc7a3Sopenharmony_ci vint4 ep1v1(ep1_v0, ep1_v0, ep1_v0, ep1_v0); 59cc1dc7a3Sopenharmony_ci 60cc1dc7a3Sopenharmony_ci // Lerp both styles 61cc1dc7a3Sopenharmony_ci vint4 colorv0 = lerp_color_int(decode_u8_v0, ep0v0, ep1v0, 
weights); 62cc1dc7a3Sopenharmony_ci vint4 colorv1 = lerp_color_int(decode_u8_v1, ep0v1, ep1v1, weights); 63cc1dc7a3Sopenharmony_ci 64cc1dc7a3Sopenharmony_ci // Validate top 8 integer bits match in both cases 65cc1dc7a3Sopenharmony_ci // - Shows that astcenc-style U8 doesn't differ from Khronos-style U8 66cc1dc7a3Sopenharmony_ci vint4 cs0 = lsr<8>(colorv0); 67cc1dc7a3Sopenharmony_ci vint4 cs1 = lsr<8>(colorv1); 68cc1dc7a3Sopenharmony_ci assert(cs0.lane<0>() == cs1.lane<0>()); 69cc1dc7a3Sopenharmony_ci assert(cs0.lane<3>() == cs1.lane<3>()); 70cc1dc7a3Sopenharmony_ci 71cc1dc7a3Sopenharmony_ci // Validate that astcenc output matches the top 8 integer bits 72cc1dc7a3Sopenharmony_ci vfloat4 colorv0f = decode_texel(colorv0, vmask4(false)); 73cc1dc7a3Sopenharmony_ci vint4 colorv0_out = float_to_int_rtn(colorv0f * 255.0f); 74cc1dc7a3Sopenharmony_ci assert(colorv0_out.lane<0>() == cs0.lane<0>()); 75cc1dc7a3Sopenharmony_ci } 76cc1dc7a3Sopenharmony_ci 77cc1dc7a3Sopenharmony_ci // Validate sRGB data with decode_unorm8 mode 78cc1dc7a3Sopenharmony_ci { 79cc1dc7a3Sopenharmony_ci // Expand 8 bit to 16 bit 80cc1dc7a3Sopenharmony_ci vint4 weights(wt1); 81cc1dc7a3Sopenharmony_ci int ep0_v0s = (ep0 << 8) | 0x80; 82cc1dc7a3Sopenharmony_ci int ep1_v0s = (ep1 << 8) | 0x80; 83cc1dc7a3Sopenharmony_ci int ep0_v0 = ep0 * 257; 84cc1dc7a3Sopenharmony_ci int ep1_v0 = ep1 * 257; 85cc1dc7a3Sopenharmony_ci 86cc1dc7a3Sopenharmony_ci // sRGB RGB and linear A with decode_u8 handling 87cc1dc7a3Sopenharmony_ci vmask4 decode_u8_v0(true, true, true, true); 88cc1dc7a3Sopenharmony_ci vint4 ep0v0(ep0_v0s, ep0_v0s, ep0_v0s, ep0_v0); 89cc1dc7a3Sopenharmony_ci vint4 ep1v0(ep1_v0s, ep1_v0s, ep1_v0s, ep1_v0); 90cc1dc7a3Sopenharmony_ci 91cc1dc7a3Sopenharmony_ci // sRGB RGB and linear A without decode_u8 handling 92cc1dc7a3Sopenharmony_ci vmask4 decode_u8_v1(false, false, false, false); 93cc1dc7a3Sopenharmony_ci vint4 ep0v1(ep0_v0s, ep0_v0s, ep0_v0s, ep0_v0); 94cc1dc7a3Sopenharmony_ci vint4 ep1v1(ep1_v0s, ep1_v0s, 
ep1_v0s, ep1_v0); 95cc1dc7a3Sopenharmony_ci 96cc1dc7a3Sopenharmony_ci // Lerp both styles 97cc1dc7a3Sopenharmony_ci vint4 colorv0 = lerp_color_int(decode_u8_v0, ep0v0, ep1v0, weights); 98cc1dc7a3Sopenharmony_ci vint4 colorv1 = lerp_color_int(decode_u8_v1, ep0v1, ep1v1, weights); 99cc1dc7a3Sopenharmony_ci 100cc1dc7a3Sopenharmony_ci // Validate top 8 integer bits match in both cases 101cc1dc7a3Sopenharmony_ci // - Shows that astcenc-style U8 doesn't differ from Khronos-style U8 102cc1dc7a3Sopenharmony_ci vint4 cs0 = lsr<8>(colorv0); 103cc1dc7a3Sopenharmony_ci vint4 cs1 = lsr<8>(colorv1); 104cc1dc7a3Sopenharmony_ci assert(cs0.lane<0>() == cs1.lane<0>()); 105cc1dc7a3Sopenharmony_ci assert(cs0.lane<3>() == cs1.lane<3>()); 106cc1dc7a3Sopenharmony_ci 107cc1dc7a3Sopenharmony_ci // Validate that astcenc output matches the top 8 integer bits 108cc1dc7a3Sopenharmony_ci vfloat4 colorv0f = decode_texel(colorv0, vmask4(false)); 109cc1dc7a3Sopenharmony_ci vint4 colorv0_out = float_to_int_rtn(colorv0f * 255.0f); 110cc1dc7a3Sopenharmony_ci assert(colorv0_out.lane<0>() == cs0.lane<0>()); 111cc1dc7a3Sopenharmony_ci } 112cc1dc7a3Sopenharmony_ci } 113cc1dc7a3Sopenharmony_ci } 114cc1dc7a3Sopenharmony_ci } 115cc1dc7a3Sopenharmony_ci 116cc1dc7a3Sopenharmony_ci return 0; 117cc1dc7a3Sopenharmony_ci} 118