1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0 2cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 3cc1dc7a3Sopenharmony_ci// Copyright 2011-2022 Arm Limited 4cc1dc7a3Sopenharmony_ci// 5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy 7cc1dc7a3Sopenharmony_ci// of the License at: 8cc1dc7a3Sopenharmony_ci// 9cc1dc7a3Sopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 10cc1dc7a3Sopenharmony_ci// 11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software 12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations 15cc1dc7a3Sopenharmony_ci// under the License. 16cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 17cc1dc7a3Sopenharmony_ci 18cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 19cc1dc7a3Sopenharmony_ci 20cc1dc7a3Sopenharmony_ci/** 21cc1dc7a3Sopenharmony_ci * @brief Functions for finding best endpoint format. 22cc1dc7a3Sopenharmony_ci * 23cc1dc7a3Sopenharmony_ci * We assume there are two independent sources of error in any given partition: 24cc1dc7a3Sopenharmony_ci * 25cc1dc7a3Sopenharmony_ci * - Encoding choice errors 26cc1dc7a3Sopenharmony_ci * - Quantization errors 27cc1dc7a3Sopenharmony_ci * 28cc1dc7a3Sopenharmony_ci * Encoding choice errors are caused by encoder decisions. For example: 29cc1dc7a3Sopenharmony_ci * 30cc1dc7a3Sopenharmony_ci * - Using luminance instead of separate RGB components. 31cc1dc7a3Sopenharmony_ci * - Using a constant 1.0 alpha instead of storing an alpha component. 32cc1dc7a3Sopenharmony_ci * - Using RGB+scale instead of storing two full RGB endpoints. 33cc1dc7a3Sopenharmony_ci * 34cc1dc7a3Sopenharmony_ci * Quantization errors occur due to the limited precision we use for storage. These errors generally 35cc1dc7a3Sopenharmony_ci * scale with quantization level, but are not actually independent of color encoding. In particular: 36cc1dc7a3Sopenharmony_ci * 37cc1dc7a3Sopenharmony_ci * - If we can use offset encoding then quantization error is halved. 38cc1dc7a3Sopenharmony_ci * - If we can use blue-contraction then quantization error for RG is halved. 39cc1dc7a3Sopenharmony_ci * - If we use HDR endpoints the quantization error is higher. 40cc1dc7a3Sopenharmony_ci * 41cc1dc7a3Sopenharmony_ci * Apart from these effects, we assume the error is proportional to the quantization step size. 42cc1dc7a3Sopenharmony_ci */ 43cc1dc7a3Sopenharmony_ci 44cc1dc7a3Sopenharmony_ci 45cc1dc7a3Sopenharmony_ci#include "astcenc_internal.h" 46cc1dc7a3Sopenharmony_ci#include "astcenc_vecmathlib.h" 47cc1dc7a3Sopenharmony_ci 48cc1dc7a3Sopenharmony_ci#include <assert.h> 49cc1dc7a3Sopenharmony_ci 50cc1dc7a3Sopenharmony_ci/** 51cc1dc7a3Sopenharmony_ci * @brief Compute the errors of the endpoint line options for one partition. 52cc1dc7a3Sopenharmony_ci * 53cc1dc7a3Sopenharmony_ci * Uncorrelated data assumes storing completely independent RGBA channels for each endpoint. Same 54cc1dc7a3Sopenharmony_ci * chroma data assumes storing RGBA endpoints which pass though the origin (LDR only). RGBL data 55cc1dc7a3Sopenharmony_ci * assumes storing RGB + lumashift (HDR only). Luminance error assumes storing RGB channels as a 56cc1dc7a3Sopenharmony_ci * single value. 57cc1dc7a3Sopenharmony_ci * 58cc1dc7a3Sopenharmony_ci * 59cc1dc7a3Sopenharmony_ci * @param pi The partition info data. 60cc1dc7a3Sopenharmony_ci * @param partition_index The partition index to compule the error for. 61cc1dc7a3Sopenharmony_ci * @param blk The image block. 62cc1dc7a3Sopenharmony_ci * @param uncor_pline The endpoint line assuming uncorrelated endpoints. 63cc1dc7a3Sopenharmony_ci * @param[out] uncor_err The computed error for the uncorrelated endpoint line. 64cc1dc7a3Sopenharmony_ci * @param samec_pline The endpoint line assuming the same chroma for both endpoints. 65cc1dc7a3Sopenharmony_ci * @param[out] samec_err The computed error for the uncorrelated endpoint line. 66cc1dc7a3Sopenharmony_ci * @param rgbl_pline The endpoint line assuming RGB + lumashift data. 67cc1dc7a3Sopenharmony_ci * @param[out] rgbl_err The computed error for the RGB + lumashift endpoint line. 68cc1dc7a3Sopenharmony_ci * @param l_pline The endpoint line assuming luminance data. 69cc1dc7a3Sopenharmony_ci * @param[out] l_err The computed error for the luminance endpoint line. 70cc1dc7a3Sopenharmony_ci * @param[out] a_drop_err The computed error for dropping the alpha component. 71cc1dc7a3Sopenharmony_ci */ 72cc1dc7a3Sopenharmony_cistatic void compute_error_squared_rgb_single_partition( 73cc1dc7a3Sopenharmony_ci const partition_info& pi, 74cc1dc7a3Sopenharmony_ci int partition_index, 75cc1dc7a3Sopenharmony_ci const image_block& blk, 76cc1dc7a3Sopenharmony_ci const processed_line3& uncor_pline, 77cc1dc7a3Sopenharmony_ci float& uncor_err, 78cc1dc7a3Sopenharmony_ci const processed_line3& samec_pline, 79cc1dc7a3Sopenharmony_ci float& samec_err, 80cc1dc7a3Sopenharmony_ci const processed_line3& rgbl_pline, 81cc1dc7a3Sopenharmony_ci float& rgbl_err, 82cc1dc7a3Sopenharmony_ci const processed_line3& l_pline, 83cc1dc7a3Sopenharmony_ci float& l_err, 84cc1dc7a3Sopenharmony_ci float& a_drop_err 85cc1dc7a3Sopenharmony_ci) { 86cc1dc7a3Sopenharmony_ci vfloat4 ews = blk.channel_weight; 87cc1dc7a3Sopenharmony_ci 88cc1dc7a3Sopenharmony_ci unsigned int texel_count = pi.partition_texel_count[partition_index]; 89cc1dc7a3Sopenharmony_ci const uint8_t* texel_indexes = pi.texels_of_partition[partition_index]; 90cc1dc7a3Sopenharmony_ci promise(texel_count > 0); 91cc1dc7a3Sopenharmony_ci 92cc1dc7a3Sopenharmony_ci vfloatacc a_drop_errv = vfloatacc::zero(); 93cc1dc7a3Sopenharmony_ci vfloat default_a(blk.get_default_alpha()); 94cc1dc7a3Sopenharmony_ci 95cc1dc7a3Sopenharmony_ci vfloatacc uncor_errv = vfloatacc::zero(); 96cc1dc7a3Sopenharmony_ci vfloat uncor_bs0(uncor_pline.bs.lane<0>()); 97cc1dc7a3Sopenharmony_ci vfloat uncor_bs1(uncor_pline.bs.lane<1>()); 98cc1dc7a3Sopenharmony_ci vfloat uncor_bs2(uncor_pline.bs.lane<2>()); 99cc1dc7a3Sopenharmony_ci 100cc1dc7a3Sopenharmony_ci vfloat uncor_amod0(uncor_pline.amod.lane<0>()); 101cc1dc7a3Sopenharmony_ci vfloat uncor_amod1(uncor_pline.amod.lane<1>()); 102cc1dc7a3Sopenharmony_ci vfloat uncor_amod2(uncor_pline.amod.lane<2>()); 103cc1dc7a3Sopenharmony_ci 104cc1dc7a3Sopenharmony_ci vfloatacc samec_errv = vfloatacc::zero(); 105cc1dc7a3Sopenharmony_ci vfloat samec_bs0(samec_pline.bs.lane<0>()); 106cc1dc7a3Sopenharmony_ci vfloat samec_bs1(samec_pline.bs.lane<1>()); 107cc1dc7a3Sopenharmony_ci vfloat samec_bs2(samec_pline.bs.lane<2>()); 108cc1dc7a3Sopenharmony_ci 109cc1dc7a3Sopenharmony_ci vfloatacc rgbl_errv = vfloatacc::zero(); 110cc1dc7a3Sopenharmony_ci vfloat rgbl_bs0(rgbl_pline.bs.lane<0>()); 111cc1dc7a3Sopenharmony_ci vfloat rgbl_bs1(rgbl_pline.bs.lane<1>()); 112cc1dc7a3Sopenharmony_ci vfloat rgbl_bs2(rgbl_pline.bs.lane<2>()); 113cc1dc7a3Sopenharmony_ci 114cc1dc7a3Sopenharmony_ci vfloat rgbl_amod0(rgbl_pline.amod.lane<0>()); 115cc1dc7a3Sopenharmony_ci vfloat rgbl_amod1(rgbl_pline.amod.lane<1>()); 116cc1dc7a3Sopenharmony_ci vfloat rgbl_amod2(rgbl_pline.amod.lane<2>()); 117cc1dc7a3Sopenharmony_ci 118cc1dc7a3Sopenharmony_ci vfloatacc l_errv = vfloatacc::zero(); 119cc1dc7a3Sopenharmony_ci vfloat l_bs0(l_pline.bs.lane<0>()); 120cc1dc7a3Sopenharmony_ci vfloat l_bs1(l_pline.bs.lane<1>()); 121cc1dc7a3Sopenharmony_ci vfloat l_bs2(l_pline.bs.lane<2>()); 122cc1dc7a3Sopenharmony_ci 123cc1dc7a3Sopenharmony_ci vfloat one_third(1/3.0f, 1/3.0f, 1/3.0f, 1/3.0f); 124cc1dc7a3Sopenharmony_ci vfloat uncor_errv0 = vfloat::zero(); 125cc1dc7a3Sopenharmony_ci vfloat uncor_errv1 = vfloat::zero(); 126cc1dc7a3Sopenharmony_ci vfloat uncor_errv2 = vfloat::zero(); 127cc1dc7a3Sopenharmony_ci vfloat samec_errv0 = vfloat::zero(); 128cc1dc7a3Sopenharmony_ci vfloat samec_errv1 = vfloat::zero(); 129cc1dc7a3Sopenharmony_ci vfloat samec_errv2 = vfloat::zero(); 130cc1dc7a3Sopenharmony_ci vfloat rgbl_errv0 = vfloat::zero(); 131cc1dc7a3Sopenharmony_ci vfloat rgbl_errv1 = vfloat::zero(); 132cc1dc7a3Sopenharmony_ci vfloat rgbl_errv2 = vfloat::zero(); 133cc1dc7a3Sopenharmony_ci vfloat l_errv0 = vfloat::zero(); 134cc1dc7a3Sopenharmony_ci vfloat l_errv1 = vfloat::zero(); 135cc1dc7a3Sopenharmony_ci vfloat l_errv2 = vfloat::zero(); 136cc1dc7a3Sopenharmony_ci 137cc1dc7a3Sopenharmony_ci unsigned int i = 0; 138cc1dc7a3Sopenharmony_ci for (; i + ASTCENC_SIMD_WIDTH <= texel_count; i += ASTCENC_SIMD_WIDTH) 139cc1dc7a3Sopenharmony_ci { 140cc1dc7a3Sopenharmony_ci#ifdef ASTCENC_USE_COMMON_GATHERF 141cc1dc7a3Sopenharmony_ci const uint8_t* tix = texel_indexes + i; 142cc1dc7a3Sopenharmony_ci#else 143cc1dc7a3Sopenharmony_ci vint tix(texel_indexes + i); 144cc1dc7a3Sopenharmony_ci#endif 145cc1dc7a3Sopenharmony_ci 146cc1dc7a3Sopenharmony_ci // Compute the error that arises from just ditching alpha 147cc1dc7a3Sopenharmony_ci vfloat data_a = gatherf(blk.data_a, tix); 148cc1dc7a3Sopenharmony_ci vfloat alpha_diff = data_a - default_a; 149cc1dc7a3Sopenharmony_ci alpha_diff = alpha_diff * alpha_diff; 150cc1dc7a3Sopenharmony_ci 151cc1dc7a3Sopenharmony_ci haccumulate(a_drop_errv, alpha_diff); 152cc1dc7a3Sopenharmony_ci 153cc1dc7a3Sopenharmony_ci vfloat data_r = gatherf(blk.data_r, tix); 154cc1dc7a3Sopenharmony_ci vfloat data_g = gatherf(blk.data_g, tix); 155cc1dc7a3Sopenharmony_ci vfloat data_b = gatherf(blk.data_b, tix); 156cc1dc7a3Sopenharmony_ci 157cc1dc7a3Sopenharmony_ci vfloat data_rgb_avg = (data_r + data_g + data_b) * one_third; 158cc1dc7a3Sopenharmony_ci vfloat data_rgb_0 = data_rgb_avg - data_r; 159cc1dc7a3Sopenharmony_ci vfloat data_rgb_1 = data_rgb_avg - data_g; 160cc1dc7a3Sopenharmony_ci vfloat data_rgb_2 = data_rgb_avg - data_b; 161cc1dc7a3Sopenharmony_ci 162cc1dc7a3Sopenharmony_ci // Compute uncorrelated error 163cc1dc7a3Sopenharmony_ci vfloat param = data_r * uncor_bs0 164cc1dc7a3Sopenharmony_ci + data_g * uncor_bs1 165cc1dc7a3Sopenharmony_ci + data_b * uncor_bs2; 166cc1dc7a3Sopenharmony_ci 167cc1dc7a3Sopenharmony_ci vfloat dist0 = (uncor_amod0 + param * uncor_bs0) - data_r; 168cc1dc7a3Sopenharmony_ci vfloat dist1 = (uncor_amod1 + param * uncor_bs1) - data_g; 169cc1dc7a3Sopenharmony_ci vfloat dist2 = (uncor_amod2 + param * uncor_bs2) - data_b; 170cc1dc7a3Sopenharmony_ci 171cc1dc7a3Sopenharmony_ci haccumulate(uncor_errv0, dist0 * dist0); 172cc1dc7a3Sopenharmony_ci haccumulate(uncor_errv1, dist1 * dist1); 173cc1dc7a3Sopenharmony_ci haccumulate(uncor_errv2, dist2 * dist2); 174cc1dc7a3Sopenharmony_ci 175cc1dc7a3Sopenharmony_ci // Compute same chroma error - no "amod", its always zero 176cc1dc7a3Sopenharmony_ci param = data_r * samec_bs0 177cc1dc7a3Sopenharmony_ci + data_g * samec_bs1 178cc1dc7a3Sopenharmony_ci + data_b * samec_bs2; 179cc1dc7a3Sopenharmony_ci 180cc1dc7a3Sopenharmony_ci dist0 = (param * samec_bs0) - data_r; 181cc1dc7a3Sopenharmony_ci dist1 = (param * samec_bs1) - data_g; 182cc1dc7a3Sopenharmony_ci dist2 = (param * samec_bs2) - data_b; 183cc1dc7a3Sopenharmony_ci 184cc1dc7a3Sopenharmony_ci haccumulate(uncor_errv0, dist0 * dist0); 185cc1dc7a3Sopenharmony_ci haccumulate(uncor_errv1, dist1 * dist1); 186cc1dc7a3Sopenharmony_ci haccumulate(uncor_errv2, dist2 * dist2); 187cc1dc7a3Sopenharmony_ci 188cc1dc7a3Sopenharmony_ci // Compute rgbl error 189cc1dc7a3Sopenharmony_ci dist0 = rgbl_amod0 + data_rgb_0; 190cc1dc7a3Sopenharmony_ci dist1 = rgbl_amod1 + data_rgb_1; 191cc1dc7a3Sopenharmony_ci dist2 = rgbl_amod2 + data_rgb_2; 192cc1dc7a3Sopenharmony_ci 193cc1dc7a3Sopenharmony_ci haccumulate(rgbl_errv0, dist0 * dist0); 194cc1dc7a3Sopenharmony_ci haccumulate(rgbl_errv1, dist1 * dist1); 195cc1dc7a3Sopenharmony_ci haccumulate(rgbl_errv2, dist2 * dist2); 196cc1dc7a3Sopenharmony_ci 197cc1dc7a3Sopenharmony_ci // Compute luma error - no "amod", its always zero 198cc1dc7a3Sopenharmony_ci dist0 = data_rgb_0; 199cc1dc7a3Sopenharmony_ci dist1 = data_rgb_1; 200cc1dc7a3Sopenharmony_ci dist2 = data_rgb_2; 201cc1dc7a3Sopenharmony_ci 202cc1dc7a3Sopenharmony_ci haccumulate(l_errv0, dist0 * dist0); 203cc1dc7a3Sopenharmony_ci haccumulate(l_errv1, dist1 * dist1); 204cc1dc7a3Sopenharmony_ci haccumulate(l_errv2, dist2 * dist2); 205cc1dc7a3Sopenharmony_ci } 206cc1dc7a3Sopenharmony_ci 207cc1dc7a3Sopenharmony_ci uncor_errv = uncor_errv0 * ews.lane<0>() + uncor_errv1 * ews.lane<1>() + uncor_errv2 * ews.lane<2>(); // channel 0,1,2 208cc1dc7a3Sopenharmony_ci samec_errv = samec_errv0 * ews.lane<0>() + samec_errv1 * ews.lane<1>() + samec_errv2 * ews.lane<2>(); // channel 0,1,2 209cc1dc7a3Sopenharmony_ci rgbl_errv = rgbl_errv0 * ews.lane<0>() + rgbl_errv1 * ews.lane<1>() + rgbl_errv2 * ews.lane<2>(); // channel 0,1,2 210cc1dc7a3Sopenharmony_ci l_errv = l_errv0 * ews.lane<0>() + l_errv1 * ews.lane<1>() + l_errv2 * ews.lane<2>(); // channel 0,1,2 211cc1dc7a3Sopenharmony_ci 212cc1dc7a3Sopenharmony_ci if (i < texel_count) 213cc1dc7a3Sopenharmony_ci { 214cc1dc7a3Sopenharmony_ci vint lane_ids = vint::lane_id() + i; 215cc1dc7a3Sopenharmony_ci vint tix(texel_indexes + i); 216cc1dc7a3Sopenharmony_ci 217cc1dc7a3Sopenharmony_ci vmask mask = lane_ids < vint(texel_count); 218cc1dc7a3Sopenharmony_ci lane_ids += vint(ASTCENC_SIMD_WIDTH); 219cc1dc7a3Sopenharmony_ci 220cc1dc7a3Sopenharmony_ci // Compute the error that arises from just ditching alpha 221cc1dc7a3Sopenharmony_ci vfloat data_a = gatherf(blk.data_a, tix); 222cc1dc7a3Sopenharmony_ci vfloat alpha_diff = data_a - default_a; 223cc1dc7a3Sopenharmony_ci alpha_diff = alpha_diff * alpha_diff; 224cc1dc7a3Sopenharmony_ci 225cc1dc7a3Sopenharmony_ci haccumulate(a_drop_errv, alpha_diff, mask); 226cc1dc7a3Sopenharmony_ci 227cc1dc7a3Sopenharmony_ci vfloat data_r = gatherf(blk.data_r, tix); 228cc1dc7a3Sopenharmony_ci vfloat data_g = gatherf(blk.data_g, tix); 229cc1dc7a3Sopenharmony_ci vfloat data_b = gatherf(blk.data_b, tix); 230cc1dc7a3Sopenharmony_ci 231cc1dc7a3Sopenharmony_ci vfloat data_rgb_avg = (data_r + data_g + data_b) * one_third; 232cc1dc7a3Sopenharmony_ci vfloat data_rgb_0 = data_rgb_avg - data_r; 233cc1dc7a3Sopenharmony_ci vfloat data_rgb_1 = data_rgb_avg - data_g; 234cc1dc7a3Sopenharmony_ci vfloat data_rgb_2 = data_rgb_avg - data_b; 235cc1dc7a3Sopenharmony_ci 236cc1dc7a3Sopenharmony_ci // Compute uncorrelated error 237cc1dc7a3Sopenharmony_ci vfloat param = data_r * uncor_bs0 238cc1dc7a3Sopenharmony_ci + data_g * uncor_bs1 239cc1dc7a3Sopenharmony_ci + data_b * uncor_bs2; 240cc1dc7a3Sopenharmony_ci 241cc1dc7a3Sopenharmony_ci vfloat dist0 = (uncor_amod0 + param * uncor_bs0) - data_r; 242cc1dc7a3Sopenharmony_ci vfloat dist1 = (uncor_amod1 + param * uncor_bs1) - data_g; 243cc1dc7a3Sopenharmony_ci vfloat dist2 = (uncor_amod2 + param * uncor_bs2) - data_b; 244cc1dc7a3Sopenharmony_ci 245cc1dc7a3Sopenharmony_ci vfloat error = dist0 * dist0 * ews.lane<0>() 246cc1dc7a3Sopenharmony_ci + dist1 * dist1 * ews.lane<1>() 247cc1dc7a3Sopenharmony_ci + dist2 * dist2 * ews.lane<2>(); 248cc1dc7a3Sopenharmony_ci 249cc1dc7a3Sopenharmony_ci haccumulate(uncor_errv, error, mask); 250cc1dc7a3Sopenharmony_ci 251cc1dc7a3Sopenharmony_ci // Compute same chroma error - no "amod", its always zero 252cc1dc7a3Sopenharmony_ci param = data_r * samec_bs0 253cc1dc7a3Sopenharmony_ci + data_g * samec_bs1 254cc1dc7a3Sopenharmony_ci + data_b * samec_bs2; 255cc1dc7a3Sopenharmony_ci 256cc1dc7a3Sopenharmony_ci dist0 = (param * samec_bs0) - data_r; 257cc1dc7a3Sopenharmony_ci dist1 = (param * samec_bs1) - data_g; 258cc1dc7a3Sopenharmony_ci dist2 = (param * samec_bs2) - data_b; 259cc1dc7a3Sopenharmony_ci 260cc1dc7a3Sopenharmony_ci error = dist0 * dist0 * ews.lane<0>() 261cc1dc7a3Sopenharmony_ci + dist1 * dist1 * ews.lane<1>() 262cc1dc7a3Sopenharmony_ci + dist2 * dist2 * ews.lane<2>(); 263cc1dc7a3Sopenharmony_ci 264cc1dc7a3Sopenharmony_ci haccumulate(samec_errv, error, mask); 265cc1dc7a3Sopenharmony_ci 266cc1dc7a3Sopenharmony_ci // Compute rgbl error 267cc1dc7a3Sopenharmony_ci dist0 = rgbl_amod0 + data_rgb_0; 268cc1dc7a3Sopenharmony_ci dist1 = rgbl_amod1 + data_rgb_1; 269cc1dc7a3Sopenharmony_ci dist2 = rgbl_amod2 + data_rgb_2; 270cc1dc7a3Sopenharmony_ci 271cc1dc7a3Sopenharmony_ci error = dist0 * dist0 * ews.lane<0>() 272cc1dc7a3Sopenharmony_ci + dist1 * dist1 * ews.lane<1>() 273cc1dc7a3Sopenharmony_ci + dist2 * dist2 * ews.lane<2>(); 274cc1dc7a3Sopenharmony_ci 275cc1dc7a3Sopenharmony_ci haccumulate(rgbl_errv, error, mask); 276cc1dc7a3Sopenharmony_ci 277cc1dc7a3Sopenharmony_ci // Compute luma error - no "amod", its always zero 278cc1dc7a3Sopenharmony_ci dist0 = data_rgb_0; 279cc1dc7a3Sopenharmony_ci dist1 = data_rgb_1; 280cc1dc7a3Sopenharmony_ci dist2 = data_rgb_2; 281cc1dc7a3Sopenharmony_ci 282cc1dc7a3Sopenharmony_ci error = dist0 * dist0 * ews.lane<0>() 283cc1dc7a3Sopenharmony_ci + dist1 * dist1 * ews.lane<1>() 284cc1dc7a3Sopenharmony_ci + dist2 * dist2 * ews.lane<2>(); 285cc1dc7a3Sopenharmony_ci 286cc1dc7a3Sopenharmony_ci haccumulate(l_errv, error, mask); 287cc1dc7a3Sopenharmony_ci } 288cc1dc7a3Sopenharmony_ci 289cc1dc7a3Sopenharmony_ci a_drop_err = hadd_s(a_drop_errv) * ews.lane<3>(); 290cc1dc7a3Sopenharmony_ci uncor_err = hadd_s(uncor_errv); 291cc1dc7a3Sopenharmony_ci samec_err = hadd_s(samec_errv); 292cc1dc7a3Sopenharmony_ci rgbl_err = hadd_s(rgbl_errv); 293cc1dc7a3Sopenharmony_ci l_err = hadd_s(l_errv); 294cc1dc7a3Sopenharmony_ci} 295cc1dc7a3Sopenharmony_ci 296cc1dc7a3Sopenharmony_ci/** 297cc1dc7a3Sopenharmony_ci * @brief For a given set of input colors and partitioning determine endpoint encode errors. 298cc1dc7a3Sopenharmony_ci * 299cc1dc7a3Sopenharmony_ci * This function determines the color error that results from RGB-scale encoding (LDR only), 300cc1dc7a3Sopenharmony_ci * RGB-lumashift encoding (HDR only), luminance-encoding, and alpha drop. Also determines whether 301cc1dc7a3Sopenharmony_ci * the endpoints are eligible for offset encoding or blue-contraction 302cc1dc7a3Sopenharmony_ci * 303cc1dc7a3Sopenharmony_ci * @param blk The image block. 304cc1dc7a3Sopenharmony_ci * @param pi The partition info data. 305cc1dc7a3Sopenharmony_ci * @param ep The idealized endpoints. 306cc1dc7a3Sopenharmony_ci * @param[out] eci The resulting encoding choice error metrics. 307cc1dc7a3Sopenharmony_ci */ 308cc1dc7a3Sopenharmony_cistatic void compute_encoding_choice_errors( 309cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 310cc1dc7a3Sopenharmony_ci const image_block& blk, 311cc1dc7a3Sopenharmony_ci const partition_info& pi, 312cc1dc7a3Sopenharmony_ci const endpoints& ep, 313cc1dc7a3Sopenharmony_ci encoding_choice_errors eci[BLOCK_MAX_PARTITIONS]) 314cc1dc7a3Sopenharmony_ci{ 315cc1dc7a3Sopenharmony_ci int partition_count = pi.partition_count; 316cc1dc7a3Sopenharmony_ci promise(partition_count > 0); 317cc1dc7a3Sopenharmony_ci 318cc1dc7a3Sopenharmony_ci partition_metrics *pms = reinterpret_cast<partition_metrics *>(&blk.pms[0]); 319cc1dc7a3Sopenharmony_ci 320cc1dc7a3Sopenharmony_ci if (!blk.is_constant_channel(3) || (partition_count != 1 && privateProfile == HIGH_QUALITY_PROFILE)) 321cc1dc7a3Sopenharmony_ci { 322cc1dc7a3Sopenharmony_ci compute_avgs_and_dirs_3_comp_rgb(pi, blk, pms); 323cc1dc7a3Sopenharmony_ci } 324cc1dc7a3Sopenharmony_ci 325cc1dc7a3Sopenharmony_ci for (int i = 0; i < partition_count; i++) 326cc1dc7a3Sopenharmony_ci { 327cc1dc7a3Sopenharmony_ci partition_metrics& pm = pms[i]; 328cc1dc7a3Sopenharmony_ci 329cc1dc7a3Sopenharmony_ci line3 uncor_rgb_lines; 330cc1dc7a3Sopenharmony_ci line3 samec_rgb_lines; // for LDR-RGB-scale 331cc1dc7a3Sopenharmony_ci line3 rgb_luma_lines; // for HDR-RGB-scale 332cc1dc7a3Sopenharmony_ci 333cc1dc7a3Sopenharmony_ci processed_line3 uncor_rgb_plines; 334cc1dc7a3Sopenharmony_ci processed_line3 samec_rgb_plines; 335cc1dc7a3Sopenharmony_ci processed_line3 rgb_luma_plines; 336cc1dc7a3Sopenharmony_ci processed_line3 luminance_plines; 337cc1dc7a3Sopenharmony_ci 338cc1dc7a3Sopenharmony_ci float uncorr_rgb_error; 339cc1dc7a3Sopenharmony_ci float samechroma_rgb_error; 340cc1dc7a3Sopenharmony_ci float rgb_luma_error; 341cc1dc7a3Sopenharmony_ci float luminance_rgb_error; 342cc1dc7a3Sopenharmony_ci float alpha_drop_error; 343cc1dc7a3Sopenharmony_ci 344cc1dc7a3Sopenharmony_ci uncor_rgb_lines.a = pm.avg; 345cc1dc7a3Sopenharmony_ci uncor_rgb_lines.b = normalize_safe(pm.dir, unit3()); 346cc1dc7a3Sopenharmony_ci 347cc1dc7a3Sopenharmony_ci samec_rgb_lines.a = vfloat4::zero(); 348cc1dc7a3Sopenharmony_ci samec_rgb_lines.b = normalize_safe(pm.avg, unit3()); 349cc1dc7a3Sopenharmony_ci 350cc1dc7a3Sopenharmony_ci rgb_luma_lines.a = pm.avg; 351cc1dc7a3Sopenharmony_ci rgb_luma_lines.b = unit3(); 352cc1dc7a3Sopenharmony_ci 353cc1dc7a3Sopenharmony_ci uncor_rgb_plines.amod = uncor_rgb_lines.a - uncor_rgb_lines.b * dot3(uncor_rgb_lines.a, uncor_rgb_lines.b); 354cc1dc7a3Sopenharmony_ci uncor_rgb_plines.bs = uncor_rgb_lines.b; 355cc1dc7a3Sopenharmony_ci 356cc1dc7a3Sopenharmony_ci // Same chroma always goes though zero, so this is simpler than the others 357cc1dc7a3Sopenharmony_ci samec_rgb_plines.amod = vfloat4::zero(); 358cc1dc7a3Sopenharmony_ci samec_rgb_plines.bs = samec_rgb_lines.b; 359cc1dc7a3Sopenharmony_ci 360cc1dc7a3Sopenharmony_ci rgb_luma_plines.amod = rgb_luma_lines.a - rgb_luma_lines.b * dot3(rgb_luma_lines.a, rgb_luma_lines.b); 361cc1dc7a3Sopenharmony_ci rgb_luma_plines.bs = rgb_luma_lines.b; 362cc1dc7a3Sopenharmony_ci 363cc1dc7a3Sopenharmony_ci // Luminance always goes though zero, so this is simpler than the others 364cc1dc7a3Sopenharmony_ci luminance_plines.amod = vfloat4::zero(); 365cc1dc7a3Sopenharmony_ci luminance_plines.bs = unit3(); 366cc1dc7a3Sopenharmony_ci 367cc1dc7a3Sopenharmony_ci compute_error_squared_rgb_single_partition( 368cc1dc7a3Sopenharmony_ci pi, i, blk, 369cc1dc7a3Sopenharmony_ci uncor_rgb_plines, uncorr_rgb_error, 370cc1dc7a3Sopenharmony_ci samec_rgb_plines, samechroma_rgb_error, 371cc1dc7a3Sopenharmony_ci rgb_luma_plines, rgb_luma_error, 372cc1dc7a3Sopenharmony_ci luminance_plines, luminance_rgb_error, 373cc1dc7a3Sopenharmony_ci alpha_drop_error); 374cc1dc7a3Sopenharmony_ci 375cc1dc7a3Sopenharmony_ci // Determine if we can offset encode RGB lanes 376cc1dc7a3Sopenharmony_ci vfloat4 endpt0 = ep.endpt0[i]; 377cc1dc7a3Sopenharmony_ci vfloat4 endpt1 = ep.endpt1[i]; 378cc1dc7a3Sopenharmony_ci vfloat4 endpt_diff = abs(endpt1 - endpt0); 379cc1dc7a3Sopenharmony_ci vmask4 endpt_can_offset = endpt_diff < vfloat4(0.12f * 65535.0f); 380cc1dc7a3Sopenharmony_ci bool can_offset_encode = (mask(endpt_can_offset) & 0x7) == 0x7; 381cc1dc7a3Sopenharmony_ci 382cc1dc7a3Sopenharmony_ci // Store out the settings 383cc1dc7a3Sopenharmony_ci eci[i].rgb_scale_error = (samechroma_rgb_error - uncorr_rgb_error) * 0.7f; // empirical 384cc1dc7a3Sopenharmony_ci eci[i].rgb_luma_error = (rgb_luma_error - uncorr_rgb_error) * 1.5f; // wild guess 385cc1dc7a3Sopenharmony_ci eci[i].luminance_error = (luminance_rgb_error - uncorr_rgb_error) * 3.0f; // empirical 386cc1dc7a3Sopenharmony_ci eci[i].alpha_drop_error = alpha_drop_error * 3.0f; 387cc1dc7a3Sopenharmony_ci eci[i].can_offset_encode = can_offset_encode; 388cc1dc7a3Sopenharmony_ci eci[i].can_blue_contract = !blk.is_luminance(); 389cc1dc7a3Sopenharmony_ci } 390cc1dc7a3Sopenharmony_ci} 391cc1dc7a3Sopenharmony_ci 392cc1dc7a3Sopenharmony_ci/** 393cc1dc7a3Sopenharmony_ci * @brief For a given partition compute the error for every endpoint integer count and quant level. 394cc1dc7a3Sopenharmony_ci * 395cc1dc7a3Sopenharmony_ci * @param encode_hdr_rgb @c true if using HDR for RGB, @c false for LDR. 396cc1dc7a3Sopenharmony_ci * @param encode_hdr_alpha @c true if using HDR for alpha, @c false for LDR. 397cc1dc7a3Sopenharmony_ci * @param partition_index The partition index. 398cc1dc7a3Sopenharmony_ci * @param pi The partition info. 399cc1dc7a3Sopenharmony_ci * @param eci The encoding choice error metrics. 400cc1dc7a3Sopenharmony_ci * @param ep The idealized endpoints. 401cc1dc7a3Sopenharmony_ci * @param error_weight The resulting encoding choice error metrics. 402cc1dc7a3Sopenharmony_ci * @param[out] best_error The best error for each integer count and quant level. 403cc1dc7a3Sopenharmony_ci * @param[out] format_of_choice The preferred endpoint format for each integer count and quant level. 404cc1dc7a3Sopenharmony_ci */ 405cc1dc7a3Sopenharmony_cistatic void compute_color_error_for_every_integer_count_and_quant_level( 406cc1dc7a3Sopenharmony_ci bool encode_hdr_rgb, 407cc1dc7a3Sopenharmony_ci bool encode_hdr_alpha, 408cc1dc7a3Sopenharmony_ci int partition_index, 409cc1dc7a3Sopenharmony_ci const partition_info& pi, 410cc1dc7a3Sopenharmony_ci const encoding_choice_errors& eci, 411cc1dc7a3Sopenharmony_ci const endpoints& ep, 412cc1dc7a3Sopenharmony_ci vfloat4 error_weight, 413cc1dc7a3Sopenharmony_ci float best_error[21][4], 414cc1dc7a3Sopenharmony_ci uint8_t format_of_choice[21][4] 415cc1dc7a3Sopenharmony_ci) { 416cc1dc7a3Sopenharmony_ci int partition_size = pi.partition_texel_count[partition_index]; 417cc1dc7a3Sopenharmony_ci 418cc1dc7a3Sopenharmony_ci static const float baseline_quant_error[21 - QUANT_6] { 419cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (5 * 5), 420cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (7 * 7), 421cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (9 * 9), 422cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (11 * 11), 423cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (15 * 15), 424cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (19 * 19), 425cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (23 * 23), 426cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (31 * 31), 427cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (39 * 39), 428cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (47 * 47), 429cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (63 * 63), 430cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (79 * 79), 431cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (95 * 95), 432cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (127 * 127), 433cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (159 * 159), 434cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (191 * 191), 435cc1dc7a3Sopenharmony_ci (65536.0f * 65536.0f / 18.0f) / (255 * 255) 436cc1dc7a3Sopenharmony_ci }; 437cc1dc7a3Sopenharmony_ci 438cc1dc7a3Sopenharmony_ci vfloat4 ep0 = ep.endpt0[partition_index]; 439cc1dc7a3Sopenharmony_ci vfloat4 ep1 = ep.endpt1[partition_index]; 440cc1dc7a3Sopenharmony_ci 441cc1dc7a3Sopenharmony_ci float ep1_min = hmin_rgb_s(ep1); 442cc1dc7a3Sopenharmony_ci ep1_min = astc::max(ep1_min, 0.0f); 443cc1dc7a3Sopenharmony_ci 444cc1dc7a3Sopenharmony_ci float error_weight_rgbsum = hadd_rgb_s(error_weight); 445cc1dc7a3Sopenharmony_ci 446cc1dc7a3Sopenharmony_ci float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f; 447cc1dc7a3Sopenharmony_ci float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f; 448cc1dc7a3Sopenharmony_ci 449cc1dc7a3Sopenharmony_ci // It is possible to get endpoint colors significantly outside [0,upper-limit] even if the 450cc1dc7a3Sopenharmony_ci // input data are safely contained in [0,upper-limit]; we need to add an error term for this 451cc1dc7a3Sopenharmony_ci vfloat4 offset(range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_alpha); 452cc1dc7a3Sopenharmony_ci vfloat4 ep0_range_error_high = max(ep0 - offset, 0.0f); 453cc1dc7a3Sopenharmony_ci vfloat4 ep1_range_error_high = max(ep1 - offset, 0.0f); 454cc1dc7a3Sopenharmony_ci 455cc1dc7a3Sopenharmony_ci vfloat4 ep0_range_error_low = min(ep0, 0.0f); 456cc1dc7a3Sopenharmony_ci vfloat4 ep1_range_error_low = min(ep1, 0.0f); 457cc1dc7a3Sopenharmony_ci 458cc1dc7a3Sopenharmony_ci vfloat4 sum_range_error = 459cc1dc7a3Sopenharmony_ci (ep0_range_error_low * ep0_range_error_low) + 460cc1dc7a3Sopenharmony_ci (ep1_range_error_low * ep1_range_error_low) + 461cc1dc7a3Sopenharmony_ci (ep0_range_error_high * ep0_range_error_high) + 462cc1dc7a3Sopenharmony_ci (ep1_range_error_high * ep1_range_error_high); 463cc1dc7a3Sopenharmony_ci 464cc1dc7a3Sopenharmony_ci float rgb_range_error = dot3_s(sum_range_error, error_weight) 465cc1dc7a3Sopenharmony_ci * 0.5f * static_cast<float>(partition_size); 466cc1dc7a3Sopenharmony_ci float alpha_range_error = sum_range_error.lane<3>() * error_weight.lane<3>() 467cc1dc7a3Sopenharmony_ci * 0.5f * static_cast<float>(partition_size); 468cc1dc7a3Sopenharmony_ci 469cc1dc7a3Sopenharmony_ci if (encode_hdr_rgb) 470cc1dc7a3Sopenharmony_ci { 471cc1dc7a3Sopenharmony_ci 472cc1dc7a3Sopenharmony_ci // Collect some statistics 473cc1dc7a3Sopenharmony_ci float af, cf; 474cc1dc7a3Sopenharmony_ci if (ep1.lane<0>() > ep1.lane<1>() && ep1.lane<0>() > ep1.lane<2>()) 475cc1dc7a3Sopenharmony_ci { 476cc1dc7a3Sopenharmony_ci af = ep1.lane<0>(); 477cc1dc7a3Sopenharmony_ci cf = ep1.lane<0>() - ep0.lane<0>(); 478cc1dc7a3Sopenharmony_ci } 479cc1dc7a3Sopenharmony_ci else if (ep1.lane<1>() > ep1.lane<2>()) 480cc1dc7a3Sopenharmony_ci { 481cc1dc7a3Sopenharmony_ci af = ep1.lane<1>(); 482cc1dc7a3Sopenharmony_ci cf = ep1.lane<1>() - ep0.lane<1>(); 483cc1dc7a3Sopenharmony_ci } 484cc1dc7a3Sopenharmony_ci else 485cc1dc7a3Sopenharmony_ci { 486cc1dc7a3Sopenharmony_ci af = ep1.lane<2>(); 487cc1dc7a3Sopenharmony_ci cf = ep1.lane<2>() - ep0.lane<2>(); 488cc1dc7a3Sopenharmony_ci } 489cc1dc7a3Sopenharmony_ci 490cc1dc7a3Sopenharmony_ci // Estimate of color-component spread in high endpoint color 491cc1dc7a3Sopenharmony_ci float bf = af - ep1_min; 492cc1dc7a3Sopenharmony_ci vfloat4 prd = (ep1 - vfloat4(cf)).swz<0, 1, 2>(); 493cc1dc7a3Sopenharmony_ci vfloat4 pdif = prd - ep0.swz<0, 1, 2>(); 494cc1dc7a3Sopenharmony_ci // Estimate of color-component spread in low endpoint color 495cc1dc7a3Sopenharmony_ci float df = hmax_s(abs(pdif)); 496cc1dc7a3Sopenharmony_ci 497cc1dc7a3Sopenharmony_ci int b = static_cast<int>(bf); 498cc1dc7a3Sopenharmony_ci int c = static_cast<int>(cf); 499cc1dc7a3Sopenharmony_ci int d = static_cast<int>(df); 500cc1dc7a3Sopenharmony_ci 501cc1dc7a3Sopenharmony_ci // Determine which one of the 6 submodes is likely to be used in case of an RGBO-mode 502cc1dc7a3Sopenharmony_ci int rgbo_mode = 5; // 7 bits per component 503cc1dc7a3Sopenharmony_ci // mode 4: 8 7 6 504cc1dc7a3Sopenharmony_ci if (b < 32768 && c < 16384) 505cc1dc7a3Sopenharmony_ci { 506cc1dc7a3Sopenharmony_ci rgbo_mode = 4; 507cc1dc7a3Sopenharmony_ci } 508cc1dc7a3Sopenharmony_ci 509cc1dc7a3Sopenharmony_ci // mode 3: 9 6 7 510cc1dc7a3Sopenharmony_ci if (b < 8192 && c < 16384) 511cc1dc7a3Sopenharmony_ci { 512cc1dc7a3Sopenharmony_ci rgbo_mode = 3; 513cc1dc7a3Sopenharmony_ci } 514cc1dc7a3Sopenharmony_ci 515cc1dc7a3Sopenharmony_ci // mode 2: 10 5 8 516cc1dc7a3Sopenharmony_ci if (b < 2048 && c < 16384) 517cc1dc7a3Sopenharmony_ci { 518cc1dc7a3Sopenharmony_ci rgbo_mode = 2; 519cc1dc7a3Sopenharmony_ci } 520cc1dc7a3Sopenharmony_ci 521cc1dc7a3Sopenharmony_ci // mode 1: 11 6 5 522cc1dc7a3Sopenharmony_ci if (b < 2048 && c < 1024) 523cc1dc7a3Sopenharmony_ci { 524cc1dc7a3Sopenharmony_ci rgbo_mode = 1; 525cc1dc7a3Sopenharmony_ci } 526cc1dc7a3Sopenharmony_ci 527cc1dc7a3Sopenharmony_ci // mode 0: 11 5 7 528cc1dc7a3Sopenharmony_ci if (b < 1024 && c < 4096) 529cc1dc7a3Sopenharmony_ci { 530cc1dc7a3Sopenharmony_ci rgbo_mode = 0; 531cc1dc7a3Sopenharmony_ci } 532cc1dc7a3Sopenharmony_ci 533cc1dc7a3Sopenharmony_ci // Determine which one of the 9 submodes is likely to be used in case of an RGB-mode. 534cc1dc7a3Sopenharmony_ci int rgb_mode = 8; // 8 bits per component, except 7 bits for blue 535cc1dc7a3Sopenharmony_ci 536cc1dc7a3Sopenharmony_ci // mode 0: 9 7 6 7 537cc1dc7a3Sopenharmony_ci if (b < 16384 && c < 8192 && d < 8192) 538cc1dc7a3Sopenharmony_ci { 539cc1dc7a3Sopenharmony_ci rgb_mode = 0; 540cc1dc7a3Sopenharmony_ci } 541cc1dc7a3Sopenharmony_ci 542cc1dc7a3Sopenharmony_ci // mode 1: 9 8 6 6 543cc1dc7a3Sopenharmony_ci if (b < 32768 && c < 8192 && d < 4096) 544cc1dc7a3Sopenharmony_ci { 545cc1dc7a3Sopenharmony_ci rgb_mode = 1; 546cc1dc7a3Sopenharmony_ci } 547cc1dc7a3Sopenharmony_ci 548cc1dc7a3Sopenharmony_ci // mode 2: 10 6 7 7 549cc1dc7a3Sopenharmony_ci if (b < 4096 && c < 8192 && d < 4096) 550cc1dc7a3Sopenharmony_ci { 551cc1dc7a3Sopenharmony_ci rgb_mode = 2; 552cc1dc7a3Sopenharmony_ci } 553cc1dc7a3Sopenharmony_ci 554cc1dc7a3Sopenharmony_ci // mode 3: 10 7 7 6 555cc1dc7a3Sopenharmony_ci if (b < 8192 && c < 8192 && d < 2048) 556cc1dc7a3Sopenharmony_ci { 557cc1dc7a3Sopenharmony_ci rgb_mode = 3; 558cc1dc7a3Sopenharmony_ci } 559cc1dc7a3Sopenharmony_ci 560cc1dc7a3Sopenharmony_ci // mode 4: 11 8 6 5 561cc1dc7a3Sopenharmony_ci if (b < 8192 && c < 2048 && d < 512) 562cc1dc7a3Sopenharmony_ci { 563cc1dc7a3Sopenharmony_ci rgb_mode = 4; 564cc1dc7a3Sopenharmony_ci } 565cc1dc7a3Sopenharmony_ci 566cc1dc7a3Sopenharmony_ci // mode 5: 11 6 8 6 567cc1dc7a3Sopenharmony_ci if (b < 2048 && c < 8192 && d < 1024) 568cc1dc7a3Sopenharmony_ci { 569cc1dc7a3Sopenharmony_ci rgb_mode = 5; 570cc1dc7a3Sopenharmony_ci } 571cc1dc7a3Sopenharmony_ci 572cc1dc7a3Sopenharmony_ci // mode 6: 12 7 7 5 573cc1dc7a3Sopenharmony_ci if (b < 2048 && c < 2048 && d < 256) 574cc1dc7a3Sopenharmony_ci { 575cc1dc7a3Sopenharmony_ci rgb_mode = 6; 576cc1dc7a3Sopenharmony_ci } 577cc1dc7a3Sopenharmony_ci 578cc1dc7a3Sopenharmony_ci // mode 7: 12 6 7 6 579cc1dc7a3Sopenharmony_ci if (b < 1024 && c < 2048 && d < 512) 580cc1dc7a3Sopenharmony_ci { 581cc1dc7a3Sopenharmony_ci rgb_mode = 7; 582cc1dc7a3Sopenharmony_ci } 583cc1dc7a3Sopenharmony_ci 584cc1dc7a3Sopenharmony_ci static const float rgbo_error_scales[6] { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f }; 585cc1dc7a3Sopenharmony_ci static const float rgb_error_scales[9] { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f }; 586cc1dc7a3Sopenharmony_ci 587cc1dc7a3Sopenharmony_ci float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // Empirically determined .... 588cc1dc7a3Sopenharmony_ci float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // Empirically determined .... 589cc1dc7a3Sopenharmony_ci 590cc1dc7a3Sopenharmony_ci 591cc1dc7a3Sopenharmony_ci float lum_high = hadd_rgb_s(ep1) * (1.0f / 3.0f); 592cc1dc7a3Sopenharmony_ci float lum_low = hadd_rgb_s(ep0) * (1.0f / 3.0f); 593cc1dc7a3Sopenharmony_ci float lumdif = lum_high - lum_low; 594cc1dc7a3Sopenharmony_ci float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f; 595cc1dc7a3Sopenharmony_ci 596cc1dc7a3Sopenharmony_ci mode23mult *= 0.0005f; // Empirically determined .... 597cc1dc7a3Sopenharmony_ci 598cc1dc7a3Sopenharmony_ci // Pick among the available HDR endpoint modes 599cc1dc7a3Sopenharmony_ci for (int i = QUANT_2; i < QUANT_16; i++) 600cc1dc7a3Sopenharmony_ci { 601cc1dc7a3Sopenharmony_ci best_error[i][3] = ERROR_CALC_DEFAULT; 602cc1dc7a3Sopenharmony_ci best_error[i][2] = ERROR_CALC_DEFAULT; 603cc1dc7a3Sopenharmony_ci best_error[i][1] = ERROR_CALC_DEFAULT; 604cc1dc7a3Sopenharmony_ci best_error[i][0] = ERROR_CALC_DEFAULT; 605cc1dc7a3Sopenharmony_ci 606cc1dc7a3Sopenharmony_ci format_of_choice[i][3] = static_cast<uint8_t>(encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA); 607cc1dc7a3Sopenharmony_ci format_of_choice[i][2] = FMT_HDR_RGB; 608cc1dc7a3Sopenharmony_ci format_of_choice[i][1] = FMT_HDR_RGB_SCALE; 609cc1dc7a3Sopenharmony_ci format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE; 610cc1dc7a3Sopenharmony_ci } 611cc1dc7a3Sopenharmony_ci 612cc1dc7a3Sopenharmony_ci for (int i = QUANT_16; i <= QUANT_256; i++) 613cc1dc7a3Sopenharmony_ci { 614cc1dc7a3Sopenharmony_ci // The base_quant_error should depend on the scale-factor that would be used during 615cc1dc7a3Sopenharmony_ci // actual encode of the color value 616cc1dc7a3Sopenharmony_ci 617cc1dc7a3Sopenharmony_ci float base_quant_error = baseline_quant_error[i - QUANT_6] * static_cast<float>(partition_size); 618cc1dc7a3Sopenharmony_ci float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f; 619cc1dc7a3Sopenharmony_ci float alpha_quantization_error = error_weight.lane<3>() * base_quant_error * 2.0f; 620cc1dc7a3Sopenharmony_ci float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error; 621cc1dc7a3Sopenharmony_ci 622cc1dc7a3Sopenharmony_ci // For 8 integers, we have two encodings: one with HDR A and another one with LDR A 623cc1dc7a3Sopenharmony_ci 624cc1dc7a3Sopenharmony_ci float full_hdr_rgba_error = rgba_quantization_error + rgb_range_error + alpha_range_error; 625cc1dc7a3Sopenharmony_ci best_error[i][3] = full_hdr_rgba_error; 626cc1dc7a3Sopenharmony_ci format_of_choice[i][3] = static_cast<uint8_t>(encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA); 627cc1dc7a3Sopenharmony_ci 628cc1dc7a3Sopenharmony_ci // For 6 integers, we have one HDR-RGB encoding 629cc1dc7a3Sopenharmony_ci float full_hdr_rgb_error = (rgb_quantization_error * mode11mult) + rgb_range_error + eci.alpha_drop_error; 630cc1dc7a3Sopenharmony_ci best_error[i][2] = full_hdr_rgb_error; 631cc1dc7a3Sopenharmony_ci format_of_choice[i][2] = FMT_HDR_RGB; 632cc1dc7a3Sopenharmony_ci 633cc1dc7a3Sopenharmony_ci // For 4 integers, we have one HDR-RGB-Scale encoding 634cc1dc7a3Sopenharmony_ci float hdr_rgb_scale_error = (rgb_quantization_error * mode7mult) + rgb_range_error + eci.alpha_drop_error + eci.rgb_luma_error; 635cc1dc7a3Sopenharmony_ci 636cc1dc7a3Sopenharmony_ci best_error[i][1] = hdr_rgb_scale_error; 637cc1dc7a3Sopenharmony_ci format_of_choice[i][1] = FMT_HDR_RGB_SCALE; 638cc1dc7a3Sopenharmony_ci 639cc1dc7a3Sopenharmony_ci // For 2 integers, we assume luminance-with-large-range 640cc1dc7a3Sopenharmony_ci float hdr_luminance_error = (rgb_quantization_error * mode23mult) + rgb_range_error + eci.alpha_drop_error + eci.luminance_error; 641cc1dc7a3Sopenharmony_ci best_error[i][0] = hdr_luminance_error; 642cc1dc7a3Sopenharmony_ci format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE; 643cc1dc7a3Sopenharmony_ci } 644cc1dc7a3Sopenharmony_ci } 645cc1dc7a3Sopenharmony_ci else 646cc1dc7a3Sopenharmony_ci { 647cc1dc7a3Sopenharmony_ci for (int i = QUANT_2; i < QUANT_6; i++) 648cc1dc7a3Sopenharmony_ci { 649cc1dc7a3Sopenharmony_ci best_error[i][3] = ERROR_CALC_DEFAULT; 650cc1dc7a3Sopenharmony_ci best_error[i][2] = ERROR_CALC_DEFAULT; 651cc1dc7a3Sopenharmony_ci best_error[i][1] = ERROR_CALC_DEFAULT; 652cc1dc7a3Sopenharmony_ci best_error[i][0] = ERROR_CALC_DEFAULT; 653cc1dc7a3Sopenharmony_ci 654cc1dc7a3Sopenharmony_ci format_of_choice[i][3] = FMT_RGBA; 655cc1dc7a3Sopenharmony_ci format_of_choice[i][2] = FMT_RGB; 656cc1dc7a3Sopenharmony_ci format_of_choice[i][1] = FMT_RGB_SCALE; 657cc1dc7a3Sopenharmony_ci format_of_choice[i][0] = FMT_LUMINANCE; 658cc1dc7a3Sopenharmony_ci } 659cc1dc7a3Sopenharmony_ci 660cc1dc7a3Sopenharmony_ci float base_quant_error_rgb = error_weight_rgbsum * static_cast<float>(partition_size); 661cc1dc7a3Sopenharmony_ci float base_quant_error_a = error_weight.lane<3>() * static_cast<float>(partition_size); 662cc1dc7a3Sopenharmony_ci float base_quant_error_rgba = base_quant_error_rgb + base_quant_error_a; 663cc1dc7a3Sopenharmony_ci 664cc1dc7a3Sopenharmony_ci float error_scale_bc_rgba = eci.can_blue_contract ? 0.625f : 1.0f; 665cc1dc7a3Sopenharmony_ci float error_scale_oe_rgba = eci.can_offset_encode ? 0.5f : 1.0f; 666cc1dc7a3Sopenharmony_ci 667cc1dc7a3Sopenharmony_ci float error_scale_bc_rgb = eci.can_blue_contract ? 0.5f : 1.0f; 668cc1dc7a3Sopenharmony_ci float error_scale_oe_rgb = eci.can_offset_encode ? 0.25f : 1.0f; 669cc1dc7a3Sopenharmony_ci 670cc1dc7a3Sopenharmony_ci // Pick among the available LDR endpoint modes 671cc1dc7a3Sopenharmony_ci for (int i = QUANT_6; i <= QUANT_256; i++) 672cc1dc7a3Sopenharmony_ci { 673cc1dc7a3Sopenharmony_ci // Offset encoding not possible at higher quant levels 674cc1dc7a3Sopenharmony_ci if (i >= QUANT_192) 675cc1dc7a3Sopenharmony_ci { 676cc1dc7a3Sopenharmony_ci error_scale_oe_rgba = 1.0f; 677cc1dc7a3Sopenharmony_ci error_scale_oe_rgb = 1.0f; 678cc1dc7a3Sopenharmony_ci } 679cc1dc7a3Sopenharmony_ci 680cc1dc7a3Sopenharmony_ci float base_quant_error = baseline_quant_error[i - QUANT_6]; 681cc1dc7a3Sopenharmony_ci float quant_error_rgb = base_quant_error_rgb * base_quant_error; 682cc1dc7a3Sopenharmony_ci float quant_error_rgba = base_quant_error_rgba * base_quant_error; 683cc1dc7a3Sopenharmony_ci 684cc1dc7a3Sopenharmony_ci // 8 integers can encode as RGBA+RGBA 685cc1dc7a3Sopenharmony_ci float full_ldr_rgba_error = quant_error_rgba 686cc1dc7a3Sopenharmony_ci * error_scale_bc_rgba 687cc1dc7a3Sopenharmony_ci * error_scale_oe_rgba 688cc1dc7a3Sopenharmony_ci + rgb_range_error 689cc1dc7a3Sopenharmony_ci + alpha_range_error; 690cc1dc7a3Sopenharmony_ci 691cc1dc7a3Sopenharmony_ci best_error[i][3] = full_ldr_rgba_error; 692cc1dc7a3Sopenharmony_ci format_of_choice[i][3] = FMT_RGBA; 693cc1dc7a3Sopenharmony_ci 694cc1dc7a3Sopenharmony_ci // 6 integers can encode as RGB+RGB or RGBS+AA 695cc1dc7a3Sopenharmony_ci float full_ldr_rgb_error = quant_error_rgb 696cc1dc7a3Sopenharmony_ci * error_scale_bc_rgb 697cc1dc7a3Sopenharmony_ci * error_scale_oe_rgb 698cc1dc7a3Sopenharmony_ci + rgb_range_error 699cc1dc7a3Sopenharmony_ci + eci.alpha_drop_error; 700cc1dc7a3Sopenharmony_ci 701cc1dc7a3Sopenharmony_ci float rgbs_alpha_error = quant_error_rgba 702cc1dc7a3Sopenharmony_ci + eci.rgb_scale_error 703cc1dc7a3Sopenharmony_ci + rgb_range_error 704cc1dc7a3Sopenharmony_ci + alpha_range_error; 705cc1dc7a3Sopenharmony_ci 706cc1dc7a3Sopenharmony_ci if (rgbs_alpha_error < full_ldr_rgb_error) 707cc1dc7a3Sopenharmony_ci { 708cc1dc7a3Sopenharmony_ci best_error[i][2] = rgbs_alpha_error; 709cc1dc7a3Sopenharmony_ci format_of_choice[i][2] = FMT_RGB_SCALE_ALPHA; 710cc1dc7a3Sopenharmony_ci } 711cc1dc7a3Sopenharmony_ci else 712cc1dc7a3Sopenharmony_ci { 713cc1dc7a3Sopenharmony_ci best_error[i][2] = full_ldr_rgb_error; 714cc1dc7a3Sopenharmony_ci format_of_choice[i][2] = FMT_RGB; 715cc1dc7a3Sopenharmony_ci } 716cc1dc7a3Sopenharmony_ci 717cc1dc7a3Sopenharmony_ci // 4 integers can encode as RGBS or LA+LA 718cc1dc7a3Sopenharmony_ci float ldr_rgbs_error = quant_error_rgb 719cc1dc7a3Sopenharmony_ci + rgb_range_error 720cc1dc7a3Sopenharmony_ci + eci.alpha_drop_error 721cc1dc7a3Sopenharmony_ci + eci.rgb_scale_error; 722cc1dc7a3Sopenharmony_ci 723cc1dc7a3Sopenharmony_ci float lum_alpha_error = quant_error_rgba 724cc1dc7a3Sopenharmony_ci + rgb_range_error 725cc1dc7a3Sopenharmony_ci + alpha_range_error 726cc1dc7a3Sopenharmony_ci + eci.luminance_error; 727cc1dc7a3Sopenharmony_ci 728cc1dc7a3Sopenharmony_ci if (ldr_rgbs_error < lum_alpha_error) 729cc1dc7a3Sopenharmony_ci { 730cc1dc7a3Sopenharmony_ci best_error[i][1] = ldr_rgbs_error; 731cc1dc7a3Sopenharmony_ci format_of_choice[i][1] = FMT_RGB_SCALE; 732cc1dc7a3Sopenharmony_ci } 733cc1dc7a3Sopenharmony_ci else 734cc1dc7a3Sopenharmony_ci { 735cc1dc7a3Sopenharmony_ci best_error[i][1] = lum_alpha_error; 736cc1dc7a3Sopenharmony_ci format_of_choice[i][1] = FMT_LUMINANCE_ALPHA; 737cc1dc7a3Sopenharmony_ci } 738cc1dc7a3Sopenharmony_ci 739cc1dc7a3Sopenharmony_ci // 2 integers can encode as L+L 740cc1dc7a3Sopenharmony_ci float luminance_error = quant_error_rgb 741cc1dc7a3Sopenharmony_ci + rgb_range_error 742cc1dc7a3Sopenharmony_ci + eci.alpha_drop_error 743cc1dc7a3Sopenharmony_ci + eci.luminance_error; 744cc1dc7a3Sopenharmony_ci 745cc1dc7a3Sopenharmony_ci best_error[i][0] = luminance_error; 746cc1dc7a3Sopenharmony_ci format_of_choice[i][0] = FMT_LUMINANCE; 747cc1dc7a3Sopenharmony_ci } 748cc1dc7a3Sopenharmony_ci } 749cc1dc7a3Sopenharmony_ci} 750cc1dc7a3Sopenharmony_ci 751cc1dc7a3Sopenharmony_ci/** 752cc1dc7a3Sopenharmony_ci * @brief For one partition compute the best format and quantization for a given bit count. 753cc1dc7a3Sopenharmony_ci * 754cc1dc7a3Sopenharmony_ci * @param best_combined_error The best error for each quant level and integer count. 755cc1dc7a3Sopenharmony_ci * @param best_combined_format The best format for each quant level and integer count. 756cc1dc7a3Sopenharmony_ci * @param bits_available The number of bits available for encoding. 757cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level The output best color quant level. 758cc1dc7a3Sopenharmony_ci * @param[out] best_format The output best color format. 759cc1dc7a3Sopenharmony_ci * 760cc1dc7a3Sopenharmony_ci * @return The output error for the best pairing. 761cc1dc7a3Sopenharmony_ci */ 762cc1dc7a3Sopenharmony_cistatic float one_partition_find_best_combination_for_bitcount( 763cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 764cc1dc7a3Sopenharmony_ci const float best_combined_error[21][4], 765cc1dc7a3Sopenharmony_ci const uint8_t best_combined_format[21][4], 766cc1dc7a3Sopenharmony_ci int bits_available, 767cc1dc7a3Sopenharmony_ci uint8_t& best_quant_level, 768cc1dc7a3Sopenharmony_ci uint8_t& best_format 769cc1dc7a3Sopenharmony_ci) { 770cc1dc7a3Sopenharmony_ci int best_integer_count = 0; 771cc1dc7a3Sopenharmony_ci float best_integer_count_error = ERROR_CALC_DEFAULT; 772cc1dc7a3Sopenharmony_ci 773cc1dc7a3Sopenharmony_ci for (int integer_count = 1; integer_count <= 4; integer_count++) 774cc1dc7a3Sopenharmony_ci { 775cc1dc7a3Sopenharmony_ci if (privateProfile != HIGH_QUALITY_PROFILE) 776cc1dc7a3Sopenharmony_ci { 777cc1dc7a3Sopenharmony_ci integer_count = 4; // constant 4 bit count for HIGH_SPEED_PROFILE mode 778cc1dc7a3Sopenharmony_ci } 779cc1dc7a3Sopenharmony_ci // Compute the quantization level for a given number of integers and a given number of bits 780cc1dc7a3Sopenharmony_ci int quant_level = quant_mode_table[integer_count][bits_available]; 781cc1dc7a3Sopenharmony_ci 782cc1dc7a3Sopenharmony_ci // Don't have enough bits to represent a given endpoint format at all! 783cc1dc7a3Sopenharmony_ci if (quant_level < QUANT_6) 784cc1dc7a3Sopenharmony_ci { 785cc1dc7a3Sopenharmony_ci continue; 786cc1dc7a3Sopenharmony_ci } 787cc1dc7a3Sopenharmony_ci 788cc1dc7a3Sopenharmony_ci float integer_count_error = best_combined_error[quant_level][integer_count - 1]; 789cc1dc7a3Sopenharmony_ci if (integer_count_error < best_integer_count_error) 790cc1dc7a3Sopenharmony_ci { 791cc1dc7a3Sopenharmony_ci best_integer_count_error = integer_count_error; 792cc1dc7a3Sopenharmony_ci best_integer_count = integer_count - 1; 793cc1dc7a3Sopenharmony_ci } 794cc1dc7a3Sopenharmony_ci } 795cc1dc7a3Sopenharmony_ci 796cc1dc7a3Sopenharmony_ci int ql = quant_mode_table[best_integer_count + 1][bits_available]; 797cc1dc7a3Sopenharmony_ci 798cc1dc7a3Sopenharmony_ci best_quant_level = static_cast<uint8_t>(ql); 799cc1dc7a3Sopenharmony_ci if (privateProfile != HIGH_QUALITY_PROFILE) // keep openSource code style 800cc1dc7a3Sopenharmony_ci { 801cc1dc7a3Sopenharmony_ci best_format = FMT_RGBA; 802cc1dc7a3Sopenharmony_ci } 803cc1dc7a3Sopenharmony_ci else 804cc1dc7a3Sopenharmony_ci { 805cc1dc7a3Sopenharmony_ci best_format = FMT_LUMINANCE; 806cc1dc7a3Sopenharmony_ci 807cc1dc7a3Sopenharmony_ci if (ql >= QUANT_6) 808cc1dc7a3Sopenharmony_ci { 809cc1dc7a3Sopenharmony_ci best_format = best_combined_format[ql][best_integer_count]; 810cc1dc7a3Sopenharmony_ci } 811cc1dc7a3Sopenharmony_ci } 812cc1dc7a3Sopenharmony_ci 813cc1dc7a3Sopenharmony_ci return best_integer_count_error; 814cc1dc7a3Sopenharmony_ci} 815cc1dc7a3Sopenharmony_ci 816cc1dc7a3Sopenharmony_ci/** 817cc1dc7a3Sopenharmony_ci * @brief For 2 partitions compute the best format combinations for every pair of quant mode and integer count. 818cc1dc7a3Sopenharmony_ci * 819cc1dc7a3Sopenharmony_ci * @param best_error The best error for a single endpoint quant level and integer count. 820cc1dc7a3Sopenharmony_ci * @param best_format The best format for a single endpoint quant level and integer count. 821cc1dc7a3Sopenharmony_ci * @param[out] best_combined_error The best combined error pairings for the 2 partitions. 822cc1dc7a3Sopenharmony_ci * @param[out] best_combined_format The best combined format pairings for the 2 partitions. 823cc1dc7a3Sopenharmony_ci */ 824cc1dc7a3Sopenharmony_cistatic void two_partitions_find_best_combination_for_every_quantization_and_integer_count( 825cc1dc7a3Sopenharmony_ci const float best_error[2][21][4], // indexed by (partition, quant-level, integer-pair-count-minus-1) 826cc1dc7a3Sopenharmony_ci const uint8_t best_format[2][21][4], 827cc1dc7a3Sopenharmony_ci float best_combined_error[21][7], // indexed by (quant-level, integer-pair-count-minus-2) 828cc1dc7a3Sopenharmony_ci uint8_t best_combined_format[21][7][2] 829cc1dc7a3Sopenharmony_ci) { 830cc1dc7a3Sopenharmony_ci for (int i = QUANT_2; i <= QUANT_256; i++) 831cc1dc7a3Sopenharmony_ci { 832cc1dc7a3Sopenharmony_ci for (int j = 0; j < 7; j++) 833cc1dc7a3Sopenharmony_ci { 834cc1dc7a3Sopenharmony_ci best_combined_error[i][j] = ERROR_CALC_DEFAULT; 835cc1dc7a3Sopenharmony_ci } 836cc1dc7a3Sopenharmony_ci } 837cc1dc7a3Sopenharmony_ci 838cc1dc7a3Sopenharmony_ci for (int quant = QUANT_6; quant <= QUANT_256; quant++) 839cc1dc7a3Sopenharmony_ci { 840cc1dc7a3Sopenharmony_ci for (int i = 0; i < 4; i++) // integer-count for first endpoint-pair 841cc1dc7a3Sopenharmony_ci { 842cc1dc7a3Sopenharmony_ci for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair 843cc1dc7a3Sopenharmony_ci { 844cc1dc7a3Sopenharmony_ci int low2 = astc::min(i, j); 845cc1dc7a3Sopenharmony_ci int high2 = astc::max(i, j); 846cc1dc7a3Sopenharmony_ci if ((high2 - low2) > 1) 847cc1dc7a3Sopenharmony_ci { 848cc1dc7a3Sopenharmony_ci continue; 849cc1dc7a3Sopenharmony_ci } 850cc1dc7a3Sopenharmony_ci 851cc1dc7a3Sopenharmony_ci int intcnt = i + j; 852cc1dc7a3Sopenharmony_ci float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j], 1e10f); 853cc1dc7a3Sopenharmony_ci if (errorterm <= best_combined_error[quant][intcnt]) 854cc1dc7a3Sopenharmony_ci { 855cc1dc7a3Sopenharmony_ci best_combined_error[quant][intcnt] = errorterm; 856cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][0] = best_format[0][quant][i]; 857cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][1] = best_format[1][quant][j]; 858cc1dc7a3Sopenharmony_ci } 859cc1dc7a3Sopenharmony_ci } 860cc1dc7a3Sopenharmony_ci } 861cc1dc7a3Sopenharmony_ci } 862cc1dc7a3Sopenharmony_ci} 863cc1dc7a3Sopenharmony_ci 864cc1dc7a3Sopenharmony_ci/** 865cc1dc7a3Sopenharmony_ci * @brief For 2 partitions compute the best format and quantization for a given bit count. 866cc1dc7a3Sopenharmony_ci * 867cc1dc7a3Sopenharmony_ci * @param best_combined_error The best error for each quant level and integer count. 868cc1dc7a3Sopenharmony_ci * @param best_combined_format The best format for each quant level and integer count. 869cc1dc7a3Sopenharmony_ci * @param bits_available The number of bits available for encoding. 870cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level The output best color quant level. 871cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level_mod The output best color quant level assuming two more bits are available. 872cc1dc7a3Sopenharmony_ci * @param[out] best_formats The output best color formats. 873cc1dc7a3Sopenharmony_ci * 874cc1dc7a3Sopenharmony_ci * @return The output error for the best pairing. 875cc1dc7a3Sopenharmony_ci */ 876cc1dc7a3Sopenharmony_cistatic float two_partitions_find_best_combination_for_bitcount( 877cc1dc7a3Sopenharmony_ci unsigned int privateProfile, 878cc1dc7a3Sopenharmony_ci float best_combined_error[21][7], 879cc1dc7a3Sopenharmony_ci uint8_t best_combined_format[21][7][2], 880cc1dc7a3Sopenharmony_ci int bits_available, 881cc1dc7a3Sopenharmony_ci uint8_t& best_quant_level, 882cc1dc7a3Sopenharmony_ci uint8_t& best_quant_level_mod, 883cc1dc7a3Sopenharmony_ci uint8_t* best_formats 884cc1dc7a3Sopenharmony_ci) { 885cc1dc7a3Sopenharmony_ci int best_integer_count = 0; 886cc1dc7a3Sopenharmony_ci float best_integer_count_error = ERROR_CALC_DEFAULT; 887cc1dc7a3Sopenharmony_ci int integer_count = 2; 888cc1dc7a3Sopenharmony_ci if (privateProfile != HIGH_QUALITY_PROFILE) 889cc1dc7a3Sopenharmony_ci { 890cc1dc7a3Sopenharmony_ci integer_count = 8; // constant 8 bit count 891cc1dc7a3Sopenharmony_ci } 892cc1dc7a3Sopenharmony_ci 893cc1dc7a3Sopenharmony_ci for (; integer_count <= 8; integer_count++) 894cc1dc7a3Sopenharmony_ci { 895cc1dc7a3Sopenharmony_ci // Compute the quantization level for a given number of integers and a given number of bits 896cc1dc7a3Sopenharmony_ci int quant_level = quant_mode_table[integer_count][bits_available]; 897cc1dc7a3Sopenharmony_ci 898cc1dc7a3Sopenharmony_ci // Don't have enough bits to represent a given endpoint format at all! 899cc1dc7a3Sopenharmony_ci if (quant_level < QUANT_6) 900cc1dc7a3Sopenharmony_ci { 901cc1dc7a3Sopenharmony_ci break; 902cc1dc7a3Sopenharmony_ci } 903cc1dc7a3Sopenharmony_ci 904cc1dc7a3Sopenharmony_ci float integer_count_error = best_combined_error[quant_level][integer_count - 2]; 905cc1dc7a3Sopenharmony_ci if (integer_count_error < best_integer_count_error) 906cc1dc7a3Sopenharmony_ci { 907cc1dc7a3Sopenharmony_ci best_integer_count_error = integer_count_error; 908cc1dc7a3Sopenharmony_ci best_integer_count = integer_count; 909cc1dc7a3Sopenharmony_ci } 910cc1dc7a3Sopenharmony_ci } 911cc1dc7a3Sopenharmony_ci 912cc1dc7a3Sopenharmony_ci int ql = quant_mode_table[best_integer_count][bits_available]; 913cc1dc7a3Sopenharmony_ci int ql_mod = quant_mode_table[best_integer_count][bits_available + 2]; 914cc1dc7a3Sopenharmony_ci 915cc1dc7a3Sopenharmony_ci best_quant_level = static_cast<uint8_t>(ql); 916cc1dc7a3Sopenharmony_ci best_quant_level_mod = static_cast<uint8_t>(ql_mod); 917cc1dc7a3Sopenharmony_ci 918cc1dc7a3Sopenharmony_ci if (ql >= QUANT_6) 919cc1dc7a3Sopenharmony_ci { 920cc1dc7a3Sopenharmony_ci for (int i = 0; i < 2; i++) 921cc1dc7a3Sopenharmony_ci { 922cc1dc7a3Sopenharmony_ci best_formats[i] = best_combined_format[ql][best_integer_count - 2][i]; 923cc1dc7a3Sopenharmony_ci } 924cc1dc7a3Sopenharmony_ci } 925cc1dc7a3Sopenharmony_ci else 926cc1dc7a3Sopenharmony_ci { 927cc1dc7a3Sopenharmony_ci for (int i = 0; i < 2; i++) 928cc1dc7a3Sopenharmony_ci { 929cc1dc7a3Sopenharmony_ci best_formats[i] = FMT_LUMINANCE; 930cc1dc7a3Sopenharmony_ci } 931cc1dc7a3Sopenharmony_ci } 932cc1dc7a3Sopenharmony_ci 933cc1dc7a3Sopenharmony_ci return best_integer_count_error; 934cc1dc7a3Sopenharmony_ci} 935cc1dc7a3Sopenharmony_ci 936cc1dc7a3Sopenharmony_ci/** 937cc1dc7a3Sopenharmony_ci * @brief For 3 partitions compute the best format combinations for every pair of quant mode and integer count. 938cc1dc7a3Sopenharmony_ci * 939cc1dc7a3Sopenharmony_ci * @param best_error The best error for a single endpoint quant level and integer count. 940cc1dc7a3Sopenharmony_ci * @param best_format The best format for a single endpoint quant level and integer count. 941cc1dc7a3Sopenharmony_ci * @param[out] best_combined_error The best combined error pairings for the 3 partitions. 942cc1dc7a3Sopenharmony_ci * @param[out] best_combined_format The best combined format pairings for the 3 partitions. 943cc1dc7a3Sopenharmony_ci */ 944cc1dc7a3Sopenharmony_cistatic void three_partitions_find_best_combination_for_every_quantization_and_integer_count( 945cc1dc7a3Sopenharmony_ci const float best_error[3][21][4], // indexed by (partition, quant-level, integer-count) 946cc1dc7a3Sopenharmony_ci const uint8_t best_format[3][21][4], 947cc1dc7a3Sopenharmony_ci float best_combined_error[21][10], 948cc1dc7a3Sopenharmony_ci uint8_t best_combined_format[21][10][3] 949cc1dc7a3Sopenharmony_ci) { 950cc1dc7a3Sopenharmony_ci for (int i = QUANT_2; i <= QUANT_256; i++) 951cc1dc7a3Sopenharmony_ci { 952cc1dc7a3Sopenharmony_ci for (int j = 0; j < 10; j++) 953cc1dc7a3Sopenharmony_ci { 954cc1dc7a3Sopenharmony_ci best_combined_error[i][j] = ERROR_CALC_DEFAULT; 955cc1dc7a3Sopenharmony_ci } 956cc1dc7a3Sopenharmony_ci } 957cc1dc7a3Sopenharmony_ci 958cc1dc7a3Sopenharmony_ci for (int quant = QUANT_6; quant <= QUANT_256; quant++) 959cc1dc7a3Sopenharmony_ci { 960cc1dc7a3Sopenharmony_ci for (int i = 0; i < 4; i++) // integer-count for first endpoint-pair 961cc1dc7a3Sopenharmony_ci { 962cc1dc7a3Sopenharmony_ci for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair 963cc1dc7a3Sopenharmony_ci { 964cc1dc7a3Sopenharmony_ci int low2 = astc::min(i, j); 965cc1dc7a3Sopenharmony_ci int high2 = astc::max(i, j); 966cc1dc7a3Sopenharmony_ci if ((high2 - low2) > 1) 967cc1dc7a3Sopenharmony_ci { 968cc1dc7a3Sopenharmony_ci continue; 969cc1dc7a3Sopenharmony_ci } 970cc1dc7a3Sopenharmony_ci 971cc1dc7a3Sopenharmony_ci for (int k = 0; k < 4; k++) // integer-count for third endpoint-pair 972cc1dc7a3Sopenharmony_ci { 973cc1dc7a3Sopenharmony_ci int low3 = astc::min(k, low2); 974cc1dc7a3Sopenharmony_ci int high3 = astc::max(k, high2); 975cc1dc7a3Sopenharmony_ci if ((high3 - low3) > 1) 976cc1dc7a3Sopenharmony_ci { 977cc1dc7a3Sopenharmony_ci continue; 978cc1dc7a3Sopenharmony_ci } 979cc1dc7a3Sopenharmony_ci 980cc1dc7a3Sopenharmony_ci int intcnt = i + j + k; 981cc1dc7a3Sopenharmony_ci float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k], 1e10f); 982cc1dc7a3Sopenharmony_ci if (errorterm <= best_combined_error[quant][intcnt]) 983cc1dc7a3Sopenharmony_ci { 984cc1dc7a3Sopenharmony_ci best_combined_error[quant][intcnt] = errorterm; 985cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][0] = best_format[0][quant][i]; 986cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][1] = best_format[1][quant][j]; 987cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][2] = best_format[2][quant][k]; 988cc1dc7a3Sopenharmony_ci } 989cc1dc7a3Sopenharmony_ci } 990cc1dc7a3Sopenharmony_ci } 991cc1dc7a3Sopenharmony_ci } 992cc1dc7a3Sopenharmony_ci } 993cc1dc7a3Sopenharmony_ci} 994cc1dc7a3Sopenharmony_ci 995cc1dc7a3Sopenharmony_ci/** 996cc1dc7a3Sopenharmony_ci * @brief For 3 partitions compute the best format and quantization for a given bit count. 997cc1dc7a3Sopenharmony_ci * 998cc1dc7a3Sopenharmony_ci * @param best_combined_error The best error for each quant level and integer count. 999cc1dc7a3Sopenharmony_ci * @param best_combined_format The best format for each quant level and integer count. 1000cc1dc7a3Sopenharmony_ci * @param bits_available The number of bits available for encoding. 1001cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level The output best color quant level. 1002cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level_mod The output best color quant level assuming two more bits are available. 1003cc1dc7a3Sopenharmony_ci * @param[out] best_formats The output best color formats. 1004cc1dc7a3Sopenharmony_ci * 1005cc1dc7a3Sopenharmony_ci * @return The output error for the best pairing. 1006cc1dc7a3Sopenharmony_ci */ 1007cc1dc7a3Sopenharmony_cistatic float three_partitions_find_best_combination_for_bitcount( 1008cc1dc7a3Sopenharmony_ci const float best_combined_error[21][10], 1009cc1dc7a3Sopenharmony_ci const uint8_t best_combined_format[21][10][3], 1010cc1dc7a3Sopenharmony_ci int bits_available, 1011cc1dc7a3Sopenharmony_ci uint8_t& best_quant_level, 1012cc1dc7a3Sopenharmony_ci uint8_t& best_quant_level_mod, 1013cc1dc7a3Sopenharmony_ci uint8_t* best_formats 1014cc1dc7a3Sopenharmony_ci) { 1015cc1dc7a3Sopenharmony_ci int best_integer_count = 0; 1016cc1dc7a3Sopenharmony_ci float best_integer_count_error = ERROR_CALC_DEFAULT; 1017cc1dc7a3Sopenharmony_ci 1018cc1dc7a3Sopenharmony_ci for (int integer_count = 3; integer_count <= 9; integer_count++) 1019cc1dc7a3Sopenharmony_ci { 1020cc1dc7a3Sopenharmony_ci // Compute the quantization level for a given number of integers and a given number of bits 1021cc1dc7a3Sopenharmony_ci int quant_level = quant_mode_table[integer_count][bits_available]; 1022cc1dc7a3Sopenharmony_ci 1023cc1dc7a3Sopenharmony_ci // Don't have enough bits to represent a given endpoint format at all! 1024cc1dc7a3Sopenharmony_ci if (quant_level < QUANT_6) 1025cc1dc7a3Sopenharmony_ci { 1026cc1dc7a3Sopenharmony_ci break; 1027cc1dc7a3Sopenharmony_ci } 1028cc1dc7a3Sopenharmony_ci 1029cc1dc7a3Sopenharmony_ci float integer_count_error = best_combined_error[quant_level][integer_count - 3]; 1030cc1dc7a3Sopenharmony_ci if (integer_count_error < best_integer_count_error) 1031cc1dc7a3Sopenharmony_ci { 1032cc1dc7a3Sopenharmony_ci best_integer_count_error = integer_count_error; 1033cc1dc7a3Sopenharmony_ci best_integer_count = integer_count; 1034cc1dc7a3Sopenharmony_ci } 1035cc1dc7a3Sopenharmony_ci } 1036cc1dc7a3Sopenharmony_ci 1037cc1dc7a3Sopenharmony_ci int ql = quant_mode_table[best_integer_count][bits_available]; 1038cc1dc7a3Sopenharmony_ci int ql_mod = quant_mode_table[best_integer_count][bits_available + 5]; 1039cc1dc7a3Sopenharmony_ci 1040cc1dc7a3Sopenharmony_ci best_quant_level = static_cast<uint8_t>(ql); 1041cc1dc7a3Sopenharmony_ci best_quant_level_mod = static_cast<uint8_t>(ql_mod); 1042cc1dc7a3Sopenharmony_ci 1043cc1dc7a3Sopenharmony_ci if (ql >= QUANT_6) 1044cc1dc7a3Sopenharmony_ci { 1045cc1dc7a3Sopenharmony_ci for (int i = 0; i < 3; i++) 1046cc1dc7a3Sopenharmony_ci { 1047cc1dc7a3Sopenharmony_ci best_formats[i] = best_combined_format[ql][best_integer_count - 3][i]; 1048cc1dc7a3Sopenharmony_ci } 1049cc1dc7a3Sopenharmony_ci } 1050cc1dc7a3Sopenharmony_ci else 1051cc1dc7a3Sopenharmony_ci { 1052cc1dc7a3Sopenharmony_ci for (int i = 0; i < 3; i++) 1053cc1dc7a3Sopenharmony_ci { 1054cc1dc7a3Sopenharmony_ci best_formats[i] = FMT_LUMINANCE; 1055cc1dc7a3Sopenharmony_ci } 1056cc1dc7a3Sopenharmony_ci } 1057cc1dc7a3Sopenharmony_ci 1058cc1dc7a3Sopenharmony_ci return best_integer_count_error; 1059cc1dc7a3Sopenharmony_ci} 1060cc1dc7a3Sopenharmony_ci 1061cc1dc7a3Sopenharmony_ci/** 1062cc1dc7a3Sopenharmony_ci * @brief For 4 partitions compute the best format combinations for every pair of quant mode and integer count. 1063cc1dc7a3Sopenharmony_ci * 1064cc1dc7a3Sopenharmony_ci * @param best_error The best error for a single endpoint quant level and integer count. 1065cc1dc7a3Sopenharmony_ci * @param best_format The best format for a single endpoint quant level and integer count. 1066cc1dc7a3Sopenharmony_ci * @param[out] best_combined_error The best combined error pairings for the 4 partitions. 1067cc1dc7a3Sopenharmony_ci * @param[out] best_combined_format The best combined format pairings for the 4 partitions. 1068cc1dc7a3Sopenharmony_ci */ 1069cc1dc7a3Sopenharmony_cistatic void four_partitions_find_best_combination_for_every_quantization_and_integer_count( 1070cc1dc7a3Sopenharmony_ci const float best_error[4][21][4], // indexed by (partition, quant-level, integer-count) 1071cc1dc7a3Sopenharmony_ci const uint8_t best_format[4][21][4], 1072cc1dc7a3Sopenharmony_ci float best_combined_error[21][13], 1073cc1dc7a3Sopenharmony_ci uint8_t best_combined_format[21][13][4] 1074cc1dc7a3Sopenharmony_ci) { 1075cc1dc7a3Sopenharmony_ci for (int i = QUANT_2; i <= QUANT_256; i++) 1076cc1dc7a3Sopenharmony_ci { 1077cc1dc7a3Sopenharmony_ci for (int j = 0; j < 13; j++) 1078cc1dc7a3Sopenharmony_ci { 1079cc1dc7a3Sopenharmony_ci best_combined_error[i][j] = ERROR_CALC_DEFAULT; 1080cc1dc7a3Sopenharmony_ci } 1081cc1dc7a3Sopenharmony_ci } 1082cc1dc7a3Sopenharmony_ci 1083cc1dc7a3Sopenharmony_ci for (int quant = QUANT_6; quant <= QUANT_256; quant++) 1084cc1dc7a3Sopenharmony_ci { 1085cc1dc7a3Sopenharmony_ci for (int i = 0; i < 4; i++) // integer-count for first endpoint-pair 1086cc1dc7a3Sopenharmony_ci { 1087cc1dc7a3Sopenharmony_ci for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair 1088cc1dc7a3Sopenharmony_ci { 1089cc1dc7a3Sopenharmony_ci int low2 = astc::min(i, j); 1090cc1dc7a3Sopenharmony_ci int high2 = astc::max(i, j); 1091cc1dc7a3Sopenharmony_ci if ((high2 - low2) > 1) 1092cc1dc7a3Sopenharmony_ci { 1093cc1dc7a3Sopenharmony_ci continue; 1094cc1dc7a3Sopenharmony_ci } 1095cc1dc7a3Sopenharmony_ci 1096cc1dc7a3Sopenharmony_ci for (int k = 0; k < 4; k++) // integer-count for third endpoint-pair 1097cc1dc7a3Sopenharmony_ci { 1098cc1dc7a3Sopenharmony_ci int low3 = astc::min(k, low2); 1099cc1dc7a3Sopenharmony_ci int high3 = astc::max(k, high2); 1100cc1dc7a3Sopenharmony_ci if ((high3 - low3) > 1) 1101cc1dc7a3Sopenharmony_ci { 1102cc1dc7a3Sopenharmony_ci continue; 1103cc1dc7a3Sopenharmony_ci } 1104cc1dc7a3Sopenharmony_ci 1105cc1dc7a3Sopenharmony_ci for (int l = 0; l < 4; l++) // integer-count for fourth endpoint-pair 1106cc1dc7a3Sopenharmony_ci { 1107cc1dc7a3Sopenharmony_ci int low4 = astc::min(l, low3); 1108cc1dc7a3Sopenharmony_ci int high4 = astc::max(l, high3); 1109cc1dc7a3Sopenharmony_ci if ((high4 - low4) > 1) 1110cc1dc7a3Sopenharmony_ci { 1111cc1dc7a3Sopenharmony_ci continue; 1112cc1dc7a3Sopenharmony_ci } 1113cc1dc7a3Sopenharmony_ci 1114cc1dc7a3Sopenharmony_ci int intcnt = i + j + k + l; 1115cc1dc7a3Sopenharmony_ci float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k] + best_error[3][quant][l], 1e10f); 1116cc1dc7a3Sopenharmony_ci if (errorterm <= best_combined_error[quant][intcnt]) 1117cc1dc7a3Sopenharmony_ci { 1118cc1dc7a3Sopenharmony_ci best_combined_error[quant][intcnt] = errorterm; 1119cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][0] = best_format[0][quant][i]; 1120cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][1] = best_format[1][quant][j]; 1121cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][2] = best_format[2][quant][k]; 1122cc1dc7a3Sopenharmony_ci best_combined_format[quant][intcnt][3] = best_format[3][quant][l]; 1123cc1dc7a3Sopenharmony_ci } 1124cc1dc7a3Sopenharmony_ci } 1125cc1dc7a3Sopenharmony_ci } 1126cc1dc7a3Sopenharmony_ci } 1127cc1dc7a3Sopenharmony_ci } 1128cc1dc7a3Sopenharmony_ci } 1129cc1dc7a3Sopenharmony_ci} 1130cc1dc7a3Sopenharmony_ci 1131cc1dc7a3Sopenharmony_ci/** 1132cc1dc7a3Sopenharmony_ci * @brief For 4 partitions compute the best format and quantization for a given bit count. 1133cc1dc7a3Sopenharmony_ci * 1134cc1dc7a3Sopenharmony_ci * @param best_combined_error The best error for each quant level and integer count. 1135cc1dc7a3Sopenharmony_ci * @param best_combined_format The best format for each quant level and integer count. 1136cc1dc7a3Sopenharmony_ci * @param bits_available The number of bits available for encoding. 1137cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level The output best color quant level. 1138cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level_mod The output best color quant level assuming two more bits are available. 1139cc1dc7a3Sopenharmony_ci * @param[out] best_formats The output best color formats. 1140cc1dc7a3Sopenharmony_ci * 1141cc1dc7a3Sopenharmony_ci * @return best_error The output error for the best pairing. 1142cc1dc7a3Sopenharmony_ci */ 1143cc1dc7a3Sopenharmony_cistatic float four_partitions_find_best_combination_for_bitcount( 1144cc1dc7a3Sopenharmony_ci const float best_combined_error[21][13], 1145cc1dc7a3Sopenharmony_ci const uint8_t best_combined_format[21][13][4], 1146cc1dc7a3Sopenharmony_ci int bits_available, 1147cc1dc7a3Sopenharmony_ci uint8_t& best_quant_level, 1148cc1dc7a3Sopenharmony_ci uint8_t& best_quant_level_mod, 1149cc1dc7a3Sopenharmony_ci uint8_t* best_formats 1150cc1dc7a3Sopenharmony_ci) { 1151cc1dc7a3Sopenharmony_ci int best_integer_count = 0; 1152cc1dc7a3Sopenharmony_ci float best_integer_count_error = ERROR_CALC_DEFAULT; 1153cc1dc7a3Sopenharmony_ci 1154cc1dc7a3Sopenharmony_ci for (int integer_count = 4; integer_count <= 9; integer_count++) 1155cc1dc7a3Sopenharmony_ci { 1156cc1dc7a3Sopenharmony_ci // Compute the quantization level for a given number of integers and a given number of bits 1157cc1dc7a3Sopenharmony_ci int quant_level = quant_mode_table[integer_count][bits_available]; 1158cc1dc7a3Sopenharmony_ci 1159cc1dc7a3Sopenharmony_ci // Don't have enough bits to represent a given endpoint format at all! 1160cc1dc7a3Sopenharmony_ci if (quant_level < QUANT_6) 1161cc1dc7a3Sopenharmony_ci { 1162cc1dc7a3Sopenharmony_ci break; 1163cc1dc7a3Sopenharmony_ci } 1164cc1dc7a3Sopenharmony_ci 1165cc1dc7a3Sopenharmony_ci float integer_count_error = best_combined_error[quant_level][integer_count - 4]; 1166cc1dc7a3Sopenharmony_ci if (integer_count_error < best_integer_count_error) 1167cc1dc7a3Sopenharmony_ci { 1168cc1dc7a3Sopenharmony_ci best_integer_count_error = integer_count_error; 1169cc1dc7a3Sopenharmony_ci best_integer_count = integer_count; 1170cc1dc7a3Sopenharmony_ci } 1171cc1dc7a3Sopenharmony_ci } 1172cc1dc7a3Sopenharmony_ci 1173cc1dc7a3Sopenharmony_ci int ql = quant_mode_table[best_integer_count][bits_available]; 1174cc1dc7a3Sopenharmony_ci int ql_mod = quant_mode_table[best_integer_count][bits_available + 8]; 1175cc1dc7a3Sopenharmony_ci 1176cc1dc7a3Sopenharmony_ci best_quant_level = static_cast<uint8_t>(ql); 1177cc1dc7a3Sopenharmony_ci best_quant_level_mod = static_cast<uint8_t>(ql_mod); 1178cc1dc7a3Sopenharmony_ci 1179cc1dc7a3Sopenharmony_ci if (ql >= QUANT_6) 1180cc1dc7a3Sopenharmony_ci { 1181cc1dc7a3Sopenharmony_ci for (int i = 0; i < 4; i++) 1182cc1dc7a3Sopenharmony_ci { 1183cc1dc7a3Sopenharmony_ci best_formats[i] = best_combined_format[ql][best_integer_count - 4][i]; 1184cc1dc7a3Sopenharmony_ci } 1185cc1dc7a3Sopenharmony_ci } 1186cc1dc7a3Sopenharmony_ci else 1187cc1dc7a3Sopenharmony_ci { 1188cc1dc7a3Sopenharmony_ci for (int i = 0; i < 4; i++) 1189cc1dc7a3Sopenharmony_ci { 1190cc1dc7a3Sopenharmony_ci best_formats[i] = FMT_LUMINANCE; 1191cc1dc7a3Sopenharmony_ci } 1192cc1dc7a3Sopenharmony_ci } 1193cc1dc7a3Sopenharmony_ci 1194cc1dc7a3Sopenharmony_ci return best_integer_count_error; 1195cc1dc7a3Sopenharmony_ci} 1196cc1dc7a3Sopenharmony_ci 1197cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 1198cc1dc7a3Sopenharmony_ciunsigned int compute_ideal_endpoint_formats( 1199cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 1200cc1dc7a3Sopenharmony_ci const partition_info& pi, 1201cc1dc7a3Sopenharmony_ci const image_block& blk, 1202cc1dc7a3Sopenharmony_ci const endpoints& ep, 1203cc1dc7a3Sopenharmony_ci // bitcounts and errors computed for the various quantization methods 1204cc1dc7a3Sopenharmony_ci const int8_t* qwt_bitcounts, 1205cc1dc7a3Sopenharmony_ci const float* qwt_errors, 1206cc1dc7a3Sopenharmony_ci unsigned int tune_candidate_limit, 1207cc1dc7a3Sopenharmony_ci unsigned int start_block_mode, 1208cc1dc7a3Sopenharmony_ci unsigned int end_block_mode, 1209cc1dc7a3Sopenharmony_ci // output data 1210cc1dc7a3Sopenharmony_ci uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS], 1211cc1dc7a3Sopenharmony_ci int block_mode[TUNE_MAX_TRIAL_CANDIDATES], 1212cc1dc7a3Sopenharmony_ci quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES], 1213cc1dc7a3Sopenharmony_ci quant_method quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES], 1214cc1dc7a3Sopenharmony_ci compression_working_buffers& tmpbuf 1215cc1dc7a3Sopenharmony_ci) { 1216cc1dc7a3Sopenharmony_ci int partition_count = pi.partition_count; 1217cc1dc7a3Sopenharmony_ci 1218cc1dc7a3Sopenharmony_ci promise(partition_count > 0); 1219cc1dc7a3Sopenharmony_ci 1220cc1dc7a3Sopenharmony_ci bool encode_hdr_rgb = static_cast<bool>(blk.rgb_lns[0]); 1221cc1dc7a3Sopenharmony_ci bool encode_hdr_alpha = static_cast<bool>(blk.alpha_lns[0]); 1222cc1dc7a3Sopenharmony_ci 1223cc1dc7a3Sopenharmony_ci // Compute the errors that result from various encoding choices (such as using luminance instead 1224cc1dc7a3Sopenharmony_ci // of RGB, discarding Alpha, using RGB-scale in place of two separate RGB endpoints and so on) 1225cc1dc7a3Sopenharmony_ci encoding_choice_errors eci[BLOCK_MAX_PARTITIONS]; 1226cc1dc7a3Sopenharmony_ci compute_encoding_choice_errors(privateProfile, blk, pi, ep, eci); 1227cc1dc7a3Sopenharmony_ci 1228cc1dc7a3Sopenharmony_ci float best_error[BLOCK_MAX_PARTITIONS][21][4]; 1229cc1dc7a3Sopenharmony_ci uint8_t format_of_choice[BLOCK_MAX_PARTITIONS][21][4]; 1230cc1dc7a3Sopenharmony_ci for (int i = 0; i < partition_count; i++) 1231cc1dc7a3Sopenharmony_ci { 1232cc1dc7a3Sopenharmony_ci compute_color_error_for_every_integer_count_and_quant_level( 1233cc1dc7a3Sopenharmony_ci encode_hdr_rgb, encode_hdr_alpha, i, 1234cc1dc7a3Sopenharmony_ci pi, eci[i], ep, blk.channel_weight, best_error[i], 1235cc1dc7a3Sopenharmony_ci format_of_choice[i]); 1236cc1dc7a3Sopenharmony_ci } 1237cc1dc7a3Sopenharmony_ci 1238cc1dc7a3Sopenharmony_ci float* errors_of_best_combination = tmpbuf.errors_of_best_combination; 1239cc1dc7a3Sopenharmony_ci uint8_t* best_quant_levels = tmpbuf.best_quant_levels; 1240cc1dc7a3Sopenharmony_ci uint8_t* best_quant_levels_mod = tmpbuf.best_quant_levels_mod; 1241cc1dc7a3Sopenharmony_ci uint8_t (&best_ep_formats)[WEIGHTS_MAX_BLOCK_MODES][BLOCK_MAX_PARTITIONS] = tmpbuf.best_ep_formats; 1242cc1dc7a3Sopenharmony_ci 1243cc1dc7a3Sopenharmony_ci // Ensure that the first iteration understep contains data that will never be picked 1244cc1dc7a3Sopenharmony_ci vfloat clear_error(ERROR_CALC_DEFAULT); 1245cc1dc7a3Sopenharmony_ci vint clear_quant(0); 1246cc1dc7a3Sopenharmony_ci 1247cc1dc7a3Sopenharmony_ci unsigned int packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); 1248cc1dc7a3Sopenharmony_ci storea(clear_error, errors_of_best_combination + packed_start_block_mode); 1249cc1dc7a3Sopenharmony_ci store_nbytes(clear_quant, best_quant_levels + packed_start_block_mode); 1250cc1dc7a3Sopenharmony_ci store_nbytes(clear_quant, best_quant_levels_mod + packed_start_block_mode); 1251cc1dc7a3Sopenharmony_ci 1252cc1dc7a3Sopenharmony_ci // Ensure that last iteration overstep contains data that will never be picked 1253cc1dc7a3Sopenharmony_ci unsigned int packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1); 1254cc1dc7a3Sopenharmony_ci storea(clear_error, errors_of_best_combination + packed_end_block_mode); 1255cc1dc7a3Sopenharmony_ci store_nbytes(clear_quant, best_quant_levels + packed_end_block_mode); 1256cc1dc7a3Sopenharmony_ci store_nbytes(clear_quant, best_quant_levels_mod + packed_end_block_mode); 1257cc1dc7a3Sopenharmony_ci 1258cc1dc7a3Sopenharmony_ci // Track a scalar best to avoid expensive search at least once ... 1259cc1dc7a3Sopenharmony_ci float error_of_best_combination = ERROR_CALC_DEFAULT; 1260cc1dc7a3Sopenharmony_ci int index_of_best_combination = -1; 1261cc1dc7a3Sopenharmony_ci 1262cc1dc7a3Sopenharmony_ci // The block contains 1 partition 1263cc1dc7a3Sopenharmony_ci if (partition_count == 1) 1264cc1dc7a3Sopenharmony_ci { 1265cc1dc7a3Sopenharmony_ci for (unsigned int i = start_block_mode; i < end_block_mode; i++) 1266cc1dc7a3Sopenharmony_ci { 1267cc1dc7a3Sopenharmony_ci if (qwt_errors[i] >= ERROR_CALC_DEFAULT) 1268cc1dc7a3Sopenharmony_ci { 1269cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = ERROR_CALC_DEFAULT; 1270cc1dc7a3Sopenharmony_ci continue; 1271cc1dc7a3Sopenharmony_ci } 1272cc1dc7a3Sopenharmony_ci 1273cc1dc7a3Sopenharmony_ci float error_of_best = one_partition_find_best_combination_for_bitcount( 1274cc1dc7a3Sopenharmony_ci privateProfile, 1275cc1dc7a3Sopenharmony_ci best_error[0], format_of_choice[0], qwt_bitcounts[i], 1276cc1dc7a3Sopenharmony_ci best_quant_levels[i], best_ep_formats[i][0]); 1277cc1dc7a3Sopenharmony_ci 1278cc1dc7a3Sopenharmony_ci float total_error = error_of_best + qwt_errors[i]; 1279cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = total_error; 1280cc1dc7a3Sopenharmony_ci best_quant_levels_mod[i] = best_quant_levels[i]; 1281cc1dc7a3Sopenharmony_ci 1282cc1dc7a3Sopenharmony_ci if (total_error < error_of_best_combination) 1283cc1dc7a3Sopenharmony_ci { 1284cc1dc7a3Sopenharmony_ci error_of_best_combination = total_error; 1285cc1dc7a3Sopenharmony_ci index_of_best_combination = i; 1286cc1dc7a3Sopenharmony_ci } 1287cc1dc7a3Sopenharmony_ci } 1288cc1dc7a3Sopenharmony_ci } 1289cc1dc7a3Sopenharmony_ci // The block contains 2 partitions 1290cc1dc7a3Sopenharmony_ci else if (partition_count == 2) 1291cc1dc7a3Sopenharmony_ci { 1292cc1dc7a3Sopenharmony_ci float combined_best_error[21][7]; 1293cc1dc7a3Sopenharmony_ci uint8_t formats_of_choice[21][7][2]; 1294cc1dc7a3Sopenharmony_ci 1295cc1dc7a3Sopenharmony_ci two_partitions_find_best_combination_for_every_quantization_and_integer_count( 1296cc1dc7a3Sopenharmony_ci best_error, format_of_choice, combined_best_error, formats_of_choice); 1297cc1dc7a3Sopenharmony_ci 1298cc1dc7a3Sopenharmony_ci assert(start_block_mode == 0); 1299cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < end_block_mode; i++) 1300cc1dc7a3Sopenharmony_ci { 1301cc1dc7a3Sopenharmony_ci if (qwt_errors[i] >= ERROR_CALC_DEFAULT) 1302cc1dc7a3Sopenharmony_ci { 1303cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = ERROR_CALC_DEFAULT; 1304cc1dc7a3Sopenharmony_ci continue; 1305cc1dc7a3Sopenharmony_ci } 1306cc1dc7a3Sopenharmony_ci 1307cc1dc7a3Sopenharmony_ci float error_of_best = two_partitions_find_best_combination_for_bitcount( 1308cc1dc7a3Sopenharmony_ci privateProfile, 1309cc1dc7a3Sopenharmony_ci combined_best_error, formats_of_choice, qwt_bitcounts[i], 1310cc1dc7a3Sopenharmony_ci best_quant_levels[i], best_quant_levels_mod[i], 1311cc1dc7a3Sopenharmony_ci best_ep_formats[i]); 1312cc1dc7a3Sopenharmony_ci 1313cc1dc7a3Sopenharmony_ci float total_error = error_of_best + qwt_errors[i]; 1314cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = total_error; 1315cc1dc7a3Sopenharmony_ci 1316cc1dc7a3Sopenharmony_ci if (total_error < error_of_best_combination) 1317cc1dc7a3Sopenharmony_ci { 1318cc1dc7a3Sopenharmony_ci error_of_best_combination = total_error; 1319cc1dc7a3Sopenharmony_ci index_of_best_combination = i; 1320cc1dc7a3Sopenharmony_ci } 1321cc1dc7a3Sopenharmony_ci } 1322cc1dc7a3Sopenharmony_ci } 1323cc1dc7a3Sopenharmony_ci // The block contains 3 partitions 1324cc1dc7a3Sopenharmony_ci else if (partition_count == 3) 1325cc1dc7a3Sopenharmony_ci { 1326cc1dc7a3Sopenharmony_ci float combined_best_error[21][10]; 1327cc1dc7a3Sopenharmony_ci uint8_t formats_of_choice[21][10][3]; 1328cc1dc7a3Sopenharmony_ci 1329cc1dc7a3Sopenharmony_ci three_partitions_find_best_combination_for_every_quantization_and_integer_count( 1330cc1dc7a3Sopenharmony_ci best_error, format_of_choice, combined_best_error, formats_of_choice); 1331cc1dc7a3Sopenharmony_ci 1332cc1dc7a3Sopenharmony_ci assert(start_block_mode == 0); 1333cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < end_block_mode; i++) 1334cc1dc7a3Sopenharmony_ci { 1335cc1dc7a3Sopenharmony_ci if (qwt_errors[i] >= ERROR_CALC_DEFAULT) 1336cc1dc7a3Sopenharmony_ci { 1337cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = ERROR_CALC_DEFAULT; 1338cc1dc7a3Sopenharmony_ci continue; 1339cc1dc7a3Sopenharmony_ci } 1340cc1dc7a3Sopenharmony_ci 1341cc1dc7a3Sopenharmony_ci float error_of_best = three_partitions_find_best_combination_for_bitcount( 1342cc1dc7a3Sopenharmony_ci combined_best_error, formats_of_choice, qwt_bitcounts[i], 1343cc1dc7a3Sopenharmony_ci best_quant_levels[i], best_quant_levels_mod[i], 1344cc1dc7a3Sopenharmony_ci best_ep_formats[i]); 1345cc1dc7a3Sopenharmony_ci 1346cc1dc7a3Sopenharmony_ci float total_error = error_of_best + qwt_errors[i]; 1347cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = total_error; 1348cc1dc7a3Sopenharmony_ci 1349cc1dc7a3Sopenharmony_ci if (total_error < error_of_best_combination) 1350cc1dc7a3Sopenharmony_ci { 1351cc1dc7a3Sopenharmony_ci error_of_best_combination = total_error; 1352cc1dc7a3Sopenharmony_ci index_of_best_combination = i; 1353cc1dc7a3Sopenharmony_ci } 1354cc1dc7a3Sopenharmony_ci } 1355cc1dc7a3Sopenharmony_ci } 1356cc1dc7a3Sopenharmony_ci // The block contains 4 partitions 1357cc1dc7a3Sopenharmony_ci else // if (partition_count == 4) 1358cc1dc7a3Sopenharmony_ci { 1359cc1dc7a3Sopenharmony_ci assert(partition_count == 4); 1360cc1dc7a3Sopenharmony_ci float combined_best_error[21][13]; 1361cc1dc7a3Sopenharmony_ci uint8_t formats_of_choice[21][13][4]; 1362cc1dc7a3Sopenharmony_ci 1363cc1dc7a3Sopenharmony_ci four_partitions_find_best_combination_for_every_quantization_and_integer_count( 1364cc1dc7a3Sopenharmony_ci best_error, format_of_choice, combined_best_error, formats_of_choice); 1365cc1dc7a3Sopenharmony_ci 1366cc1dc7a3Sopenharmony_ci assert(start_block_mode == 0); 1367cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < end_block_mode; i++) 1368cc1dc7a3Sopenharmony_ci { 1369cc1dc7a3Sopenharmony_ci if (qwt_errors[i] >= ERROR_CALC_DEFAULT) 1370cc1dc7a3Sopenharmony_ci { 1371cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = ERROR_CALC_DEFAULT; 1372cc1dc7a3Sopenharmony_ci continue; 1373cc1dc7a3Sopenharmony_ci } 1374cc1dc7a3Sopenharmony_ci 1375cc1dc7a3Sopenharmony_ci float error_of_best = four_partitions_find_best_combination_for_bitcount( 1376cc1dc7a3Sopenharmony_ci combined_best_error, formats_of_choice, qwt_bitcounts[i], 1377cc1dc7a3Sopenharmony_ci best_quant_levels[i], best_quant_levels_mod[i], 1378cc1dc7a3Sopenharmony_ci best_ep_formats[i]); 1379cc1dc7a3Sopenharmony_ci 1380cc1dc7a3Sopenharmony_ci float total_error = error_of_best + qwt_errors[i]; 1381cc1dc7a3Sopenharmony_ci errors_of_best_combination[i] = total_error; 1382cc1dc7a3Sopenharmony_ci 1383cc1dc7a3Sopenharmony_ci if (total_error < error_of_best_combination) 1384cc1dc7a3Sopenharmony_ci { 1385cc1dc7a3Sopenharmony_ci error_of_best_combination = total_error; 1386cc1dc7a3Sopenharmony_ci index_of_best_combination = i; 1387cc1dc7a3Sopenharmony_ci } 1388cc1dc7a3Sopenharmony_ci } 1389cc1dc7a3Sopenharmony_ci } 1390cc1dc7a3Sopenharmony_ci 1391cc1dc7a3Sopenharmony_ci int best_error_weights[TUNE_MAX_TRIAL_CANDIDATES]; 1392cc1dc7a3Sopenharmony_ci 1393cc1dc7a3Sopenharmony_ci // Fast path the first result and avoid the list search for trial 0 1394cc1dc7a3Sopenharmony_ci best_error_weights[0] = index_of_best_combination; 1395cc1dc7a3Sopenharmony_ci if (index_of_best_combination >= 0) 1396cc1dc7a3Sopenharmony_ci { 1397cc1dc7a3Sopenharmony_ci errors_of_best_combination[index_of_best_combination] = ERROR_CALC_DEFAULT; 1398cc1dc7a3Sopenharmony_ci } 1399cc1dc7a3Sopenharmony_ci 1400cc1dc7a3Sopenharmony_ci // Search the remaining results and pick the best candidate modes for trial 1+ 1401cc1dc7a3Sopenharmony_ci for (unsigned int i = 1; i < tune_candidate_limit; i++) 1402cc1dc7a3Sopenharmony_ci { 1403cc1dc7a3Sopenharmony_ci vint vbest_error_index(-1); 1404cc1dc7a3Sopenharmony_ci vfloat vbest_ep_error(ERROR_CALC_DEFAULT); 1405cc1dc7a3Sopenharmony_ci 1406cc1dc7a3Sopenharmony_ci start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); 1407cc1dc7a3Sopenharmony_ci vint lane_ids = vint::lane_id() + vint(start_block_mode); 1408cc1dc7a3Sopenharmony_ci for (unsigned int j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH) 1409cc1dc7a3Sopenharmony_ci { 1410cc1dc7a3Sopenharmony_ci vfloat err = vfloat(errors_of_best_combination + j); 1411cc1dc7a3Sopenharmony_ci vmask mask = err < vbest_ep_error; 1412cc1dc7a3Sopenharmony_ci vbest_ep_error = select(vbest_ep_error, err, mask); 1413cc1dc7a3Sopenharmony_ci vbest_error_index = select(vbest_error_index, lane_ids, mask); 1414cc1dc7a3Sopenharmony_ci lane_ids += vint(ASTCENC_SIMD_WIDTH); 1415cc1dc7a3Sopenharmony_ci } 1416cc1dc7a3Sopenharmony_ci 1417cc1dc7a3Sopenharmony_ci // Pick best mode from the SIMD result, using lowest matching index to ensure invariance 1418cc1dc7a3Sopenharmony_ci vmask lanes_min_error = vbest_ep_error == hmin(vbest_ep_error); 1419cc1dc7a3Sopenharmony_ci vbest_error_index = select(vint(0x7FFFFFFF), vbest_error_index, lanes_min_error); 1420cc1dc7a3Sopenharmony_ci vbest_error_index = hmin(vbest_error_index); 1421cc1dc7a3Sopenharmony_ci int best_error_index = vbest_error_index.lane<0>(); 1422cc1dc7a3Sopenharmony_ci 1423cc1dc7a3Sopenharmony_ci best_error_weights[i] = best_error_index; 1424cc1dc7a3Sopenharmony_ci 1425cc1dc7a3Sopenharmony_ci // Max the error for this candidate so we don't pick it again 1426cc1dc7a3Sopenharmony_ci if (best_error_index >= 0) 1427cc1dc7a3Sopenharmony_ci { 1428cc1dc7a3Sopenharmony_ci errors_of_best_combination[best_error_index] = ERROR_CALC_DEFAULT; 1429cc1dc7a3Sopenharmony_ci } 1430cc1dc7a3Sopenharmony_ci // Early-out if no more candidates are valid 1431cc1dc7a3Sopenharmony_ci else 1432cc1dc7a3Sopenharmony_ci { 1433cc1dc7a3Sopenharmony_ci break; 1434cc1dc7a3Sopenharmony_ci } 1435cc1dc7a3Sopenharmony_ci } 1436cc1dc7a3Sopenharmony_ci 1437cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < tune_candidate_limit; i++) 1438cc1dc7a3Sopenharmony_ci { 1439cc1dc7a3Sopenharmony_ci if (best_error_weights[i] < 0) 1440cc1dc7a3Sopenharmony_ci { 1441cc1dc7a3Sopenharmony_ci return i; 1442cc1dc7a3Sopenharmony_ci } 1443cc1dc7a3Sopenharmony_ci 1444cc1dc7a3Sopenharmony_ci block_mode[i] = best_error_weights[i]; 1445cc1dc7a3Sopenharmony_ci 1446cc1dc7a3Sopenharmony_ci quant_level[i] = static_cast<quant_method>(best_quant_levels[best_error_weights[i]]); 1447cc1dc7a3Sopenharmony_ci quant_level_mod[i] = static_cast<quant_method>(best_quant_levels_mod[best_error_weights[i]]); 1448cc1dc7a3Sopenharmony_ci 1449cc1dc7a3Sopenharmony_ci assert(quant_level[i] >= QUANT_6 && quant_level[i] <= QUANT_256); 1450cc1dc7a3Sopenharmony_ci assert(quant_level_mod[i] >= QUANT_6 && quant_level_mod[i] <= QUANT_256); 1451cc1dc7a3Sopenharmony_ci 1452cc1dc7a3Sopenharmony_ci for (int j = 0; j < partition_count; j++) 1453cc1dc7a3Sopenharmony_ci { 1454cc1dc7a3Sopenharmony_ci partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j]; 1455cc1dc7a3Sopenharmony_ci } 1456cc1dc7a3Sopenharmony_ci } 1457cc1dc7a3Sopenharmony_ci 1458cc1dc7a3Sopenharmony_ci return tune_candidate_limit; 1459cc1dc7a3Sopenharmony_ci} 1460cc1dc7a3Sopenharmony_ci 1461cc1dc7a3Sopenharmony_ci#endif 1462