1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0 2cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 3cc1dc7a3Sopenharmony_ci// Copyright 2011-2023 Arm Limited 4cc1dc7a3Sopenharmony_ci// 5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy 7cc1dc7a3Sopenharmony_ci// of the License at: 8cc1dc7a3Sopenharmony_ci// 9cc1dc7a3Sopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 10cc1dc7a3Sopenharmony_ci// 11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software 12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations 15cc1dc7a3Sopenharmony_ci// under the License. 16cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 17cc1dc7a3Sopenharmony_ci 18cc1dc7a3Sopenharmony_ci/** 19cc1dc7a3Sopenharmony_ci * @brief Functions to generate block size descriptor and decimation tables. 20cc1dc7a3Sopenharmony_ci */ 21cc1dc7a3Sopenharmony_ci 22cc1dc7a3Sopenharmony_ci#include "astcenc_internal.h" 23cc1dc7a3Sopenharmony_ci 24cc1dc7a3Sopenharmony_ci/** 25cc1dc7a3Sopenharmony_ci * @brief Decode the properties of an encoded 2D block mode. 26cc1dc7a3Sopenharmony_ci * 27cc1dc7a3Sopenharmony_ci * @param block_mode The encoded block mode. 28cc1dc7a3Sopenharmony_ci * @param[out] x_weights The number of weights in the X dimension. 29cc1dc7a3Sopenharmony_ci * @param[out] y_weights The number of weights in the Y dimension. 30cc1dc7a3Sopenharmony_ci * @param[out] is_dual_plane True if this block mode has two weight planes. 31cc1dc7a3Sopenharmony_ci * @param[out] quant_mode The quantization level for the weights. 32cc1dc7a3Sopenharmony_ci * @param[out] weight_bits The storage bit count for the weights. 33cc1dc7a3Sopenharmony_ci * 34cc1dc7a3Sopenharmony_ci * @return Returns true if a valid mode, false otherwise. 35cc1dc7a3Sopenharmony_ci */ 36cc1dc7a3Sopenharmony_cistatic bool decode_block_mode_2d( 37cc1dc7a3Sopenharmony_ci unsigned int block_mode, 38cc1dc7a3Sopenharmony_ci unsigned int& x_weights, 39cc1dc7a3Sopenharmony_ci unsigned int& y_weights, 40cc1dc7a3Sopenharmony_ci bool& is_dual_plane, 41cc1dc7a3Sopenharmony_ci unsigned int& quant_mode, 42cc1dc7a3Sopenharmony_ci unsigned int& weight_bits 43cc1dc7a3Sopenharmony_ci) { 44cc1dc7a3Sopenharmony_ci unsigned int base_quant_mode = (block_mode >> 4) & 1; 45cc1dc7a3Sopenharmony_ci unsigned int H = (block_mode >> 9) & 1; 46cc1dc7a3Sopenharmony_ci unsigned int D = (block_mode >> 10) & 1; 47cc1dc7a3Sopenharmony_ci unsigned int A = (block_mode >> 5) & 0x3; 48cc1dc7a3Sopenharmony_ci 49cc1dc7a3Sopenharmony_ci x_weights = 0; 50cc1dc7a3Sopenharmony_ci y_weights = 0; 51cc1dc7a3Sopenharmony_ci 52cc1dc7a3Sopenharmony_ci if ((block_mode & 3) != 0) 53cc1dc7a3Sopenharmony_ci { 54cc1dc7a3Sopenharmony_ci base_quant_mode |= (block_mode & 3) << 1; 55cc1dc7a3Sopenharmony_ci unsigned int B = (block_mode >> 7) & 3; 56cc1dc7a3Sopenharmony_ci switch ((block_mode >> 2) & 3) 57cc1dc7a3Sopenharmony_ci { 58cc1dc7a3Sopenharmony_ci case 0: 59cc1dc7a3Sopenharmony_ci x_weights = B + 4; 60cc1dc7a3Sopenharmony_ci y_weights = A + 2; 61cc1dc7a3Sopenharmony_ci break; 62cc1dc7a3Sopenharmony_ci case 1: 63cc1dc7a3Sopenharmony_ci x_weights = B + 8; 64cc1dc7a3Sopenharmony_ci y_weights = A + 2; 65cc1dc7a3Sopenharmony_ci break; 66cc1dc7a3Sopenharmony_ci case 2: 67cc1dc7a3Sopenharmony_ci x_weights = A + 2; 68cc1dc7a3Sopenharmony_ci y_weights = B + 8; 69cc1dc7a3Sopenharmony_ci break; 70cc1dc7a3Sopenharmony_ci case 3: 71cc1dc7a3Sopenharmony_ci B &= 1; 72cc1dc7a3Sopenharmony_ci if (block_mode & 0x100) 73cc1dc7a3Sopenharmony_ci { 74cc1dc7a3Sopenharmony_ci x_weights = B + 2; 75cc1dc7a3Sopenharmony_ci y_weights = A + 2; 76cc1dc7a3Sopenharmony_ci } 77cc1dc7a3Sopenharmony_ci else 78cc1dc7a3Sopenharmony_ci { 79cc1dc7a3Sopenharmony_ci x_weights = A + 2; 80cc1dc7a3Sopenharmony_ci y_weights = B + 6; 81cc1dc7a3Sopenharmony_ci } 82cc1dc7a3Sopenharmony_ci break; 83cc1dc7a3Sopenharmony_ci } 84cc1dc7a3Sopenharmony_ci } 85cc1dc7a3Sopenharmony_ci else 86cc1dc7a3Sopenharmony_ci { 87cc1dc7a3Sopenharmony_ci base_quant_mode |= ((block_mode >> 2) & 3) << 1; 88cc1dc7a3Sopenharmony_ci if (((block_mode >> 2) & 3) == 0) 89cc1dc7a3Sopenharmony_ci { 90cc1dc7a3Sopenharmony_ci return false; 91cc1dc7a3Sopenharmony_ci } 92cc1dc7a3Sopenharmony_ci 93cc1dc7a3Sopenharmony_ci unsigned int B = (block_mode >> 9) & 3; 94cc1dc7a3Sopenharmony_ci switch ((block_mode >> 7) & 3) 95cc1dc7a3Sopenharmony_ci { 96cc1dc7a3Sopenharmony_ci case 0: 97cc1dc7a3Sopenharmony_ci x_weights = 12; 98cc1dc7a3Sopenharmony_ci y_weights = A + 2; 99cc1dc7a3Sopenharmony_ci break; 100cc1dc7a3Sopenharmony_ci case 1: 101cc1dc7a3Sopenharmony_ci x_weights = A + 2; 102cc1dc7a3Sopenharmony_ci y_weights = 12; 103cc1dc7a3Sopenharmony_ci break; 104cc1dc7a3Sopenharmony_ci case 2: 105cc1dc7a3Sopenharmony_ci x_weights = A + 6; 106cc1dc7a3Sopenharmony_ci y_weights = B + 6; 107cc1dc7a3Sopenharmony_ci D = 0; 108cc1dc7a3Sopenharmony_ci H = 0; 109cc1dc7a3Sopenharmony_ci break; 110cc1dc7a3Sopenharmony_ci case 3: 111cc1dc7a3Sopenharmony_ci switch ((block_mode >> 5) & 3) 112cc1dc7a3Sopenharmony_ci { 113cc1dc7a3Sopenharmony_ci case 0: 114cc1dc7a3Sopenharmony_ci x_weights = 6; 115cc1dc7a3Sopenharmony_ci y_weights = 10; 116cc1dc7a3Sopenharmony_ci break; 117cc1dc7a3Sopenharmony_ci case 1: 118cc1dc7a3Sopenharmony_ci x_weights = 10; 119cc1dc7a3Sopenharmony_ci y_weights = 6; 120cc1dc7a3Sopenharmony_ci break; 121cc1dc7a3Sopenharmony_ci case 2: 122cc1dc7a3Sopenharmony_ci case 3: 123cc1dc7a3Sopenharmony_ci return false; 124cc1dc7a3Sopenharmony_ci } 125cc1dc7a3Sopenharmony_ci break; 126cc1dc7a3Sopenharmony_ci } 127cc1dc7a3Sopenharmony_ci } 128cc1dc7a3Sopenharmony_ci 129cc1dc7a3Sopenharmony_ci unsigned int weight_count = x_weights * y_weights * (D + 1); 130cc1dc7a3Sopenharmony_ci quant_mode = (base_quant_mode - 2) + 6 * H; 131cc1dc7a3Sopenharmony_ci is_dual_plane = D != 0; 132cc1dc7a3Sopenharmony_ci 133cc1dc7a3Sopenharmony_ci weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode)); 134cc1dc7a3Sopenharmony_ci return (weight_count <= BLOCK_MAX_WEIGHTS && 135cc1dc7a3Sopenharmony_ci weight_bits >= BLOCK_MIN_WEIGHT_BITS && 136cc1dc7a3Sopenharmony_ci weight_bits <= BLOCK_MAX_WEIGHT_BITS); 137cc1dc7a3Sopenharmony_ci} 138cc1dc7a3Sopenharmony_ci 139cc1dc7a3Sopenharmony_ci/** 140cc1dc7a3Sopenharmony_ci * @brief Decode the properties of an encoded 3D block mode. 141cc1dc7a3Sopenharmony_ci * 142cc1dc7a3Sopenharmony_ci * @param block_mode The encoded block mode. 143cc1dc7a3Sopenharmony_ci * @param[out] x_weights The number of weights in the X dimension. 144cc1dc7a3Sopenharmony_ci * @param[out] y_weights The number of weights in the Y dimension. 145cc1dc7a3Sopenharmony_ci * @param[out] z_weights The number of weights in the Z dimension. 146cc1dc7a3Sopenharmony_ci * @param[out] is_dual_plane True if this block mode has two weight planes. 147cc1dc7a3Sopenharmony_ci * @param[out] quant_mode The quantization level for the weights. 148cc1dc7a3Sopenharmony_ci * @param[out] weight_bits The storage bit count for the weights. 149cc1dc7a3Sopenharmony_ci * 150cc1dc7a3Sopenharmony_ci * @return Returns true if a valid mode, false otherwise. 151cc1dc7a3Sopenharmony_ci */ 152cc1dc7a3Sopenharmony_cistatic bool decode_block_mode_3d( 153cc1dc7a3Sopenharmony_ci unsigned int block_mode, 154cc1dc7a3Sopenharmony_ci unsigned int& x_weights, 155cc1dc7a3Sopenharmony_ci unsigned int& y_weights, 156cc1dc7a3Sopenharmony_ci unsigned int& z_weights, 157cc1dc7a3Sopenharmony_ci bool& is_dual_plane, 158cc1dc7a3Sopenharmony_ci unsigned int& quant_mode, 159cc1dc7a3Sopenharmony_ci unsigned int& weight_bits 160cc1dc7a3Sopenharmony_ci) { 161cc1dc7a3Sopenharmony_ci unsigned int base_quant_mode = (block_mode >> 4) & 1; 162cc1dc7a3Sopenharmony_ci unsigned int H = (block_mode >> 9) & 1; 163cc1dc7a3Sopenharmony_ci unsigned int D = (block_mode >> 10) & 1; 164cc1dc7a3Sopenharmony_ci unsigned int A = (block_mode >> 5) & 0x3; 165cc1dc7a3Sopenharmony_ci 166cc1dc7a3Sopenharmony_ci x_weights = 0; 167cc1dc7a3Sopenharmony_ci y_weights = 0; 168cc1dc7a3Sopenharmony_ci z_weights = 0; 169cc1dc7a3Sopenharmony_ci 170cc1dc7a3Sopenharmony_ci if ((block_mode & 3) != 0) 171cc1dc7a3Sopenharmony_ci { 172cc1dc7a3Sopenharmony_ci base_quant_mode |= (block_mode & 3) << 1; 173cc1dc7a3Sopenharmony_ci unsigned int B = (block_mode >> 7) & 3; 174cc1dc7a3Sopenharmony_ci unsigned int C = (block_mode >> 2) & 0x3; 175cc1dc7a3Sopenharmony_ci x_weights = A + 2; 176cc1dc7a3Sopenharmony_ci y_weights = B + 2; 177cc1dc7a3Sopenharmony_ci z_weights = C + 2; 178cc1dc7a3Sopenharmony_ci } 179cc1dc7a3Sopenharmony_ci else 180cc1dc7a3Sopenharmony_ci { 181cc1dc7a3Sopenharmony_ci base_quant_mode |= ((block_mode >> 2) & 3) << 1; 182cc1dc7a3Sopenharmony_ci if (((block_mode >> 2) & 3) == 0) 183cc1dc7a3Sopenharmony_ci { 184cc1dc7a3Sopenharmony_ci return false; 185cc1dc7a3Sopenharmony_ci } 186cc1dc7a3Sopenharmony_ci 187cc1dc7a3Sopenharmony_ci int B = (block_mode >> 9) & 3; 188cc1dc7a3Sopenharmony_ci if (((block_mode >> 7) & 3) != 3) 189cc1dc7a3Sopenharmony_ci { 190cc1dc7a3Sopenharmony_ci D = 0; 191cc1dc7a3Sopenharmony_ci H = 0; 192cc1dc7a3Sopenharmony_ci } 193cc1dc7a3Sopenharmony_ci switch ((block_mode >> 7) & 3) 194cc1dc7a3Sopenharmony_ci { 195cc1dc7a3Sopenharmony_ci case 0: 196cc1dc7a3Sopenharmony_ci x_weights = 6; 197cc1dc7a3Sopenharmony_ci y_weights = B + 2; 198cc1dc7a3Sopenharmony_ci z_weights = A + 2; 199cc1dc7a3Sopenharmony_ci break; 200cc1dc7a3Sopenharmony_ci case 1: 201cc1dc7a3Sopenharmony_ci x_weights = A + 2; 202cc1dc7a3Sopenharmony_ci y_weights = 6; 203cc1dc7a3Sopenharmony_ci z_weights = B + 2; 204cc1dc7a3Sopenharmony_ci break; 205cc1dc7a3Sopenharmony_ci case 2: 206cc1dc7a3Sopenharmony_ci x_weights = A + 2; 207cc1dc7a3Sopenharmony_ci y_weights = B + 2; 208cc1dc7a3Sopenharmony_ci z_weights = 6; 209cc1dc7a3Sopenharmony_ci break; 210cc1dc7a3Sopenharmony_ci case 3: 211cc1dc7a3Sopenharmony_ci x_weights = 2; 212cc1dc7a3Sopenharmony_ci y_weights = 2; 213cc1dc7a3Sopenharmony_ci z_weights = 2; 214cc1dc7a3Sopenharmony_ci switch ((block_mode >> 5) & 3) 215cc1dc7a3Sopenharmony_ci { 216cc1dc7a3Sopenharmony_ci case 0: 217cc1dc7a3Sopenharmony_ci x_weights = 6; 218cc1dc7a3Sopenharmony_ci break; 219cc1dc7a3Sopenharmony_ci case 1: 220cc1dc7a3Sopenharmony_ci y_weights = 6; 221cc1dc7a3Sopenharmony_ci break; 222cc1dc7a3Sopenharmony_ci case 2: 223cc1dc7a3Sopenharmony_ci z_weights = 6; 224cc1dc7a3Sopenharmony_ci break; 225cc1dc7a3Sopenharmony_ci case 3: 226cc1dc7a3Sopenharmony_ci return false; 227cc1dc7a3Sopenharmony_ci } 228cc1dc7a3Sopenharmony_ci break; 229cc1dc7a3Sopenharmony_ci } 230cc1dc7a3Sopenharmony_ci } 231cc1dc7a3Sopenharmony_ci 232cc1dc7a3Sopenharmony_ci unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1); 233cc1dc7a3Sopenharmony_ci quant_mode = (base_quant_mode - 2) + 6 * H; 234cc1dc7a3Sopenharmony_ci is_dual_plane = D != 0; 235cc1dc7a3Sopenharmony_ci 236cc1dc7a3Sopenharmony_ci weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode)); 237cc1dc7a3Sopenharmony_ci return (weight_count <= BLOCK_MAX_WEIGHTS && 238cc1dc7a3Sopenharmony_ci weight_bits >= BLOCK_MIN_WEIGHT_BITS && 239cc1dc7a3Sopenharmony_ci weight_bits <= BLOCK_MAX_WEIGHT_BITS); 240cc1dc7a3Sopenharmony_ci} 241cc1dc7a3Sopenharmony_ci 242cc1dc7a3Sopenharmony_ci/** 243cc1dc7a3Sopenharmony_ci * @brief Create a 2D decimation entry for a block-size and weight-decimation pair. 244cc1dc7a3Sopenharmony_ci * 245cc1dc7a3Sopenharmony_ci * @param x_texels The number of texels in the X dimension. 246cc1dc7a3Sopenharmony_ci * @param y_texels The number of texels in the Y dimension. 247cc1dc7a3Sopenharmony_ci * @param x_weights The number of weights in the X dimension. 248cc1dc7a3Sopenharmony_ci * @param y_weights The number of weights in the Y dimension. 249cc1dc7a3Sopenharmony_ci * @param[out] di The decimation info structure to populate. 250cc1dc7a3Sopenharmony_ci * @param[out] wb The decimation table init scratch working buffers. 251cc1dc7a3Sopenharmony_ci */ 252cc1dc7a3Sopenharmony_cistatic void init_decimation_info_2d( 253cc1dc7a3Sopenharmony_ci unsigned int x_texels, 254cc1dc7a3Sopenharmony_ci unsigned int y_texels, 255cc1dc7a3Sopenharmony_ci unsigned int x_weights, 256cc1dc7a3Sopenharmony_ci unsigned int y_weights, 257cc1dc7a3Sopenharmony_ci decimation_info& di, 258cc1dc7a3Sopenharmony_ci dt_init_working_buffers& wb 259cc1dc7a3Sopenharmony_ci) { 260cc1dc7a3Sopenharmony_ci unsigned int texels_per_block = x_texels * y_texels; 261cc1dc7a3Sopenharmony_ci unsigned int weights_per_block = x_weights * y_weights; 262cc1dc7a3Sopenharmony_ci 263cc1dc7a3Sopenharmony_ci uint8_t max_texel_count_of_weight = 0; 264cc1dc7a3Sopenharmony_ci 265cc1dc7a3Sopenharmony_ci promise(weights_per_block > 0); 266cc1dc7a3Sopenharmony_ci promise(texels_per_block > 0); 267cc1dc7a3Sopenharmony_ci promise(x_texels > 0); 268cc1dc7a3Sopenharmony_ci promise(y_texels > 0); 269cc1dc7a3Sopenharmony_ci 270cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < weights_per_block; i++) 271cc1dc7a3Sopenharmony_ci { 272cc1dc7a3Sopenharmony_ci wb.texel_count_of_weight[i] = 0; 273cc1dc7a3Sopenharmony_ci } 274cc1dc7a3Sopenharmony_ci 275cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < texels_per_block; i++) 276cc1dc7a3Sopenharmony_ci { 277cc1dc7a3Sopenharmony_ci wb.weight_count_of_texel[i] = 0; 278cc1dc7a3Sopenharmony_ci } 279cc1dc7a3Sopenharmony_ci 280cc1dc7a3Sopenharmony_ci for (unsigned int y = 0; y < y_texels; y++) 281cc1dc7a3Sopenharmony_ci { 282cc1dc7a3Sopenharmony_ci for (unsigned int x = 0; x < x_texels; x++) 283cc1dc7a3Sopenharmony_ci { 284cc1dc7a3Sopenharmony_ci unsigned int texel = y * x_texels + x; 285cc1dc7a3Sopenharmony_ci 286cc1dc7a3Sopenharmony_ci unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; 287cc1dc7a3Sopenharmony_ci unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; 288cc1dc7a3Sopenharmony_ci 289cc1dc7a3Sopenharmony_ci unsigned int x_weight_frac = x_weight & 0xF; 290cc1dc7a3Sopenharmony_ci unsigned int y_weight_frac = y_weight & 0xF; 291cc1dc7a3Sopenharmony_ci unsigned int x_weight_int = x_weight >> 4; 292cc1dc7a3Sopenharmony_ci unsigned int y_weight_int = y_weight >> 4; 293cc1dc7a3Sopenharmony_ci 294cc1dc7a3Sopenharmony_ci unsigned int qweight[4]; 295cc1dc7a3Sopenharmony_ci qweight[0] = x_weight_int + y_weight_int * x_weights; 296cc1dc7a3Sopenharmony_ci qweight[1] = qweight[0] + 1; 297cc1dc7a3Sopenharmony_ci qweight[2] = qweight[0] + x_weights; 298cc1dc7a3Sopenharmony_ci qweight[3] = qweight[2] + 1; 299cc1dc7a3Sopenharmony_ci 300cc1dc7a3Sopenharmony_ci // Truncated-precision bilinear interpolation 301cc1dc7a3Sopenharmony_ci unsigned int prod = x_weight_frac * y_weight_frac; 302cc1dc7a3Sopenharmony_ci 303cc1dc7a3Sopenharmony_ci unsigned int weight[4]; 304cc1dc7a3Sopenharmony_ci weight[3] = (prod + 8) >> 4; 305cc1dc7a3Sopenharmony_ci weight[1] = x_weight_frac - weight[3]; 306cc1dc7a3Sopenharmony_ci weight[2] = y_weight_frac - weight[3]; 307cc1dc7a3Sopenharmony_ci weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3]; 308cc1dc7a3Sopenharmony_ci 309cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < 4; i++) 310cc1dc7a3Sopenharmony_ci { 311cc1dc7a3Sopenharmony_ci if (weight[i] != 0) 312cc1dc7a3Sopenharmony_ci { 313cc1dc7a3Sopenharmony_ci wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]); 314cc1dc7a3Sopenharmony_ci wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]); 315cc1dc7a3Sopenharmony_ci wb.weight_count_of_texel[texel]++; 316cc1dc7a3Sopenharmony_ci wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel); 317cc1dc7a3Sopenharmony_ci wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]); 318cc1dc7a3Sopenharmony_ci wb.texel_count_of_weight[qweight[i]]++; 319cc1dc7a3Sopenharmony_ci max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]); 320cc1dc7a3Sopenharmony_ci } 321cc1dc7a3Sopenharmony_ci } 322cc1dc7a3Sopenharmony_ci } 323cc1dc7a3Sopenharmony_ci } 324cc1dc7a3Sopenharmony_ci 325cc1dc7a3Sopenharmony_ci uint8_t max_texel_weight_count = 0; 326cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < texels_per_block; i++) 327cc1dc7a3Sopenharmony_ci { 328cc1dc7a3Sopenharmony_ci di.texel_weight_count[i] = wb.weight_count_of_texel[i]; 329cc1dc7a3Sopenharmony_ci max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); 330cc1dc7a3Sopenharmony_ci 331cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) 332cc1dc7a3Sopenharmony_ci { 333cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; 334cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); 335cc1dc7a3Sopenharmony_ci di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j]; 336cc1dc7a3Sopenharmony_ci } 337cc1dc7a3Sopenharmony_ci 338cc1dc7a3Sopenharmony_ci // Init all 4 entries so we can rely on zeros for vectorization 339cc1dc7a3Sopenharmony_ci for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++) 340cc1dc7a3Sopenharmony_ci { 341cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_int_tr[j][i] = 0; 342cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_float_tr[j][i] = 0.0f; 343cc1dc7a3Sopenharmony_ci di.texel_weights_tr[j][i] = 0; 344cc1dc7a3Sopenharmony_ci } 345cc1dc7a3Sopenharmony_ci } 346cc1dc7a3Sopenharmony_ci 347cc1dc7a3Sopenharmony_ci di.max_texel_weight_count = max_texel_weight_count; 348cc1dc7a3Sopenharmony_ci 349cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < weights_per_block; i++) 350cc1dc7a3Sopenharmony_ci { 351cc1dc7a3Sopenharmony_ci unsigned int texel_count_wt = wb.texel_count_of_weight[i]; 352cc1dc7a3Sopenharmony_ci di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt); 353cc1dc7a3Sopenharmony_ci 354cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < texel_count_wt; j++) 355cc1dc7a3Sopenharmony_ci { 356cc1dc7a3Sopenharmony_ci uint8_t texel = wb.texels_of_weight[i][j]; 357cc1dc7a3Sopenharmony_ci 358cc1dc7a3Sopenharmony_ci // Create transposed versions of these for better vectorization 359cc1dc7a3Sopenharmony_ci di.weight_texels_tr[j][i] = texel; 360cc1dc7a3Sopenharmony_ci di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]); 361cc1dc7a3Sopenharmony_ci 362cc1dc7a3Sopenharmony_ci // Store the per-texel contribution of this weight for each texel it contributes to 363cc1dc7a3Sopenharmony_ci di.texel_contrib_for_weight[j][i] = 0.0f; 364cc1dc7a3Sopenharmony_ci for (unsigned int k = 0; k < 4; k++) 365cc1dc7a3Sopenharmony_ci { 366cc1dc7a3Sopenharmony_ci uint8_t dttw = di.texel_weights_tr[k][texel]; 367cc1dc7a3Sopenharmony_ci float dttwf = di.texel_weight_contribs_float_tr[k][texel]; 368cc1dc7a3Sopenharmony_ci if (dttw == i && dttwf != 0.0f) 369cc1dc7a3Sopenharmony_ci { 370cc1dc7a3Sopenharmony_ci di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel]; 371cc1dc7a3Sopenharmony_ci break; 372cc1dc7a3Sopenharmony_ci } 373cc1dc7a3Sopenharmony_ci } 374cc1dc7a3Sopenharmony_ci } 375cc1dc7a3Sopenharmony_ci 376cc1dc7a3Sopenharmony_ci // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails 377cc1dc7a3Sopenharmony_ci // Match last texel in active lane in SIMD group, for better gathers 378cc1dc7a3Sopenharmony_ci uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; 379cc1dc7a3Sopenharmony_ci for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) 380cc1dc7a3Sopenharmony_ci { 381cc1dc7a3Sopenharmony_ci di.weight_texels_tr[j][i] = last_texel; 382cc1dc7a3Sopenharmony_ci di.weights_texel_contribs_tr[j][i] = 0.0f; 383cc1dc7a3Sopenharmony_ci } 384cc1dc7a3Sopenharmony_ci } 385cc1dc7a3Sopenharmony_ci 386cc1dc7a3Sopenharmony_ci // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails 387cc1dc7a3Sopenharmony_ci unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); 388cc1dc7a3Sopenharmony_ci for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) 389cc1dc7a3Sopenharmony_ci { 390cc1dc7a3Sopenharmony_ci di.texel_weight_count[i] = 0; 391cc1dc7a3Sopenharmony_ci 392cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < 4; j++) 393cc1dc7a3Sopenharmony_ci { 394cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_float_tr[j][i] = 0; 395cc1dc7a3Sopenharmony_ci di.texel_weights_tr[j][i] = 0; 396cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_int_tr[j][i] = 0; 397cc1dc7a3Sopenharmony_ci } 398cc1dc7a3Sopenharmony_ci } 399cc1dc7a3Sopenharmony_ci 400cc1dc7a3Sopenharmony_ci // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails 401cc1dc7a3Sopenharmony_ci // Match last texel in active lane in SIMD group, for better gathers 402cc1dc7a3Sopenharmony_ci unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; 403cc1dc7a3Sopenharmony_ci uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; 404cc1dc7a3Sopenharmony_ci 405cc1dc7a3Sopenharmony_ci unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); 406cc1dc7a3Sopenharmony_ci for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) 407cc1dc7a3Sopenharmony_ci { 408cc1dc7a3Sopenharmony_ci di.weight_texel_count[i] = 0; 409cc1dc7a3Sopenharmony_ci 410cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < max_texel_count_of_weight; j++) 411cc1dc7a3Sopenharmony_ci { 412cc1dc7a3Sopenharmony_ci di.weight_texels_tr[j][i] = last_texel; 413cc1dc7a3Sopenharmony_ci di.weights_texel_contribs_tr[j][i] = 0.0f; 414cc1dc7a3Sopenharmony_ci } 415cc1dc7a3Sopenharmony_ci } 416cc1dc7a3Sopenharmony_ci 417cc1dc7a3Sopenharmony_ci di.texel_count = static_cast<uint8_t>(texels_per_block); 418cc1dc7a3Sopenharmony_ci di.weight_count = static_cast<uint8_t>(weights_per_block); 419cc1dc7a3Sopenharmony_ci di.weight_x = static_cast<uint8_t>(x_weights); 420cc1dc7a3Sopenharmony_ci di.weight_y = static_cast<uint8_t>(y_weights); 421cc1dc7a3Sopenharmony_ci di.weight_z = 1; 422cc1dc7a3Sopenharmony_ci} 423cc1dc7a3Sopenharmony_ci 424cc1dc7a3Sopenharmony_ci/** 425cc1dc7a3Sopenharmony_ci * @brief Create a 3D decimation entry for a block-size and weight-decimation pair. 426cc1dc7a3Sopenharmony_ci * 427cc1dc7a3Sopenharmony_ci * @param x_texels The number of texels in the X dimension. 428cc1dc7a3Sopenharmony_ci * @param y_texels The number of texels in the Y dimension. 429cc1dc7a3Sopenharmony_ci * @param z_texels The number of texels in the Z dimension. 430cc1dc7a3Sopenharmony_ci * @param x_weights The number of weights in the X dimension. 431cc1dc7a3Sopenharmony_ci * @param y_weights The number of weights in the Y dimension. 432cc1dc7a3Sopenharmony_ci * @param z_weights The number of weights in the Z dimension. 433cc1dc7a3Sopenharmony_ci * @param[out] di The decimation info structure to populate. 434cc1dc7a3Sopenharmony_ci @param[out] wb The decimation table init scratch working buffers. 435cc1dc7a3Sopenharmony_ci */ 436cc1dc7a3Sopenharmony_cistatic void init_decimation_info_3d( 437cc1dc7a3Sopenharmony_ci unsigned int x_texels, 438cc1dc7a3Sopenharmony_ci unsigned int y_texels, 439cc1dc7a3Sopenharmony_ci unsigned int z_texels, 440cc1dc7a3Sopenharmony_ci unsigned int x_weights, 441cc1dc7a3Sopenharmony_ci unsigned int y_weights, 442cc1dc7a3Sopenharmony_ci unsigned int z_weights, 443cc1dc7a3Sopenharmony_ci decimation_info& di, 444cc1dc7a3Sopenharmony_ci dt_init_working_buffers& wb 445cc1dc7a3Sopenharmony_ci) { 446cc1dc7a3Sopenharmony_ci unsigned int texels_per_block = x_texels * y_texels * z_texels; 447cc1dc7a3Sopenharmony_ci unsigned int weights_per_block = x_weights * y_weights * z_weights; 448cc1dc7a3Sopenharmony_ci 449cc1dc7a3Sopenharmony_ci uint8_t max_texel_count_of_weight = 0; 450cc1dc7a3Sopenharmony_ci 451cc1dc7a3Sopenharmony_ci promise(weights_per_block > 0); 452cc1dc7a3Sopenharmony_ci promise(texels_per_block > 0); 453cc1dc7a3Sopenharmony_ci 454cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < weights_per_block; i++) 455cc1dc7a3Sopenharmony_ci { 456cc1dc7a3Sopenharmony_ci wb.texel_count_of_weight[i] = 0; 457cc1dc7a3Sopenharmony_ci } 458cc1dc7a3Sopenharmony_ci 459cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < texels_per_block; i++) 460cc1dc7a3Sopenharmony_ci { 461cc1dc7a3Sopenharmony_ci wb.weight_count_of_texel[i] = 0; 462cc1dc7a3Sopenharmony_ci } 463cc1dc7a3Sopenharmony_ci 464cc1dc7a3Sopenharmony_ci for (unsigned int z = 0; z < z_texels; z++) 465cc1dc7a3Sopenharmony_ci { 466cc1dc7a3Sopenharmony_ci for (unsigned int y = 0; y < y_texels; y++) 467cc1dc7a3Sopenharmony_ci { 468cc1dc7a3Sopenharmony_ci for (unsigned int x = 0; x < x_texels; x++) 469cc1dc7a3Sopenharmony_ci { 470cc1dc7a3Sopenharmony_ci int texel = (z * y_texels + y) * x_texels + x; 471cc1dc7a3Sopenharmony_ci 472cc1dc7a3Sopenharmony_ci int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; 473cc1dc7a3Sopenharmony_ci int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; 474cc1dc7a3Sopenharmony_ci int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6; 475cc1dc7a3Sopenharmony_ci 476cc1dc7a3Sopenharmony_ci int x_weight_frac = x_weight & 0xF; 477cc1dc7a3Sopenharmony_ci int y_weight_frac = y_weight & 0xF; 478cc1dc7a3Sopenharmony_ci int z_weight_frac = z_weight & 0xF; 479cc1dc7a3Sopenharmony_ci int x_weight_int = x_weight >> 4; 480cc1dc7a3Sopenharmony_ci int y_weight_int = y_weight >> 4; 481cc1dc7a3Sopenharmony_ci int z_weight_int = z_weight >> 4; 482cc1dc7a3Sopenharmony_ci int qweight[4]; 483cc1dc7a3Sopenharmony_ci int weight[4]; 484cc1dc7a3Sopenharmony_ci qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int; 485cc1dc7a3Sopenharmony_ci qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1); 486cc1dc7a3Sopenharmony_ci 487cc1dc7a3Sopenharmony_ci // simplex interpolation 488cc1dc7a3Sopenharmony_ci int fs = x_weight_frac; 489cc1dc7a3Sopenharmony_ci int ft = y_weight_frac; 490cc1dc7a3Sopenharmony_ci int fp = z_weight_frac; 491cc1dc7a3Sopenharmony_ci 492cc1dc7a3Sopenharmony_ci int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp)); 493cc1dc7a3Sopenharmony_ci int N = x_weights; 494cc1dc7a3Sopenharmony_ci int NM = x_weights * y_weights; 495cc1dc7a3Sopenharmony_ci 496cc1dc7a3Sopenharmony_ci int s1, s2, w0, w1, w2, w3; 497cc1dc7a3Sopenharmony_ci switch (cas) 498cc1dc7a3Sopenharmony_ci { 499cc1dc7a3Sopenharmony_ci case 7: 500cc1dc7a3Sopenharmony_ci s1 = 1; 501cc1dc7a3Sopenharmony_ci s2 = N; 502cc1dc7a3Sopenharmony_ci w0 = 16 - fs; 503cc1dc7a3Sopenharmony_ci w1 = fs - ft; 504cc1dc7a3Sopenharmony_ci w2 = ft - fp; 505cc1dc7a3Sopenharmony_ci w3 = fp; 506cc1dc7a3Sopenharmony_ci break; 507cc1dc7a3Sopenharmony_ci case 3: 508cc1dc7a3Sopenharmony_ci s1 = N; 509cc1dc7a3Sopenharmony_ci s2 = 1; 510cc1dc7a3Sopenharmony_ci w0 = 16 - ft; 511cc1dc7a3Sopenharmony_ci w1 = ft - fs; 512cc1dc7a3Sopenharmony_ci w2 = fs - fp; 513cc1dc7a3Sopenharmony_ci w3 = fp; 514cc1dc7a3Sopenharmony_ci break; 515cc1dc7a3Sopenharmony_ci case 5: 516cc1dc7a3Sopenharmony_ci s1 = 1; 517cc1dc7a3Sopenharmony_ci s2 = NM; 518cc1dc7a3Sopenharmony_ci w0 = 16 - fs; 519cc1dc7a3Sopenharmony_ci w1 = fs - fp; 520cc1dc7a3Sopenharmony_ci w2 = fp - ft; 521cc1dc7a3Sopenharmony_ci w3 = ft; 522cc1dc7a3Sopenharmony_ci break; 523cc1dc7a3Sopenharmony_ci case 4: 524cc1dc7a3Sopenharmony_ci s1 = NM; 525cc1dc7a3Sopenharmony_ci s2 = 1; 526cc1dc7a3Sopenharmony_ci w0 = 16 - fp; 527cc1dc7a3Sopenharmony_ci w1 = fp - fs; 528cc1dc7a3Sopenharmony_ci w2 = fs - ft; 529cc1dc7a3Sopenharmony_ci w3 = ft; 530cc1dc7a3Sopenharmony_ci break; 531cc1dc7a3Sopenharmony_ci case 2: 532cc1dc7a3Sopenharmony_ci s1 = N; 533cc1dc7a3Sopenharmony_ci s2 = NM; 534cc1dc7a3Sopenharmony_ci w0 = 16 - ft; 535cc1dc7a3Sopenharmony_ci w1 = ft - fp; 536cc1dc7a3Sopenharmony_ci w2 = fp - fs; 537cc1dc7a3Sopenharmony_ci w3 = fs; 538cc1dc7a3Sopenharmony_ci break; 539cc1dc7a3Sopenharmony_ci case 0: 540cc1dc7a3Sopenharmony_ci s1 = NM; 541cc1dc7a3Sopenharmony_ci s2 = N; 542cc1dc7a3Sopenharmony_ci w0 = 16 - fp; 543cc1dc7a3Sopenharmony_ci w1 = fp - ft; 544cc1dc7a3Sopenharmony_ci w2 = ft - fs; 545cc1dc7a3Sopenharmony_ci w3 = fs; 546cc1dc7a3Sopenharmony_ci break; 547cc1dc7a3Sopenharmony_ci default: 548cc1dc7a3Sopenharmony_ci s1 = NM; 549cc1dc7a3Sopenharmony_ci s2 = N; 550cc1dc7a3Sopenharmony_ci w0 = 16 - fp; 551cc1dc7a3Sopenharmony_ci w1 = fp - ft; 552cc1dc7a3Sopenharmony_ci w2 = ft - fs; 553cc1dc7a3Sopenharmony_ci w3 = fs; 554cc1dc7a3Sopenharmony_ci break; 555cc1dc7a3Sopenharmony_ci } 556cc1dc7a3Sopenharmony_ci 557cc1dc7a3Sopenharmony_ci qweight[1] = qweight[0] + s1; 558cc1dc7a3Sopenharmony_ci qweight[2] = qweight[1] + s2; 559cc1dc7a3Sopenharmony_ci weight[0] = w0; 560cc1dc7a3Sopenharmony_ci weight[1] = w1; 561cc1dc7a3Sopenharmony_ci weight[2] = w2; 562cc1dc7a3Sopenharmony_ci weight[3] = w3; 563cc1dc7a3Sopenharmony_ci 564cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < 4; i++) 565cc1dc7a3Sopenharmony_ci { 566cc1dc7a3Sopenharmony_ci if (weight[i] != 0) 567cc1dc7a3Sopenharmony_ci { 568cc1dc7a3Sopenharmony_ci wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]); 569cc1dc7a3Sopenharmony_ci wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]); 570cc1dc7a3Sopenharmony_ci wb.weight_count_of_texel[texel]++; 571cc1dc7a3Sopenharmony_ci wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel); 572cc1dc7a3Sopenharmony_ci wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]); 573cc1dc7a3Sopenharmony_ci wb.texel_count_of_weight[qweight[i]]++; 574cc1dc7a3Sopenharmony_ci max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]); 575cc1dc7a3Sopenharmony_ci } 576cc1dc7a3Sopenharmony_ci } 577cc1dc7a3Sopenharmony_ci } 578cc1dc7a3Sopenharmony_ci } 579cc1dc7a3Sopenharmony_ci } 580cc1dc7a3Sopenharmony_ci 581cc1dc7a3Sopenharmony_ci uint8_t max_texel_weight_count = 0; 582cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < texels_per_block; i++) 583cc1dc7a3Sopenharmony_ci { 584cc1dc7a3Sopenharmony_ci di.texel_weight_count[i] = wb.weight_count_of_texel[i]; 585cc1dc7a3Sopenharmony_ci max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); 586cc1dc7a3Sopenharmony_ci 587cc1dc7a3Sopenharmony_ci // Init all 4 entries so we can rely on zeros for vectorization 588cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < 4; j++) 589cc1dc7a3Sopenharmony_ci { 590cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_int_tr[j][i] = 0; 591cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_float_tr[j][i] = 0.0f; 592cc1dc7a3Sopenharmony_ci di.texel_weights_tr[j][i] = 0; 593cc1dc7a3Sopenharmony_ci } 594cc1dc7a3Sopenharmony_ci 595cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) 596cc1dc7a3Sopenharmony_ci { 597cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; 598cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); 599cc1dc7a3Sopenharmony_ci di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j]; 600cc1dc7a3Sopenharmony_ci } 601cc1dc7a3Sopenharmony_ci } 602cc1dc7a3Sopenharmony_ci 603cc1dc7a3Sopenharmony_ci di.max_texel_weight_count = max_texel_weight_count; 604cc1dc7a3Sopenharmony_ci 605cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < weights_per_block; i++) 606cc1dc7a3Sopenharmony_ci { 607cc1dc7a3Sopenharmony_ci unsigned int texel_count_wt = wb.texel_count_of_weight[i]; 608cc1dc7a3Sopenharmony_ci di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt); 609cc1dc7a3Sopenharmony_ci 610cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < texel_count_wt; j++) 611cc1dc7a3Sopenharmony_ci { 612cc1dc7a3Sopenharmony_ci unsigned int texel = wb.texels_of_weight[i][j]; 613cc1dc7a3Sopenharmony_ci 614cc1dc7a3Sopenharmony_ci // Create transposed versions of these for better vectorization 615cc1dc7a3Sopenharmony_ci di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel); 616cc1dc7a3Sopenharmony_ci di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]); 617cc1dc7a3Sopenharmony_ci 618cc1dc7a3Sopenharmony_ci // Store the per-texel contribution of this weight for each texel it contributes to 619cc1dc7a3Sopenharmony_ci di.texel_contrib_for_weight[j][i] = 0.0f; 620cc1dc7a3Sopenharmony_ci for (unsigned int k = 0; k < 4; k++) 621cc1dc7a3Sopenharmony_ci { 622cc1dc7a3Sopenharmony_ci uint8_t dttw = di.texel_weights_tr[k][texel]; 623cc1dc7a3Sopenharmony_ci float dttwf = di.texel_weight_contribs_float_tr[k][texel]; 624cc1dc7a3Sopenharmony_ci if (dttw == i && dttwf != 0.0f) 625cc1dc7a3Sopenharmony_ci { 626cc1dc7a3Sopenharmony_ci di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel]; 627cc1dc7a3Sopenharmony_ci break; 628cc1dc7a3Sopenharmony_ci } 629cc1dc7a3Sopenharmony_ci } 630cc1dc7a3Sopenharmony_ci } 631cc1dc7a3Sopenharmony_ci 632cc1dc7a3Sopenharmony_ci // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails 633cc1dc7a3Sopenharmony_ci // Match last texel in active lane in SIMD group, for better gathers 634cc1dc7a3Sopenharmony_ci uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; 635cc1dc7a3Sopenharmony_ci for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) 636cc1dc7a3Sopenharmony_ci { 637cc1dc7a3Sopenharmony_ci di.weight_texels_tr[j][i] = last_texel; 638cc1dc7a3Sopenharmony_ci di.weights_texel_contribs_tr[j][i] = 0.0f; 639cc1dc7a3Sopenharmony_ci } 640cc1dc7a3Sopenharmony_ci } 641cc1dc7a3Sopenharmony_ci 642cc1dc7a3Sopenharmony_ci // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails 643cc1dc7a3Sopenharmony_ci unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); 644cc1dc7a3Sopenharmony_ci for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) 645cc1dc7a3Sopenharmony_ci { 646cc1dc7a3Sopenharmony_ci di.texel_weight_count[i] = 0; 647cc1dc7a3Sopenharmony_ci 648cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < 4; j++) 649cc1dc7a3Sopenharmony_ci { 650cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_float_tr[j][i] = 0; 651cc1dc7a3Sopenharmony_ci di.texel_weights_tr[j][i] = 0; 652cc1dc7a3Sopenharmony_ci di.texel_weight_contribs_int_tr[j][i] = 0; 653cc1dc7a3Sopenharmony_ci } 654cc1dc7a3Sopenharmony_ci } 655cc1dc7a3Sopenharmony_ci 656cc1dc7a3Sopenharmony_ci // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails 657cc1dc7a3Sopenharmony_ci // Match last texel in active lane in SIMD group, for better gathers 658cc1dc7a3Sopenharmony_ci int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; 659cc1dc7a3Sopenharmony_ci uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; 660cc1dc7a3Sopenharmony_ci 661cc1dc7a3Sopenharmony_ci unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); 662cc1dc7a3Sopenharmony_ci for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) 663cc1dc7a3Sopenharmony_ci { 664cc1dc7a3Sopenharmony_ci di.weight_texel_count[i] = 0; 665cc1dc7a3Sopenharmony_ci 666cc1dc7a3Sopenharmony_ci for (int j = 0; j < max_texel_count_of_weight; j++) 667cc1dc7a3Sopenharmony_ci { 668cc1dc7a3Sopenharmony_ci di.weight_texels_tr[j][i] = last_texel; 669cc1dc7a3Sopenharmony_ci di.weights_texel_contribs_tr[j][i] = 0.0f; 670cc1dc7a3Sopenharmony_ci } 671cc1dc7a3Sopenharmony_ci } 672cc1dc7a3Sopenharmony_ci 673cc1dc7a3Sopenharmony_ci di.texel_count = static_cast<uint8_t>(texels_per_block); 674cc1dc7a3Sopenharmony_ci di.weight_count = static_cast<uint8_t>(weights_per_block); 675cc1dc7a3Sopenharmony_ci di.weight_x = static_cast<uint8_t>(x_weights); 676cc1dc7a3Sopenharmony_ci di.weight_y = static_cast<uint8_t>(y_weights); 677cc1dc7a3Sopenharmony_ci di.weight_z = static_cast<uint8_t>(z_weights); 678cc1dc7a3Sopenharmony_ci} 679cc1dc7a3Sopenharmony_ci 680cc1dc7a3Sopenharmony_ci/** 681cc1dc7a3Sopenharmony_ci * @brief Assign the texels to use for kmeans clustering. 682cc1dc7a3Sopenharmony_ci * 683cc1dc7a3Sopenharmony_ci * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used. 684cc1dc7a3Sopenharmony_ci * The @c bsd.texel_count is an input and must be populated beforehand. 685cc1dc7a3Sopenharmony_ci * 686cc1dc7a3Sopenharmony_ci * @param[in,out] bsd The block size descriptor to populate. 687cc1dc7a3Sopenharmony_ci */ 688cc1dc7a3Sopenharmony_cistatic void assign_kmeans_texels( 689cc1dc7a3Sopenharmony_ci block_size_descriptor& bsd 690cc1dc7a3Sopenharmony_ci) { 691cc1dc7a3Sopenharmony_ci // Use all texels for kmeans on a small block 692cc1dc7a3Sopenharmony_ci if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS) 693cc1dc7a3Sopenharmony_ci { 694cc1dc7a3Sopenharmony_ci for (uint8_t i = 0; i < bsd.texel_count; i++) 695cc1dc7a3Sopenharmony_ci { 696cc1dc7a3Sopenharmony_ci bsd.kmeans_texels[i] = i; 697cc1dc7a3Sopenharmony_ci } 698cc1dc7a3Sopenharmony_ci 699cc1dc7a3Sopenharmony_ci return; 700cc1dc7a3Sopenharmony_ci } 701cc1dc7a3Sopenharmony_ci 702cc1dc7a3Sopenharmony_ci // Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block 703cc1dc7a3Sopenharmony_ci uint64_t rng_state[2]; 704cc1dc7a3Sopenharmony_ci astc::rand_init(rng_state); 705cc1dc7a3Sopenharmony_ci 706cc1dc7a3Sopenharmony_ci // Initialize array used for tracking used indices 707cc1dc7a3Sopenharmony_ci bool seen[BLOCK_MAX_TEXELS]; 708cc1dc7a3Sopenharmony_ci for (uint8_t i = 0; i < bsd.texel_count; i++) 709cc1dc7a3Sopenharmony_ci { 710cc1dc7a3Sopenharmony_ci seen[i] = false; 711cc1dc7a3Sopenharmony_ci } 712cc1dc7a3Sopenharmony_ci 713cc1dc7a3Sopenharmony_ci // Assign 64 random indices, retrying if we see repeats 714cc1dc7a3Sopenharmony_ci unsigned int arr_elements_set = 0; 715cc1dc7a3Sopenharmony_ci while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS) 716cc1dc7a3Sopenharmony_ci { 717cc1dc7a3Sopenharmony_ci uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state)); 718cc1dc7a3Sopenharmony_ci texel = texel % bsd.texel_count; 719cc1dc7a3Sopenharmony_ci if (!seen[texel]) 720cc1dc7a3Sopenharmony_ci { 721cc1dc7a3Sopenharmony_ci bsd.kmeans_texels[arr_elements_set++] = texel; 722cc1dc7a3Sopenharmony_ci seen[texel] = true; 723cc1dc7a3Sopenharmony_ci } 724cc1dc7a3Sopenharmony_ci } 725cc1dc7a3Sopenharmony_ci} 726cc1dc7a3Sopenharmony_ci 727cc1dc7a3Sopenharmony_ci/** 728cc1dc7a3Sopenharmony_ci * @brief Allocate a single 2D decimation table entry. 729cc1dc7a3Sopenharmony_ci * 730cc1dc7a3Sopenharmony_ci * @param x_texels The number of texels in the X dimension. 731cc1dc7a3Sopenharmony_ci * @param y_texels The number of texels in the Y dimension. 732cc1dc7a3Sopenharmony_ci * @param x_weights The number of weights in the X dimension. 733cc1dc7a3Sopenharmony_ci * @param y_weights The number of weights in the Y dimension. 734cc1dc7a3Sopenharmony_ci * @param bsd The block size descriptor we are populating. 735cc1dc7a3Sopenharmony_ci * @param wb The decimation table init scratch working buffers. 736cc1dc7a3Sopenharmony_ci * @param index The packed array index to populate. 737cc1dc7a3Sopenharmony_ci */ 738cc1dc7a3Sopenharmony_cistatic void construct_dt_entry_2d( 739cc1dc7a3Sopenharmony_ci unsigned int x_texels, 740cc1dc7a3Sopenharmony_ci unsigned int y_texels, 741cc1dc7a3Sopenharmony_ci unsigned int x_weights, 742cc1dc7a3Sopenharmony_ci unsigned int y_weights, 743cc1dc7a3Sopenharmony_ci block_size_descriptor& bsd, 744cc1dc7a3Sopenharmony_ci dt_init_working_buffers& wb, 745cc1dc7a3Sopenharmony_ci unsigned int index 746cc1dc7a3Sopenharmony_ci) { 747cc1dc7a3Sopenharmony_ci unsigned int weight_count = x_weights * y_weights; 748cc1dc7a3Sopenharmony_ci assert(weight_count <= BLOCK_MAX_WEIGHTS); 749cc1dc7a3Sopenharmony_ci 750cc1dc7a3Sopenharmony_ci bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS; 751cc1dc7a3Sopenharmony_ci 752cc1dc7a3Sopenharmony_ci decimation_info& di = bsd.decimation_tables[index]; 753cc1dc7a3Sopenharmony_ci init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb); 754cc1dc7a3Sopenharmony_ci 755cc1dc7a3Sopenharmony_ci int maxprec_1plane = -1; 756cc1dc7a3Sopenharmony_ci int maxprec_2planes = -1; 757cc1dc7a3Sopenharmony_ci for (int i = 0; i < 12; i++) 758cc1dc7a3Sopenharmony_ci { 759cc1dc7a3Sopenharmony_ci unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i)); 760cc1dc7a3Sopenharmony_ci if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) 761cc1dc7a3Sopenharmony_ci { 762cc1dc7a3Sopenharmony_ci maxprec_1plane = i; 763cc1dc7a3Sopenharmony_ci } 764cc1dc7a3Sopenharmony_ci 765cc1dc7a3Sopenharmony_ci if (try_2planes) 766cc1dc7a3Sopenharmony_ci { 767cc1dc7a3Sopenharmony_ci unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i)); 768cc1dc7a3Sopenharmony_ci if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) 769cc1dc7a3Sopenharmony_ci { 770cc1dc7a3Sopenharmony_ci maxprec_2planes = i; 771cc1dc7a3Sopenharmony_ci } 772cc1dc7a3Sopenharmony_ci } 773cc1dc7a3Sopenharmony_ci } 774cc1dc7a3Sopenharmony_ci 775cc1dc7a3Sopenharmony_ci // At least one of the two should be valid ... 776cc1dc7a3Sopenharmony_ci assert(maxprec_1plane >= 0 || maxprec_2planes >= 0); 777cc1dc7a3Sopenharmony_ci bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane); 778cc1dc7a3Sopenharmony_ci bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes); 779cc1dc7a3Sopenharmony_ci bsd.decimation_modes[index].refprec_1plane = 0; 780cc1dc7a3Sopenharmony_ci bsd.decimation_modes[index].refprec_2planes = 0; 781cc1dc7a3Sopenharmony_ci} 782cc1dc7a3Sopenharmony_ci 783cc1dc7a3Sopenharmony_ci/** 784cc1dc7a3Sopenharmony_ci * @brief Allocate block modes and decimation tables for a single 2D block size. 785cc1dc7a3Sopenharmony_ci * 786cc1dc7a3Sopenharmony_ci * @param x_texels The number of texels in the X dimension. 787cc1dc7a3Sopenharmony_ci * @param y_texels The number of texels in the Y dimension. 788cc1dc7a3Sopenharmony_ci * @param can_omit_modes Can we discard modes that astcenc won't use, even if legal? 789cc1dc7a3Sopenharmony_ci * @param mode_cutoff Percentile cutoff in range [0,1]. Low values more likely to be used. 790cc1dc7a3Sopenharmony_ci * @param[out] bsd The block size descriptor to populate. 791cc1dc7a3Sopenharmony_ci */ 792cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE 793cc1dc7a3Sopenharmony_cistatic bool construct_block_size_descriptor_2d( 794cc1dc7a3Sopenharmony_ci#else 795cc1dc7a3Sopenharmony_cistatic void construct_block_size_descriptor_2d( 796cc1dc7a3Sopenharmony_ci#endif 797cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 798cc1dc7a3Sopenharmony_ci unsigned int x_texels, 799cc1dc7a3Sopenharmony_ci unsigned int y_texels, 800cc1dc7a3Sopenharmony_ci bool can_omit_modes, 801cc1dc7a3Sopenharmony_ci float mode_cutoff, 802cc1dc7a3Sopenharmony_ci block_size_descriptor& bsd 803cc1dc7a3Sopenharmony_ci) { 804cc1dc7a3Sopenharmony_ci // Store a remap table for storing packed decimation modes. 805cc1dc7a3Sopenharmony_ci // Indexing uses [Y * 16 + X] and max size for each axis is 12. 806cc1dc7a3Sopenharmony_ci static const unsigned int MAX_DMI = 12 * 16 + 12; 807cc1dc7a3Sopenharmony_ci int decimation_mode_index[MAX_DMI]; 808cc1dc7a3Sopenharmony_ci 809cc1dc7a3Sopenharmony_ci dt_init_working_buffers* wb = new dt_init_working_buffers; 810cc1dc7a3Sopenharmony_ci 811cc1dc7a3Sopenharmony_ci bsd.xdim = static_cast<uint8_t>(x_texels); 812cc1dc7a3Sopenharmony_ci bsd.ydim = static_cast<uint8_t>(y_texels); 813cc1dc7a3Sopenharmony_ci bsd.zdim = 1; 814cc1dc7a3Sopenharmony_ci bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels); 815cc1dc7a3Sopenharmony_ci 816cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < MAX_DMI; i++) 817cc1dc7a3Sopenharmony_ci { 818cc1dc7a3Sopenharmony_ci decimation_mode_index[i] = -1; 819cc1dc7a3Sopenharmony_ci } 820cc1dc7a3Sopenharmony_ci 821cc1dc7a3Sopenharmony_ci // Gather all the decimation grids that can be used with the current block 822cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 823cc1dc7a3Sopenharmony_ci const float *percentiles = get_2d_percentile_table(x_texels, y_texels); 824cc1dc7a3Sopenharmony_ci float always_cutoff = (privateProfile != HIGH_QUALITY_PROFILE) ? 1.0f : 0.0f; 825cc1dc7a3Sopenharmony_ci#else 826cc1dc7a3Sopenharmony_ci // Unused in decompress-only builds 827cc1dc7a3Sopenharmony_ci (void)can_omit_modes; 828cc1dc7a3Sopenharmony_ci (void)mode_cutoff; 829cc1dc7a3Sopenharmony_ci#endif 830cc1dc7a3Sopenharmony_ci 831cc1dc7a3Sopenharmony_ci // Construct the list of block formats referencing the decimation tables 832cc1dc7a3Sopenharmony_ci unsigned int packed_bm_idx = 0; 833cc1dc7a3Sopenharmony_ci unsigned int packed_dm_idx = 0; 834cc1dc7a3Sopenharmony_ci 835cc1dc7a3Sopenharmony_ci // Trackers 836cc1dc7a3Sopenharmony_ci unsigned int bm_counts[4] { 0 }; 837cc1dc7a3Sopenharmony_ci unsigned int dm_counts[4] { 0 }; 838cc1dc7a3Sopenharmony_ci 839cc1dc7a3Sopenharmony_ci // Clear the list to a known-bad value 840cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) 841cc1dc7a3Sopenharmony_ci { 842cc1dc7a3Sopenharmony_ci bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; 843cc1dc7a3Sopenharmony_ci } 844cc1dc7a3Sopenharmony_ci 845cc1dc7a3Sopenharmony_ci // Iterate four times to build a usefully ordered list: 846cc1dc7a3Sopenharmony_ci // - Pass 0 - keep selected single plane "always" block modes 847cc1dc7a3Sopenharmony_ci // - Pass 1 - keep selected single plane "non-always" block modes 848cc1dc7a3Sopenharmony_ci // - Pass 2 - keep select dual plane block modes 849cc1dc7a3Sopenharmony_ci // - Pass 3 - keep everything else that's legal 850cc1dc7a3Sopenharmony_ci unsigned int limit = can_omit_modes ? 3 : 4; 851cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < limit; j ++) 852cc1dc7a3Sopenharmony_ci { 853cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) 854cc1dc7a3Sopenharmony_ci { 855cc1dc7a3Sopenharmony_ci // Skip modes we've already included in a previous pass 856cc1dc7a3Sopenharmony_ci if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) 857cc1dc7a3Sopenharmony_ci { 858cc1dc7a3Sopenharmony_ci continue; 859cc1dc7a3Sopenharmony_ci } 860cc1dc7a3Sopenharmony_ci if ((privateProfile == HIGH_SPEED_PROFILE) && (i != HIGH_SPEED_PROFILE_BLOCK_MODE)) 861cc1dc7a3Sopenharmony_ci { 862cc1dc7a3Sopenharmony_ci continue; 863cc1dc7a3Sopenharmony_ci } 864cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE 865cc1dc7a3Sopenharmony_ci if (privateProfile == CUSTOMIZED_PROFILE) 866cc1dc7a3Sopenharmony_ci { 867cc1dc7a3Sopenharmony_ci if (!g_astcCustomizedSoManager.LoadSutCustomizedSo() || 868cc1dc7a3Sopenharmony_ci g_astcCustomizedSoManager.isCustomizedBlockModeFunc_ == nullptr) 869cc1dc7a3Sopenharmony_ci { 870cc1dc7a3Sopenharmony_ci printf("astcenc customized so dlopen failed or isCustomizedBlockModeFunc_ is nullptr!\n"); 871cc1dc7a3Sopenharmony_ci delete wb; 872cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 873cc1dc7a3Sopenharmony_ci delete[] percentiles; 874cc1dc7a3Sopenharmony_ci#endif 875cc1dc7a3Sopenharmony_ci return false; 876cc1dc7a3Sopenharmony_ci } 877cc1dc7a3Sopenharmony_ci if (!g_astcCustomizedSoManager.isCustomizedBlockModeFunc_(i)) 878cc1dc7a3Sopenharmony_ci { 879cc1dc7a3Sopenharmony_ci continue; 880cc1dc7a3Sopenharmony_ci } 881cc1dc7a3Sopenharmony_ci } 882cc1dc7a3Sopenharmony_ci#endif 883cc1dc7a3Sopenharmony_ci // Decode parameters 884cc1dc7a3Sopenharmony_ci unsigned int x_weights; 885cc1dc7a3Sopenharmony_ci unsigned int y_weights; 886cc1dc7a3Sopenharmony_ci bool is_dual_plane; 887cc1dc7a3Sopenharmony_ci unsigned int quant_mode; 888cc1dc7a3Sopenharmony_ci unsigned int weight_bits; 889cc1dc7a3Sopenharmony_ci bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits); 890cc1dc7a3Sopenharmony_ci 891cc1dc7a3Sopenharmony_ci // Always skip invalid encodings for the current block size 892cc1dc7a3Sopenharmony_ci if (!valid || (x_weights > x_texels) || (y_weights > y_texels)) 893cc1dc7a3Sopenharmony_ci { 894cc1dc7a3Sopenharmony_ci continue; 895cc1dc7a3Sopenharmony_ci } 896cc1dc7a3Sopenharmony_ci 897cc1dc7a3Sopenharmony_ci // Selectively skip dual plane encodings 898cc1dc7a3Sopenharmony_ci if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane)) 899cc1dc7a3Sopenharmony_ci { 900cc1dc7a3Sopenharmony_ci continue; 901cc1dc7a3Sopenharmony_ci } 902cc1dc7a3Sopenharmony_ci 903cc1dc7a3Sopenharmony_ci // Always skip encodings we can't physically encode based on 904cc1dc7a3Sopenharmony_ci // generic encoding bit availability 905cc1dc7a3Sopenharmony_ci if (is_dual_plane) 906cc1dc7a3Sopenharmony_ci { 907cc1dc7a3Sopenharmony_ci // This is the only check we need as only support 1 partition 908cc1dc7a3Sopenharmony_ci if ((109 - weight_bits) <= 0) 909cc1dc7a3Sopenharmony_ci { 910cc1dc7a3Sopenharmony_ci continue; 911cc1dc7a3Sopenharmony_ci } 912cc1dc7a3Sopenharmony_ci } 913cc1dc7a3Sopenharmony_ci else 914cc1dc7a3Sopenharmony_ci { 915cc1dc7a3Sopenharmony_ci // This is conservative - fewer bits may be available for > 1 partition 916cc1dc7a3Sopenharmony_ci if ((111 - weight_bits) <= 0) 917cc1dc7a3Sopenharmony_ci { 918cc1dc7a3Sopenharmony_ci continue; 919cc1dc7a3Sopenharmony_ci } 920cc1dc7a3Sopenharmony_ci } 921cc1dc7a3Sopenharmony_ci 922cc1dc7a3Sopenharmony_ci // Selectively skip encodings based on percentile 923cc1dc7a3Sopenharmony_ci bool percentile_hit = false; 924cc1dc7a3Sopenharmony_ci #if !defined(ASTCENC_DECOMPRESS_ONLY) 925cc1dc7a3Sopenharmony_ci if (j == 0) 926cc1dc7a3Sopenharmony_ci { 927cc1dc7a3Sopenharmony_ci percentile_hit = percentiles[i] <= always_cutoff; 928cc1dc7a3Sopenharmony_ci } 929cc1dc7a3Sopenharmony_ci else 930cc1dc7a3Sopenharmony_ci { 931cc1dc7a3Sopenharmony_ci percentile_hit = percentiles[i] <= mode_cutoff; 932cc1dc7a3Sopenharmony_ci } 933cc1dc7a3Sopenharmony_ci #endif 934cc1dc7a3Sopenharmony_ci 935cc1dc7a3Sopenharmony_ci if (j != 3 && !percentile_hit) 936cc1dc7a3Sopenharmony_ci { 937cc1dc7a3Sopenharmony_ci continue; 938cc1dc7a3Sopenharmony_ci } 939cc1dc7a3Sopenharmony_ci 940cc1dc7a3Sopenharmony_ci // Allocate and initialize the decimation table entry if we've not used it yet 941cc1dc7a3Sopenharmony_ci int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights]; 942cc1dc7a3Sopenharmony_ci if (decimation_mode < 0) 943cc1dc7a3Sopenharmony_ci { 944cc1dc7a3Sopenharmony_ci construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx); 945cc1dc7a3Sopenharmony_ci if (privateProfile == HIGH_SPEED_PROFILE) 946cc1dc7a3Sopenharmony_ci { 947cc1dc7a3Sopenharmony_ci bsd.decimation_modes[packed_dm_idx].maxprec_1plane = 4; // Speed optimization: max prec num is limited to 4 948cc1dc7a3Sopenharmony_ci } 949cc1dc7a3Sopenharmony_ci decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx; 950cc1dc7a3Sopenharmony_ci decimation_mode = packed_dm_idx; 951cc1dc7a3Sopenharmony_ci 952cc1dc7a3Sopenharmony_ci dm_counts[j]++; 953cc1dc7a3Sopenharmony_ci packed_dm_idx++; 954cc1dc7a3Sopenharmony_ci } 955cc1dc7a3Sopenharmony_ci 956cc1dc7a3Sopenharmony_ci auto& bm = bsd.block_modes[packed_bm_idx]; 957cc1dc7a3Sopenharmony_ci 958cc1dc7a3Sopenharmony_ci bm.decimation_mode = static_cast<uint8_t>(decimation_mode); 959cc1dc7a3Sopenharmony_ci bm.quant_mode = static_cast<uint8_t>(quant_mode); 960cc1dc7a3Sopenharmony_ci bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane); 961cc1dc7a3Sopenharmony_ci bm.weight_bits = static_cast<uint8_t>(weight_bits); 962cc1dc7a3Sopenharmony_ci bm.mode_index = static_cast<uint16_t>(i); 963cc1dc7a3Sopenharmony_ci 964cc1dc7a3Sopenharmony_ci auto& dm = bsd.decimation_modes[decimation_mode]; 965cc1dc7a3Sopenharmony_ci 966cc1dc7a3Sopenharmony_ci if (is_dual_plane) 967cc1dc7a3Sopenharmony_ci { 968cc1dc7a3Sopenharmony_ci dm.set_ref_2plane(bm.get_weight_quant_mode()); 969cc1dc7a3Sopenharmony_ci } 970cc1dc7a3Sopenharmony_ci else 971cc1dc7a3Sopenharmony_ci { 972cc1dc7a3Sopenharmony_ci dm.set_ref_1plane(bm.get_weight_quant_mode()); 973cc1dc7a3Sopenharmony_ci } 974cc1dc7a3Sopenharmony_ci 975cc1dc7a3Sopenharmony_ci bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx); 976cc1dc7a3Sopenharmony_ci 977cc1dc7a3Sopenharmony_ci packed_bm_idx++; 978cc1dc7a3Sopenharmony_ci bm_counts[j]++; 979cc1dc7a3Sopenharmony_ci } 980cc1dc7a3Sopenharmony_ci } 981cc1dc7a3Sopenharmony_ci 982cc1dc7a3Sopenharmony_ci bsd.block_mode_count_1plane_always = bm_counts[0]; 983cc1dc7a3Sopenharmony_ci bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1]; 984cc1dc7a3Sopenharmony_ci bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2]; 985cc1dc7a3Sopenharmony_ci bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3]; 986cc1dc7a3Sopenharmony_ci 987cc1dc7a3Sopenharmony_ci bsd.decimation_mode_count_always = dm_counts[0]; 988cc1dc7a3Sopenharmony_ci bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2]; 989cc1dc7a3Sopenharmony_ci bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3]; 990cc1dc7a3Sopenharmony_ci 991cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 992cc1dc7a3Sopenharmony_ci assert(bsd.block_mode_count_1plane_always > 0); 993cc1dc7a3Sopenharmony_ci assert(bsd.decimation_mode_count_always > 0); 994cc1dc7a3Sopenharmony_ci 995cc1dc7a3Sopenharmony_ci delete[] percentiles; 996cc1dc7a3Sopenharmony_ci#endif 997cc1dc7a3Sopenharmony_ci 998cc1dc7a3Sopenharmony_ci // Ensure the end of the array contains valid data (should never get read) 999cc1dc7a3Sopenharmony_ci for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++) 1000cc1dc7a3Sopenharmony_ci { 1001cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].maxprec_1plane = -1; 1002cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].maxprec_2planes = -1; 1003cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].refprec_1plane = 0; 1004cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].refprec_2planes = 0; 1005cc1dc7a3Sopenharmony_ci } 1006cc1dc7a3Sopenharmony_ci 1007cc1dc7a3Sopenharmony_ci // Determine the texels to use for kmeans clustering. 1008cc1dc7a3Sopenharmony_ci assign_kmeans_texels(bsd); 1009cc1dc7a3Sopenharmony_ci 1010cc1dc7a3Sopenharmony_ci delete wb; 1011cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE 1012cc1dc7a3Sopenharmony_ci return true; 1013cc1dc7a3Sopenharmony_ci#endif 1014cc1dc7a3Sopenharmony_ci} 1015cc1dc7a3Sopenharmony_ci 1016cc1dc7a3Sopenharmony_ci/** 1017cc1dc7a3Sopenharmony_ci * @brief Allocate block modes and decimation tables for a single 3D block size. 1018cc1dc7a3Sopenharmony_ci * 1019cc1dc7a3Sopenharmony_ci * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as 1020cc1dc7a3Sopenharmony_ci * the percentile mode cutoffs. If 3D becomes more widely used we should look at this. 1021cc1dc7a3Sopenharmony_ci * 1022cc1dc7a3Sopenharmony_ci * @param x_texels The number of texels in the X dimension. 1023cc1dc7a3Sopenharmony_ci * @param y_texels The number of texels in the Y dimension. 1024cc1dc7a3Sopenharmony_ci * @param z_texels The number of texels in the Z dimension. 1025cc1dc7a3Sopenharmony_ci * @param[out] bsd The block size descriptor to populate. 1026cc1dc7a3Sopenharmony_ci */ 1027cc1dc7a3Sopenharmony_cistatic void construct_block_size_descriptor_3d( 1028cc1dc7a3Sopenharmony_ci unsigned int x_texels, 1029cc1dc7a3Sopenharmony_ci unsigned int y_texels, 1030cc1dc7a3Sopenharmony_ci unsigned int z_texels, 1031cc1dc7a3Sopenharmony_ci block_size_descriptor& bsd 1032cc1dc7a3Sopenharmony_ci) { 1033cc1dc7a3Sopenharmony_ci // Store a remap table for storing packed decimation modes. 1034cc1dc7a3Sopenharmony_ci // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6. 1035cc1dc7a3Sopenharmony_ci static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6; 1036cc1dc7a3Sopenharmony_ci int decimation_mode_index[MAX_DMI]; 1037cc1dc7a3Sopenharmony_ci unsigned int decimation_mode_count = 0; 1038cc1dc7a3Sopenharmony_ci 1039cc1dc7a3Sopenharmony_ci dt_init_working_buffers* wb = new dt_init_working_buffers; 1040cc1dc7a3Sopenharmony_ci 1041cc1dc7a3Sopenharmony_ci bsd.xdim = static_cast<uint8_t>(x_texels); 1042cc1dc7a3Sopenharmony_ci bsd.ydim = static_cast<uint8_t>(y_texels); 1043cc1dc7a3Sopenharmony_ci bsd.zdim = static_cast<uint8_t>(z_texels); 1044cc1dc7a3Sopenharmony_ci bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels); 1045cc1dc7a3Sopenharmony_ci 1046cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < MAX_DMI; i++) 1047cc1dc7a3Sopenharmony_ci { 1048cc1dc7a3Sopenharmony_ci decimation_mode_index[i] = -1; 1049cc1dc7a3Sopenharmony_ci } 1050cc1dc7a3Sopenharmony_ci 1051cc1dc7a3Sopenharmony_ci // gather all the infill-modes that can be used with the current block size 1052cc1dc7a3Sopenharmony_ci for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++) 1053cc1dc7a3Sopenharmony_ci { 1054cc1dc7a3Sopenharmony_ci for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++) 1055cc1dc7a3Sopenharmony_ci { 1056cc1dc7a3Sopenharmony_ci for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++) 1057cc1dc7a3Sopenharmony_ci { 1058cc1dc7a3Sopenharmony_ci unsigned int weight_count = x_weights * y_weights * z_weights; 1059cc1dc7a3Sopenharmony_ci if (weight_count > BLOCK_MAX_WEIGHTS) 1060cc1dc7a3Sopenharmony_ci { 1061cc1dc7a3Sopenharmony_ci continue; 1062cc1dc7a3Sopenharmony_ci } 1063cc1dc7a3Sopenharmony_ci 1064cc1dc7a3Sopenharmony_ci decimation_info& di = bsd.decimation_tables[decimation_mode_count]; 1065cc1dc7a3Sopenharmony_ci decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count; 1066cc1dc7a3Sopenharmony_ci init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb); 1067cc1dc7a3Sopenharmony_ci 1068cc1dc7a3Sopenharmony_ci int maxprec_1plane = -1; 1069cc1dc7a3Sopenharmony_ci int maxprec_2planes = -1; 1070cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < 12; i++) 1071cc1dc7a3Sopenharmony_ci { 1072cc1dc7a3Sopenharmony_ci unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i)); 1073cc1dc7a3Sopenharmony_ci if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) 1074cc1dc7a3Sopenharmony_ci { 1075cc1dc7a3Sopenharmony_ci maxprec_1plane = i; 1076cc1dc7a3Sopenharmony_ci } 1077cc1dc7a3Sopenharmony_ci 1078cc1dc7a3Sopenharmony_ci unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i)); 1079cc1dc7a3Sopenharmony_ci if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) 1080cc1dc7a3Sopenharmony_ci { 1081cc1dc7a3Sopenharmony_ci maxprec_2planes = i; 1082cc1dc7a3Sopenharmony_ci } 1083cc1dc7a3Sopenharmony_ci } 1084cc1dc7a3Sopenharmony_ci 1085cc1dc7a3Sopenharmony_ci if ((2 * weight_count) > BLOCK_MAX_WEIGHTS) 1086cc1dc7a3Sopenharmony_ci { 1087cc1dc7a3Sopenharmony_ci maxprec_2planes = -1; 1088cc1dc7a3Sopenharmony_ci } 1089cc1dc7a3Sopenharmony_ci 1090cc1dc7a3Sopenharmony_ci bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane); 1091cc1dc7a3Sopenharmony_ci bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes); 1092cc1dc7a3Sopenharmony_ci bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF; 1093cc1dc7a3Sopenharmony_ci bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF; 1094cc1dc7a3Sopenharmony_ci decimation_mode_count++; 1095cc1dc7a3Sopenharmony_ci } 1096cc1dc7a3Sopenharmony_ci } 1097cc1dc7a3Sopenharmony_ci } 1098cc1dc7a3Sopenharmony_ci 1099cc1dc7a3Sopenharmony_ci // Ensure the end of the array contains valid data (should never get read) 1100cc1dc7a3Sopenharmony_ci for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) 1101cc1dc7a3Sopenharmony_ci { 1102cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].maxprec_1plane = -1; 1103cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].maxprec_2planes = -1; 1104cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].refprec_1plane = 0; 1105cc1dc7a3Sopenharmony_ci bsd.decimation_modes[i].refprec_2planes = 0; 1106cc1dc7a3Sopenharmony_ci } 1107cc1dc7a3Sopenharmony_ci 1108cc1dc7a3Sopenharmony_ci bsd.decimation_mode_count_always = 0; // Skipped for 3D modes 1109cc1dc7a3Sopenharmony_ci bsd.decimation_mode_count_selected = decimation_mode_count; 1110cc1dc7a3Sopenharmony_ci bsd.decimation_mode_count_all = decimation_mode_count; 1111cc1dc7a3Sopenharmony_ci 1112cc1dc7a3Sopenharmony_ci // Construct the list of block formats referencing the decimation tables 1113cc1dc7a3Sopenharmony_ci 1114cc1dc7a3Sopenharmony_ci // Clear the list to a known-bad value 1115cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) 1116cc1dc7a3Sopenharmony_ci { 1117cc1dc7a3Sopenharmony_ci bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; 1118cc1dc7a3Sopenharmony_ci } 1119cc1dc7a3Sopenharmony_ci 1120cc1dc7a3Sopenharmony_ci unsigned int packed_idx = 0; 1121cc1dc7a3Sopenharmony_ci unsigned int bm_counts[2] { 0 }; 1122cc1dc7a3Sopenharmony_ci 1123cc1dc7a3Sopenharmony_ci // Iterate two times to build a usefully ordered list: 1124cc1dc7a3Sopenharmony_ci // - Pass 0 - keep valid single plane block modes 1125cc1dc7a3Sopenharmony_ci // - Pass 1 - keep valid dual plane block modes 1126cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < 2; j++) 1127cc1dc7a3Sopenharmony_ci { 1128cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) 1129cc1dc7a3Sopenharmony_ci { 1130cc1dc7a3Sopenharmony_ci // Skip modes we've already included in a previous pass 1131cc1dc7a3Sopenharmony_ci if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) 1132cc1dc7a3Sopenharmony_ci { 1133cc1dc7a3Sopenharmony_ci continue; 1134cc1dc7a3Sopenharmony_ci } 1135cc1dc7a3Sopenharmony_ci 1136cc1dc7a3Sopenharmony_ci unsigned int x_weights; 1137cc1dc7a3Sopenharmony_ci unsigned int y_weights; 1138cc1dc7a3Sopenharmony_ci unsigned int z_weights; 1139cc1dc7a3Sopenharmony_ci bool is_dual_plane; 1140cc1dc7a3Sopenharmony_ci unsigned int quant_mode; 1141cc1dc7a3Sopenharmony_ci unsigned int weight_bits; 1142cc1dc7a3Sopenharmony_ci 1143cc1dc7a3Sopenharmony_ci bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits); 1144cc1dc7a3Sopenharmony_ci // Skip invalid encodings 1145cc1dc7a3Sopenharmony_ci if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels) 1146cc1dc7a3Sopenharmony_ci { 1147cc1dc7a3Sopenharmony_ci continue; 1148cc1dc7a3Sopenharmony_ci } 1149cc1dc7a3Sopenharmony_ci 1150cc1dc7a3Sopenharmony_ci // Skip encodings in the wrong iteration 1151cc1dc7a3Sopenharmony_ci if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane)) 1152cc1dc7a3Sopenharmony_ci { 1153cc1dc7a3Sopenharmony_ci continue; 1154cc1dc7a3Sopenharmony_ci } 1155cc1dc7a3Sopenharmony_ci 1156cc1dc7a3Sopenharmony_ci // Always skip encodings we can't physically encode based on bit availability 1157cc1dc7a3Sopenharmony_ci if (is_dual_plane) 1158cc1dc7a3Sopenharmony_ci { 1159cc1dc7a3Sopenharmony_ci // This is the only check we need as only support 1 partition 1160cc1dc7a3Sopenharmony_ci if ((109 - weight_bits) <= 0) 1161cc1dc7a3Sopenharmony_ci { 1162cc1dc7a3Sopenharmony_ci continue; 1163cc1dc7a3Sopenharmony_ci } 1164cc1dc7a3Sopenharmony_ci } 1165cc1dc7a3Sopenharmony_ci else 1166cc1dc7a3Sopenharmony_ci { 1167cc1dc7a3Sopenharmony_ci // This is conservative - fewer bits may be available for > 1 partition 1168cc1dc7a3Sopenharmony_ci if ((111 - weight_bits) <= 0) 1169cc1dc7a3Sopenharmony_ci { 1170cc1dc7a3Sopenharmony_ci continue; 1171cc1dc7a3Sopenharmony_ci } 1172cc1dc7a3Sopenharmony_ci } 1173cc1dc7a3Sopenharmony_ci 1174cc1dc7a3Sopenharmony_ci int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights]; 1175cc1dc7a3Sopenharmony_ci bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode); 1176cc1dc7a3Sopenharmony_ci bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode); 1177cc1dc7a3Sopenharmony_ci bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits); 1178cc1dc7a3Sopenharmony_ci bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane); 1179cc1dc7a3Sopenharmony_ci bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i); 1180cc1dc7a3Sopenharmony_ci 1181cc1dc7a3Sopenharmony_ci bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx); 1182cc1dc7a3Sopenharmony_ci bm_counts[j]++; 1183cc1dc7a3Sopenharmony_ci packed_idx++; 1184cc1dc7a3Sopenharmony_ci } 1185cc1dc7a3Sopenharmony_ci } 1186cc1dc7a3Sopenharmony_ci 1187cc1dc7a3Sopenharmony_ci bsd.block_mode_count_1plane_always = 0; // Skipped for 3D modes 1188cc1dc7a3Sopenharmony_ci bsd.block_mode_count_1plane_selected = bm_counts[0]; 1189cc1dc7a3Sopenharmony_ci bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1]; 1190cc1dc7a3Sopenharmony_ci bsd.block_mode_count_all = bm_counts[0] + bm_counts[1]; 1191cc1dc7a3Sopenharmony_ci 1192cc1dc7a3Sopenharmony_ci // Determine the texels to use for kmeans clustering. 1193cc1dc7a3Sopenharmony_ci assign_kmeans_texels(bsd); 1194cc1dc7a3Sopenharmony_ci 1195cc1dc7a3Sopenharmony_ci delete wb; 1196cc1dc7a3Sopenharmony_ci} 1197cc1dc7a3Sopenharmony_ci 1198cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 1199cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE 1200cc1dc7a3Sopenharmony_cibool init_block_size_descriptor( 1201cc1dc7a3Sopenharmony_ci#else 1202cc1dc7a3Sopenharmony_civoid init_block_size_descriptor( 1203cc1dc7a3Sopenharmony_ci#endif 1204cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 1205cc1dc7a3Sopenharmony_ci unsigned int x_texels, 1206cc1dc7a3Sopenharmony_ci unsigned int y_texels, 1207cc1dc7a3Sopenharmony_ci unsigned int z_texels, 1208cc1dc7a3Sopenharmony_ci bool can_omit_modes, 1209cc1dc7a3Sopenharmony_ci unsigned int partition_count_cutoff, 1210cc1dc7a3Sopenharmony_ci float mode_cutoff, 1211cc1dc7a3Sopenharmony_ci block_size_descriptor& bsd 1212cc1dc7a3Sopenharmony_ci) { 1213cc1dc7a3Sopenharmony_ci if (z_texels > 1) 1214cc1dc7a3Sopenharmony_ci { 1215cc1dc7a3Sopenharmony_ci construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd); 1216cc1dc7a3Sopenharmony_ci } 1217cc1dc7a3Sopenharmony_ci else 1218cc1dc7a3Sopenharmony_ci { 1219cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE 1220cc1dc7a3Sopenharmony_ci if (!construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd)) 1221cc1dc7a3Sopenharmony_ci { 1222cc1dc7a3Sopenharmony_ci return false; 1223cc1dc7a3Sopenharmony_ci } 1224cc1dc7a3Sopenharmony_ci#else 1225cc1dc7a3Sopenharmony_ci construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd); 1226cc1dc7a3Sopenharmony_ci#endif 1227cc1dc7a3Sopenharmony_ci } 1228cc1dc7a3Sopenharmony_ci 1229cc1dc7a3Sopenharmony_ci init_partition_tables(bsd, can_omit_modes, partition_count_cutoff); 1230cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE 1231cc1dc7a3Sopenharmony_ci return true; 1232cc1dc7a3Sopenharmony_ci#endif 1233cc1dc7a3Sopenharmony_ci} 1234