1// SPDX-License-Identifier: Apache-2.0 2// ---------------------------------------------------------------------------- 3// Copyright 2011-2024 Arm Limited 4// 5// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6// use this file except in compliance with the License. You may obtain a copy 7// of the License at: 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14// License for the specific language governing permissions and limitations 15// under the License. 16// ---------------------------------------------------------------------------- 17 18/** 19 * @brief Functions for the library entrypoint. 20 */ 21 22#include <array> 23#include <cstring> 24#include <new> 25 26#include "astcenc.h" 27#include "astcenc_internal_entry.h" 28#include "astcenc_diagnostic_trace.h" 29 30/** 31 * @brief Record of the quality tuning parameter values. 32 * 33 * See the @c astcenc_config structure for detailed parameter documentation. 34 * 35 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit. 36 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios 37 * for the more thorough search presets because the underlying db_limit is so much higher. 
38 */ 39struct astcenc_preset_config 40{ 41 float quality; 42 unsigned int tune_partition_count_limit; 43 unsigned int tune_2partition_index_limit; 44 unsigned int tune_3partition_index_limit; 45 unsigned int tune_4partition_index_limit; 46 unsigned int tune_block_mode_limit; 47 unsigned int tune_refinement_limit; 48 unsigned int tune_candidate_limit; 49 unsigned int tune_2partitioning_candidate_limit; 50 unsigned int tune_3partitioning_candidate_limit; 51 unsigned int tune_4partitioning_candidate_limit; 52 float tune_db_limit_a_base; 53 float tune_db_limit_b_base; 54 float tune_mse_overshoot; 55 float tune_2partition_early_out_limit_factor; 56 float tune_3partition_early_out_limit_factor; 57 float tune_2plane_early_out_limit_correlation; 58 float tune_search_mode0_enable; 59}; 60 61/** 62 * @brief The static presets for high bandwidth encodings (x < 25 texels per block). 63 */ 64static const std::array<astcenc_preset_config, 6> preset_configs_high {{ 65 { 66 ASTCENC_PRE_FASTEST, 67 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f 68 }, { 69 ASTCENC_PRE_FAST, 70 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f 71 }, { 72 ASTCENC_PRE_MEDIUM, 73 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f 74 }, { 75 ASTCENC_PRE_THOROUGH, 76 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f 77 }, { 78 ASTCENC_PRE_VERYTHOROUGH, 79 4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f 80 }, { 81 ASTCENC_PRE_EXHAUSTIVE, 82 4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f 83 } 84}}; 85 86/** 87 * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block). 
88 */ 89static const std::array<astcenc_preset_config, 6> preset_configs_mid {{ 90 { 91 ASTCENC_PRE_FASTEST, 92 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f 93 }, { 94 ASTCENC_PRE_FAST, 95 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f 96 }, { 97 ASTCENC_PRE_MEDIUM, 98 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f 99 }, { 100 ASTCENC_PRE_THOROUGH, 101 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f 102 }, { 103 ASTCENC_PRE_VERYTHOROUGH, 104 4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f 105 }, { 106 ASTCENC_PRE_EXHAUSTIVE, 107 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f 108 } 109}}; 110 111/** 112 * @brief The static presets for low bandwidth encodings (64 <= x texels per block). 113 */ 114static const std::array<astcenc_preset_config, 6> preset_configs_low {{ 115 { 116 ASTCENC_PRE_FASTEST, 117 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f 118 }, { 119 ASTCENC_PRE_FAST, 120 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f 121 }, { 122 ASTCENC_PRE_MEDIUM, 123 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f 124 }, { 125 ASTCENC_PRE_THOROUGH, 126 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f 127 }, { 128 ASTCENC_PRE_VERYTHOROUGH, 129 4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f 130 }, { 131 ASTCENC_PRE_EXHAUSTIVE, 132 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f 133 } 134}}; 135 136/** 137 * @brief Validate CPU floating point meets assumptions made in the codec. 138 * 139 * The codec is written with the assumption that a float threaded through the @c if32 union will be 140 * stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. 
This is always the 141 * case in an IEEE-754 compliant system, however not every system or compilation mode is actually 142 * IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled. 143 * 144 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 145 */ 146static astcenc_error validate_cpu_float() 147{ 148 if32 p; 149 volatile float xprec_testval = 2.51f; 150 p.f = xprec_testval + 12582912.0f; 151 float q = p.f - 12582912.0f; 152 153 if (q != 3.0f) 154 { 155 return ASTCENC_ERR_BAD_CPU_FLOAT; 156 } 157 158 return ASTCENC_SUCCESS; 159} 160 161/** 162 * @brief Validate config profile. 163 * 164 * @param profile The profile to check. 165 * 166 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 167 */ 168static astcenc_error validate_profile( 169 astcenc_profile profile 170) { 171 // Values in this enum are from an external user, so not guaranteed to be 172 // bounded to the enum values 173 switch (static_cast<int>(profile)) 174 { 175 case ASTCENC_PRF_LDR_SRGB: 176 case ASTCENC_PRF_LDR: 177 case ASTCENC_PRF_HDR_RGB_LDR_A: 178 case ASTCENC_PRF_HDR: 179 return ASTCENC_SUCCESS; 180 default: 181 return ASTCENC_ERR_BAD_PROFILE; 182 } 183} 184 185/** 186 * @brief Validate block size. 187 * 188 * @param block_x The block x dimensions. 189 * @param block_y The block y dimensions. 190 * @param block_z The block z dimensions. 191 * 192 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
193 */ 194static astcenc_error validate_block_size( 195 unsigned int block_x, 196 unsigned int block_y, 197 unsigned int block_z 198) { 199 // Test if this is a legal block size at all 200 bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) || 201 ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z))); 202 if (!is_legal) 203 { 204 return ASTCENC_ERR_BAD_BLOCK_SIZE; 205 } 206 207 // Test if this build has sufficient capacity for this block size 208 bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS; 209 if (!have_capacity) 210 { 211 return ASTCENC_ERR_NOT_IMPLEMENTED; 212 } 213 214 return ASTCENC_SUCCESS; 215} 216 217/** 218 * @brief Validate flags. 219 * 220 * @param profile The profile to check. 221 * @param flags The flags to check. 222 * 223 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 224 */ 225static astcenc_error validate_flags( 226 astcenc_profile profile, 227 unsigned int flags 228) { 229 // Flags field must not contain any unknown flag bits 230 unsigned int exMask = ~ASTCENC_ALL_FLAGS; 231 if (popcount(flags & exMask) != 0) 232 { 233 return ASTCENC_ERR_BAD_FLAGS; 234 } 235 236 // Flags field must only contain at most a single map type 237 exMask = ASTCENC_FLG_MAP_NORMAL 238 | ASTCENC_FLG_MAP_RGBM; 239 if (popcount(flags & exMask) > 1) 240 { 241 return ASTCENC_ERR_BAD_FLAGS; 242 } 243 244 // Decode_unorm8 must only be used with an LDR profile 245 bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8; 246 bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A); 247 if (is_unorm8 && is_hdr) 248 { 249 return ASTCENC_ERR_BAD_DECODE_MODE; 250 } 251 252 return ASTCENC_SUCCESS; 253} 254 255#if !defined(ASTCENC_DECOMPRESS_ONLY) 256 257/** 258 * @brief Validate single channel compression swizzle. 259 * 260 * @param swizzle The swizzle to check. 261 * 262 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
263 */ 264static astcenc_error validate_compression_swz( 265 astcenc_swz swizzle 266) { 267 // Not all enum values are handled; SWZ_Z is invalid for compression 268 switch (static_cast<int>(swizzle)) 269 { 270 case ASTCENC_SWZ_R: 271 case ASTCENC_SWZ_G: 272 case ASTCENC_SWZ_B: 273 case ASTCENC_SWZ_A: 274 case ASTCENC_SWZ_0: 275 case ASTCENC_SWZ_1: 276 return ASTCENC_SUCCESS; 277 default: 278 return ASTCENC_ERR_BAD_SWIZZLE; 279 } 280} 281 282/** 283 * @brief Validate overall compression swizzle. 284 * 285 * @param swizzle The swizzle to check. 286 * 287 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 288 */ 289static astcenc_error validate_compression_swizzle( 290 const astcenc_swizzle& swizzle 291) { 292 if (validate_compression_swz(swizzle.r) || 293 validate_compression_swz(swizzle.g) || 294 validate_compression_swz(swizzle.b) || 295 validate_compression_swz(swizzle.a)) 296 { 297 return ASTCENC_ERR_BAD_SWIZZLE; 298 } 299 300 return ASTCENC_SUCCESS; 301} 302#endif 303 304/** 305 * @brief Validate single channel decompression swizzle. 306 * 307 * @param swizzle The swizzle to check. 308 * 309 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 310 */ 311static astcenc_error validate_decompression_swz( 312 astcenc_swz swizzle 313) { 314 // Values in this enum are from an external user, so not guaranteed to be 315 // bounded to the enum values 316 switch (static_cast<int>(swizzle)) 317 { 318 case ASTCENC_SWZ_R: 319 case ASTCENC_SWZ_G: 320 case ASTCENC_SWZ_B: 321 case ASTCENC_SWZ_A: 322 case ASTCENC_SWZ_0: 323 case ASTCENC_SWZ_1: 324 case ASTCENC_SWZ_Z: 325 return ASTCENC_SUCCESS; 326 default: 327 return ASTCENC_ERR_BAD_SWIZZLE; 328 } 329} 330 331/** 332 * @brief Validate overall decompression swizzle. 333 * 334 * @param swizzle The swizzle to check. 335 * 336 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
337 */ 338static astcenc_error validate_decompression_swizzle( 339 const astcenc_swizzle& swizzle 340) { 341 if (validate_decompression_swz(swizzle.r) || 342 validate_decompression_swz(swizzle.g) || 343 validate_decompression_swz(swizzle.b) || 344 validate_decompression_swz(swizzle.a)) 345 { 346 return ASTCENC_ERR_BAD_SWIZZLE; 347 } 348 349 return ASTCENC_SUCCESS; 350} 351 352/** 353 * Validate that an incoming configuration is in-spec. 354 * 355 * This function can respond in two ways: 356 * 357 * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown 358 * for out-of-range inputs in this case. 359 * * Numerical inputs and logic inputs are are logically invalid and which make no sense 360 * algorithmically will return an error. 361 * 362 * @param[in,out] config The input compressor configuration. 363 * 364 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 365 */ 366static astcenc_error validate_config( 367 astcenc_config &config 368) { 369 astcenc_error status; 370 371 status = validate_profile(config.profile); 372 if (status != ASTCENC_SUCCESS) 373 { 374 return status; 375 } 376 377 status = validate_flags(config.profile, config.flags); 378 if (status != ASTCENC_SUCCESS) 379 { 380 return status; 381 } 382 383 status = validate_block_size(config.block_x, config.block_y, config.block_z); 384 if (status != ASTCENC_SUCCESS) 385 { 386 return status; 387 } 388 389#if defined(ASTCENC_DECOMPRESS_ONLY) 390 // Decompress-only builds only support decompress-only contexts 391 if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) 392 { 393 return ASTCENC_ERR_BAD_PARAM; 394 } 395#endif 396 397 config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); 398 399 config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); 400 config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); 401 config.tune_3partition_index_limit = 
astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); 402 config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); 403 config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); 404 config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); 405 config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); 406 config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); 407 config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); 408 config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); 409 config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); 410 config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f); 411 config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f); 412 config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f); 413 config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f); 414 415 // Specifying a zero weight color component is not allowed; force to small value 416 float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight), 417 astc::max(config.cw_b_weight, config.cw_a_weight)); 418 if (max_weight > 0.0f) 419 { 420 max_weight /= 1000.0f; 421 config.cw_r_weight = astc::max(config.cw_r_weight, max_weight); 422 config.cw_g_weight = astc::max(config.cw_g_weight, max_weight); 423 config.cw_b_weight = astc::max(config.cw_b_weight, max_weight); 424 config.cw_a_weight = astc::max(config.cw_a_weight, max_weight); 425 } 426 // If all color components error 
weights are zero then return an error 427 else 428 { 429 return ASTCENC_ERR_BAD_PARAM; 430 } 431 432 return ASTCENC_SUCCESS; 433} 434 435/* See header for documentation. */ 436astcenc_error astcenc_config_init( 437 astcenc_profile profile, 438 unsigned int block_x, 439 unsigned int block_y, 440 unsigned int block_z, 441 float quality, 442 unsigned int flags, 443 astcenc_config* configp 444) { 445 astcenc_error status; 446 447 status = validate_cpu_float(); 448 if (status != ASTCENC_SUCCESS) 449 { 450 return status; 451 } 452 453 // Zero init all config fields; although most of will be over written 454 astcenc_config& config = *configp; 455 std::memset(&config, 0, sizeof(config)); 456 457 // Process the block size 458 block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1 459 status = validate_block_size(block_x, block_y, block_z); 460 if (status != ASTCENC_SUCCESS) 461 { 462 return status; 463 } 464 465 config.block_x = block_x; 466 config.block_y = block_y; 467 config.block_z = block_z; 468 469 float texels = static_cast<float>(block_x * block_y * block_z); 470 float ltexels = logf(texels) / logf(10.0f); 471 472 // Process the performance quality level or preset; note that this must be done before we 473 // process any additional settings, such as color profile and flags, which may replace some of 474 // these settings with more use case tuned values 475 if (quality < ASTCENC_PRE_FASTEST || 476 quality > ASTCENC_PRE_EXHAUSTIVE) 477 { 478 return ASTCENC_ERR_BAD_QUALITY; 479 } 480 481 static const std::array<astcenc_preset_config, 6>* preset_configs; 482 int texels_int = block_x * block_y * block_z; 483 if (texels_int < 25) 484 { 485 preset_configs = &preset_configs_high; 486 } 487 else if (texels_int < 64) 488 { 489 preset_configs = &preset_configs_mid; 490 } 491 else 492 { 493 preset_configs = &preset_configs_low; 494 } 495 496 // Determine which preset to use, or which pair to interpolate 497 size_t start; 498 size_t end; 499 for 
(end = 0; end < preset_configs->size(); end++) 500 { 501 if ((*preset_configs)[end].quality >= quality) 502 { 503 break; 504 } 505 } 506 507 start = end == 0 ? 0 : end - 1; 508 509 // Start and end node are the same - so just transfer the values. 510 if (start == end) 511 { 512 config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit; 513 config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit; 514 config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit; 515 config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit; 516 config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit; 517 config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit; 518 config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit; 519 config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit; 520 config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit; 521 config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit; 522 config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels, 523 (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels); 524 525 config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot; 526 527 config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor; 528 config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor; 529 config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation; 530 config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable; 531 } 532 // Start and end node are not the same - so interpolate between them 
533 else 534 { 535 auto& node_a = (*preset_configs)[start]; 536 auto& node_b = (*preset_configs)[end]; 537 538 float wt_range = node_b.quality - node_a.quality; 539 assert(wt_range > 0); 540 541 // Compute interpolation factors 542 float wt_node_a = (node_b.quality - quality) / wt_range; 543 float wt_node_b = (quality - node_a.quality) / wt_range; 544 545 #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b)) 546 #define LERPI(param) astc::flt2int_rtn(\ 547 (static_cast<float>(node_a.param) * wt_node_a) + \ 548 (static_cast<float>(node_b.param) * wt_node_b)) 549 #define LERPUI(param) static_cast<unsigned int>(LERPI(param)) 550 551 config.tune_partition_count_limit = LERPI(tune_partition_count_limit); 552 config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit); 553 config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit); 554 config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit); 555 config.tune_block_mode_limit = LERPI(tune_block_mode_limit); 556 config.tune_refinement_limit = LERPI(tune_refinement_limit); 557 config.tune_candidate_limit = LERPUI(tune_candidate_limit); 558 config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit); 559 config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit); 560 config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit); 561 config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels, 562 LERP(tune_db_limit_b_base) - 19 * ltexels); 563 564 config.tune_mse_overshoot = LERP(tune_mse_overshoot); 565 566 config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor); 567 config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor); 568 config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation); 569 config.tune_search_mode0_enable = LERP(tune_search_mode0_enable); 570 
#undef LERP 571 #undef LERPI 572 #undef LERPUI 573 } 574 575 // Set heuristics to the defaults for each color profile 576 config.cw_r_weight = 1.0f; 577 config.cw_g_weight = 1.0f; 578 config.cw_b_weight = 1.0f; 579 config.cw_a_weight = 1.0f; 580 581 config.a_scale_radius = 0; 582 583 config.rgbm_m_scale = 0.0f; 584 585 config.profile = profile; 586 587 // Values in this enum are from an external user, so not guaranteed to be 588 // bounded to the enum values 589 switch (static_cast<int>(profile)) 590 { 591 case ASTCENC_PRF_LDR: 592 case ASTCENC_PRF_LDR_SRGB: 593 break; 594 case ASTCENC_PRF_HDR_RGB_LDR_A: 595 case ASTCENC_PRF_HDR: 596 config.tune_db_limit = 999.0f; 597 config.tune_search_mode0_enable = 0.0f; 598 break; 599 default: 600 return ASTCENC_ERR_BAD_PROFILE; 601 } 602 603 // Flags field must not contain any unknown flag bits 604 status = validate_flags(profile, flags); 605 if (status != ASTCENC_SUCCESS) 606 { 607 return status; 608 } 609 610 if (flags & ASTCENC_FLG_MAP_NORMAL) 611 { 612 // Normal map encoding uses L+A blocks, so allow one more partitioning 613 // than normal. We need need fewer bits for endpoints, so more likely 614 // to be able to use more partitions than an RGB/RGBA block 615 config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u); 616 617 config.cw_g_weight = 0.0f; 618 config.cw_b_weight = 0.0f; 619 config.tune_2partition_early_out_limit_factor *= 1.5f; 620 config.tune_3partition_early_out_limit_factor *= 1.5f; 621 config.tune_2plane_early_out_limit_correlation = 0.99f; 622 623 // Normals are prone to blocking artifacts on smooth curves 624 // so force compressor to try harder here ... 
625 config.tune_db_limit *= 1.03f; 626 } 627 else if (flags & ASTCENC_FLG_MAP_RGBM) 628 { 629 config.rgbm_m_scale = 5.0f; 630 config.cw_a_weight = 2.0f * config.rgbm_m_scale; 631 } 632 else // (This is color data) 633 { 634 // This is a very basic perceptual metric for RGB color data, which weights error 635 // significance by the perceptual luminance contribution of each color channel. For 636 // luminance the usual weights to compute luminance from a linear RGB value are as 637 // follows: 638 // 639 // l = r * 0.3 + g * 0.59 + b * 0.11 640 // 641 // ... but we scale these up to keep a better balance between color and alpha. Note 642 // that if the content is using alpha we'd recommend using the -a option to weight 643 // the color contribution by the alpha transparency. 644 if (flags & ASTCENC_FLG_USE_PERCEPTUAL) 645 { 646 config.cw_r_weight = 0.30f * 2.25f; 647 config.cw_g_weight = 0.59f * 2.25f; 648 config.cw_b_weight = 0.11f * 2.25f; 649 } 650 } 651 config.flags = flags; 652 653 return ASTCENC_SUCCESS; 654} 655 656/* See header for documentation. */ 657astcenc_error astcenc_context_alloc( 658 const astcenc_config* configp, 659 unsigned int thread_count, 660 astcenc_context** context 661) { 662 astcenc_error status; 663 const astcenc_config& config = *configp; 664 665 status = validate_cpu_float(); 666 if (status != ASTCENC_SUCCESS) 667 { 668 return status; 669 } 670 671 if (thread_count == 0) 672 { 673 return ASTCENC_ERR_BAD_PARAM; 674 } 675 676#if defined(ASTCENC_DIAGNOSTICS) 677 // Force single threaded compressor use in diagnostic mode. 
678 if (thread_count != 1) 679 { 680 return ASTCENC_ERR_BAD_PARAM; 681 } 682#endif 683 684#ifndef ASTC_CUSTOMIZED_ENABLE 685 if (config.privateProfile == CUSTOMIZED_PROFILE) 686 { 687 return ASTCENC_ERR_BAD_PARAM; 688 } 689#endif 690 691 astcenc_context* ctxo = new astcenc_context; 692 astcenc_contexti* ctx = &ctxo->context; 693 ctx->thread_count = thread_count; 694 ctx->config = config; 695 ctx->working_buffers = nullptr; 696 697 // These are allocated per-compress, as they depend on image size 698 ctx->input_alpha_averages = nullptr; 699 700 // Copy the config first and validate the copy (we may modify it) 701 status = validate_config(ctx->config); 702 if (status != ASTCENC_SUCCESS) 703 { 704 delete ctxo; 705 return status; 706 } 707 708 ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN); 709 if (!ctx->bsd) 710 { 711 delete ctxo; 712 return ASTCENC_ERR_OUT_OF_MEM; 713 } 714 715 bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); 716#ifdef ASTC_CUSTOMIZED_ENABLE 717 if (!init_block_size_descriptor(ctx->config.privateProfile, config.block_x, config.block_y, config.block_z, 718 can_omit_modes, 719 config.tune_partition_count_limit, 720 static_cast<float>(config.tune_block_mode_limit) / 100.0f, 721 *ctx->bsd)) 722 { 723 aligned_free<block_size_descriptor>(ctx->bsd); 724 delete ctxo; 725 *context = nullptr; 726 return ASTCENC_ERR_DLOPEN_FAILED; 727 } 728#else 729 init_block_size_descriptor(ctx->config.privateProfile, config.block_x, config.block_y, config.block_z, 730 can_omit_modes, 731 config.tune_partition_count_limit, 732 static_cast<float>(config.tune_block_mode_limit) / 100.0f, 733 *ctx->bsd); 734#endif 735 736#if !defined(ASTCENC_DECOMPRESS_ONLY) 737 // Do setup only needed by compression 738 if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) 739 { 740 // Turn a dB limit into a per-texel error for faster use later 741 if ((ctx->config.profile == ASTCENC_PRF_LDR) || 
(ctx->config.profile == ASTCENC_PRF_LDR_SRGB)) 742 { 743 ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; 744 } 745 else 746 { 747 ctx->config.tune_db_limit = 0.0f; 748 } 749 750 size_t worksize = sizeof(compression_working_buffers) * thread_count; 751 ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN); 752 static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0), 753 "compression_working_buffers size must be multiple of vector alignment"); 754 if (!ctx->working_buffers) 755 { 756 aligned_free<block_size_descriptor>(ctx->bsd); 757 delete ctxo; 758 *context = nullptr; 759 return ASTCENC_ERR_OUT_OF_MEM; 760 } 761 } 762#endif 763 764#if defined(ASTCENC_DIAGNOSTICS) 765 ctx->trace_log = new TraceLog(ctx->config.trace_file_path); 766 if (!ctx->trace_log->m_file) 767 { 768 return ASTCENC_ERR_DTRACE_FAILURE; 769 } 770 771 trace_add_data("block_x", config.block_x); 772 trace_add_data("block_y", config.block_y); 773 trace_add_data("block_z", config.block_z); 774#endif 775 776 *context = ctxo; 777 778#if !defined(ASTCENC_DECOMPRESS_ONLY) 779 prepare_angular_tables(); 780#endif 781 782 return ASTCENC_SUCCESS; 783} 784 785/* See header dor documentation. */ 786void astcenc_context_free( 787 astcenc_context* ctxo 788) { 789 if (ctxo) 790 { 791 astcenc_contexti* ctx = &ctxo->context; 792 if (ctx->working_buffers) 793 { 794 aligned_free<compression_working_buffers>(ctx->working_buffers); 795 } 796 else 797 { 798 printf("ctx->working_buffers is nullptr !!\n"); 799 } 800 if (ctx->bsd) 801 { 802 aligned_free<block_size_descriptor>(ctx->bsd); 803 } 804 else 805 { 806 printf("ctx->bsd is nullptr !!\n"); 807 } 808#if defined(ASTCENC_DIAGNOSTICS) 809 delete ctx->trace_log; 810#endif 811 delete ctxo; 812 } 813} 814 815#if !defined(ASTCENC_DECOMPRESS_ONLY) 816 817/** 818 * @brief Compress an image, after any preflight has completed. 
819 * 820 * @param[out] ctxo The compressor context. 821 * @param thread_index The thread index. 822 * @param image The intput image. 823 * @param swizzle The input swizzle. 824 * @param[out] buffer The output array for the compressed data. 825 */ 826static void compress_image( 827 astcenc_context& ctxo, 828 unsigned int thread_index, 829 const astcenc_image& image, 830 const astcenc_swizzle& swizzle, 831#if QUALITY_CONTROL 832 uint8_t* buffer, 833 bool calQualityEnable, 834 int32_t *mse[RGBA_COM] 835#else 836 uint8_t* buffer 837#endif 838) { 839 astcenc_contexti& ctx = ctxo.context; 840 const block_size_descriptor& bsd = *ctx.bsd; 841 astcenc_profile decode_mode = ctx.config.profile; 842 843 image_block blk; 844 845 int block_x = bsd.xdim; 846 int block_y = bsd.ydim; 847 int block_z = bsd.zdim; 848 blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); 849 850 int dim_x = image.dim_x; 851 int dim_y = image.dim_y; 852 int dim_z = image.dim_z; 853 854 int xblocks = (dim_x + block_x - 1) / block_x; 855 int yblocks = (dim_y + block_y - 1) / block_y; 856 int zblocks = (dim_z + block_z - 1) / block_z; 857 int block_count = zblocks * yblocks * xblocks; 858 859 int row_blocks = xblocks; 860 int plane_blocks = xblocks * yblocks; 861 862 blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8; 863 864 // Populate the block channel weights 865 blk.channel_weight = vfloat4(ctx.config.cw_r_weight, 866 ctx.config.cw_g_weight, 867 ctx.config.cw_b_weight, 868 ctx.config.cw_a_weight); 869 870 // Use preallocated scratch buffer 871 auto& temp_buffers = ctx.working_buffers[thread_index]; 872 873 // Only the first thread actually runs the initializer 874 ctxo.manage_compress.init(block_count, ctx.config.progress_callback); 875 876 // Determine if we can use an optimized load function 877 bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) || 878 (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A); 879 880 bool 
needs_hdr = (decode_mode == ASTCENC_PRF_HDR) || 881 (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A); 882 883 bool use_fast_load = !needs_swz && !needs_hdr && 884 block_z == 1 && image.data_type == ASTCENC_TYPE_U8; 885 886 auto load_func = load_image_block; 887 if (use_fast_load) 888 { 889 load_func = load_image_block_fast_ldr; 890 } 891 892 // All threads run this processing loop until there is no work remaining 893 while (true) 894 { 895 unsigned int count; 896 unsigned int base = ctxo.manage_compress.get_task_assignment(16, count); 897 if (!count) 898 { 899 break; 900 } 901 902 for (unsigned int i = base; i < base + count; i++) 903 { 904 // Decode i into x, y, z block indices 905 int z = i / plane_blocks; 906 unsigned int rem = i - (z * plane_blocks); 907 int y = rem / row_blocks; 908 int x = rem - (y * row_blocks); 909 910 // Test if we can apply some basic alpha-scale RDO 911 bool use_full_block = true; 912 if (ctx.config.a_scale_radius != 0 && block_z == 1) 913 { 914 int start_x = x * block_x; 915 int end_x = astc::min(dim_x, start_x + block_x); 916 917 int start_y = y * block_y; 918 int end_y = astc::min(dim_y, start_y + block_y); 919 920 // SATs accumulate error, so don't test exactly zero. Test for 921 // less than 1 alpha in the expanded block footprint that 922 // includes the alpha radius. 923 int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1); 924 925 int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1); 926 927 float footprint = static_cast<float>(x_footprint * y_footprint); 928 float threshold = 0.9f / (255.0f * footprint); 929 930 // Do we have any alpha values? 
931 use_full_block = false; 932 for (int ay = start_y; ay < end_y; ay++) 933 { 934 for (int ax = start_x; ax < end_x; ax++) 935 { 936 float a_avg = ctx.input_alpha_averages[ay * dim_x + ax]; 937 if (a_avg > threshold) 938 { 939 use_full_block = true; 940 ax = end_x; 941 ay = end_y; 942 } 943 } 944 } 945 } 946 947 // Fetch the full block for compression 948 if (use_full_block) 949 { 950 load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle); 951 952 // Scale RGB error contribution by the maximum alpha in the block 953 // This encourages preserving alpha accuracy in regions with high 954 // transparency, and can buy up to 0.5 dB PSNR. 955 if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT) 956 { 957 float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f); 958 blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale, 959 ctx.config.cw_g_weight * alpha_scale, 960 ctx.config.cw_b_weight * alpha_scale, 961 ctx.config.cw_a_weight); 962 } 963 } 964 // Apply alpha scale RDO - substitute constant color block 965 else 966 { 967 blk.origin_texel = vfloat4::zero(); 968 blk.data_min = vfloat4::zero(); 969 blk.data_mean = vfloat4::zero(); 970 blk.data_max = vfloat4::zero(); 971 blk.grayscale = true; 972 } 973 974 int offset = ((z * yblocks + y) * xblocks + x) * 16; 975 uint8_t *bp = buffer + offset; 976#if QUALITY_CONTROL 977 int32_t *mseBlock[RGBA_COM] = {nullptr, nullptr, nullptr, nullptr}; 978 if (calQualityEnable) { 979 offset = (z * yblocks + y) * xblocks + x; 980 mseBlock[R_COM] = mse[R_COM] + offset; 981 mseBlock[G_COM] = mse[G_COM] + offset; 982 mseBlock[B_COM] = mse[B_COM] + offset; 983 mseBlock[A_COM] = mse[A_COM] + offset; 984 } 985 compress_block(ctx, blk, bp, temp_buffers, calQualityEnable, mseBlock); 986#else 987 compress_block(ctx, blk, bp, temp_buffers); 988#endif 989 } 990 991 ctxo.manage_compress.complete_task_assignment(count); 992 } 993} 994 995/** 996 * @brief Compute regional averages in an image. 
997 * 998 * This function can be called by multiple threads, but only after a single 999 * thread calls the setup function @c init_compute_averages(). 1000 * 1001 * Results are written back into @c img->input_alpha_averages. 1002 * 1003 * @param[out] ctx The context. 1004 * @param ag The average and variance arguments created during setup. 1005 */ 1006static void compute_averages( 1007 astcenc_context& ctx, 1008 const avg_args &ag 1009) { 1010 pixel_region_args arg = ag.arg; 1011 arg.work_memory = new vfloat4[ag.work_memory_size]; 1012 1013 int size_x = ag.img_size_x; 1014 int size_y = ag.img_size_y; 1015 int size_z = ag.img_size_z; 1016 1017 int step_xy = ag.blk_size_xy; 1018 int step_z = ag.blk_size_z; 1019 1020 int y_tasks = (size_y + step_xy - 1) / step_xy; 1021 1022 // All threads run this processing loop until there is no work remaining 1023 while (true) 1024 { 1025 unsigned int count; 1026 unsigned int base = ctx.manage_avg.get_task_assignment(16, count); 1027 if (!count) 1028 { 1029 break; 1030 } 1031 1032 for (unsigned int i = base; i < base + count; i++) 1033 { 1034 int z = (i / (y_tasks)) * step_z; 1035 int y = (i - (z * y_tasks)) * step_xy; 1036 1037 arg.size_z = astc::min(step_z, size_z - z); 1038 arg.offset_z = z; 1039 1040 arg.size_y = astc::min(step_xy, size_y - y); 1041 arg.offset_y = y; 1042 1043 for (int x = 0; x < size_x; x += step_xy) 1044 { 1045 arg.size_x = astc::min(step_xy, size_x - x); 1046 arg.offset_x = x; 1047 compute_pixel_region_variance(ctx.context, arg); 1048 } 1049 } 1050 1051 ctx.manage_avg.complete_task_assignment(count); 1052 } 1053 1054 delete[] arg.work_memory; 1055} 1056 1057#endif 1058 1059/* See header for documentation. 
*/ 1060astcenc_error astcenc_compress_image( 1061 astcenc_context* ctxo, 1062 astcenc_image* imagep, 1063 const astcenc_swizzle* swizzle, 1064 uint8_t* data_out, 1065 size_t data_len, 1066#if QUALITY_CONTROL 1067 bool calQualityEnable, 1068 int32_t *mse[RGBA_COM], 1069#endif 1070 unsigned int thread_index 1071) { 1072#if defined(ASTCENC_DECOMPRESS_ONLY) 1073 (void)ctxo; 1074 (void)imagep; 1075 (void)swizzle; 1076 (void)data_out; 1077 (void)data_len; 1078 (void)thread_index; 1079 return ASTCENC_ERR_BAD_CONTEXT; 1080#else 1081 astcenc_contexti* ctx = &ctxo->context; 1082 astcenc_error status; 1083 astcenc_image& image = *imagep; 1084 1085 if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) 1086 { 1087 return ASTCENC_ERR_BAD_CONTEXT; 1088 } 1089 1090 status = validate_compression_swizzle(*swizzle); 1091 if (status != ASTCENC_SUCCESS) 1092 { 1093 return status; 1094 } 1095 1096 if (thread_index >= ctx->thread_count) 1097 { 1098 return ASTCENC_ERR_BAD_PARAM; 1099 } 1100 1101 unsigned int block_x = ctx->config.block_x; 1102 unsigned int block_y = ctx->config.block_y; 1103 unsigned int block_z = ctx->config.block_z; 1104 1105 unsigned int xblocks = (image.dim_x + block_x - 1) / block_x; 1106 unsigned int yblocks = (image.dim_y + block_y - 1) / block_y; 1107 unsigned int zblocks = (image.dim_z + block_z - 1) / block_z; 1108 1109 // Check we have enough output space (16 bytes per block) 1110 size_t size_needed = xblocks * yblocks * zblocks * 16; 1111 if (data_len < size_needed) 1112 { 1113 return ASTCENC_ERR_OUT_OF_MEM; 1114 } 1115 1116 // If context thread count is one then implicitly reset 1117 if (ctx->thread_count == 1) 1118 { 1119 astcenc_compress_reset(ctxo); 1120 } 1121 1122 if (ctx->config.a_scale_radius != 0) 1123 { 1124 // First thread to enter will do setup, other threads will subsequently 1125 // enter the critical section but simply skip over the initialization 1126 auto init_avg = [ctx, &image, swizzle]() { 1127 // Perform memory allocations for the 
destination buffers 1128 size_t texel_count = image.dim_x * image.dim_y * image.dim_z; 1129 ctx->input_alpha_averages = new float[texel_count]; 1130 1131 return init_compute_averages( 1132 image, ctx->config.a_scale_radius, *swizzle, 1133 ctx->avg_preprocess_args); 1134 }; 1135 1136 // Only the first thread actually runs the initializer 1137 ctxo->manage_avg.init(init_avg); 1138 1139 // All threads will enter this function and dynamically grab work 1140 compute_averages(*ctxo, ctx->avg_preprocess_args); 1141 } 1142 1143 // Wait for compute_averages to complete before compressing 1144 ctxo->manage_avg.wait(); 1145#if QUALITY_CONTROL 1146 compress_image(*ctxo, thread_index, image, *swizzle, data_out, calQualityEnable, mse); 1147#else 1148 compress_image(*ctxo, thread_index, image, *swizzle, data_out); 1149#endif 1150 // Wait for compress to complete before freeing memory 1151 ctxo->manage_compress.wait(); 1152 1153 auto term_compress = [ctx]() { 1154 delete[] ctx->input_alpha_averages; 1155 ctx->input_alpha_averages = nullptr; 1156 }; 1157 1158 // Only the first thread to arrive actually runs the term 1159 ctxo->manage_compress.term(term_compress); 1160 1161 return ASTCENC_SUCCESS; 1162#endif 1163} 1164 1165/* See header for documentation. */ 1166astcenc_error astcenc_compress_reset( 1167 astcenc_context* ctxo 1168) { 1169#if defined(ASTCENC_DECOMPRESS_ONLY) 1170 (void)ctxo; 1171 return ASTCENC_ERR_BAD_CONTEXT; 1172#else 1173 astcenc_contexti* ctx = &ctxo->context; 1174 if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) 1175 { 1176 return ASTCENC_ERR_BAD_CONTEXT; 1177 } 1178 1179 ctxo->manage_avg.reset(); 1180 ctxo->manage_compress.reset(); 1181 return ASTCENC_SUCCESS; 1182#endif 1183} 1184 1185/* See header for documentation. 
*/ 1186astcenc_error astcenc_decompress_image( 1187 astcenc_context* ctxo, 1188 const uint8_t* data, 1189 size_t data_len, 1190 astcenc_image* image_outp, 1191 const astcenc_swizzle* swizzle, 1192 unsigned int thread_index 1193) { 1194 astcenc_error status; 1195 astcenc_image& image_out = *image_outp; 1196 astcenc_contexti* ctx = &ctxo->context; 1197 1198 // Today this doesn't matter (working set on stack) but might in future ... 1199 if (thread_index >= ctx->thread_count) 1200 { 1201 return ASTCENC_ERR_BAD_PARAM; 1202 } 1203 1204 status = validate_decompression_swizzle(*swizzle); 1205 if (status != ASTCENC_SUCCESS) 1206 { 1207 return status; 1208 } 1209 1210 unsigned int block_x = ctx->config.block_x; 1211 unsigned int block_y = ctx->config.block_y; 1212 unsigned int block_z = ctx->config.block_z; 1213 1214 unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; 1215 unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; 1216 unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; 1217 unsigned int block_count = zblocks * yblocks * xblocks; 1218 1219 int row_blocks = xblocks; 1220 int plane_blocks = xblocks * yblocks; 1221 1222 // Check we have enough output space (16 bytes per block) 1223 size_t size_needed = xblocks * yblocks * zblocks * 16; 1224 if (data_len < size_needed) 1225 { 1226 return ASTCENC_ERR_OUT_OF_MEM; 1227 } 1228 1229 image_block blk; 1230 blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); 1231 1232 // Decode mode inferred from the output data type 1233 blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8; 1234 1235 // If context thread count is one then implicitly reset 1236 if (ctx->thread_count == 1) 1237 { 1238 astcenc_decompress_reset(ctxo); 1239 } 1240 1241 // Only the first thread actually runs the initializer 1242 ctxo->manage_decompress.init(block_count, nullptr); 1243 1244 // All threads run this processing loop until there is no work remaining 1245 while (true) 1246 { 1247 unsigned 
int count; 1248 unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count); 1249 if (!count) 1250 { 1251 break; 1252 } 1253 1254 for (unsigned int i = base; i < base + count; i++) 1255 { 1256 // Decode i into x, y, z block indices 1257 int z = i / plane_blocks; 1258 unsigned int rem = i - (z * plane_blocks); 1259 int y = rem / row_blocks; 1260 int x = rem - (y * row_blocks); 1261 1262 unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; 1263 const uint8_t* bp = data + offset; 1264 1265 symbolic_compressed_block scb; 1266 1267 physical_to_symbolic(*ctx->bsd, bp, scb); 1268 1269 decompress_symbolic_block(ctx->config.profile, *ctx->bsd, 1270 x * block_x, y * block_y, z * block_z, 1271 scb, blk); 1272 1273 store_image_block(image_out, blk, *ctx->bsd, 1274 x * block_x, y * block_y, z * block_z, *swizzle); 1275 } 1276 1277 ctxo->manage_decompress.complete_task_assignment(count); 1278 } 1279 1280 return ASTCENC_SUCCESS; 1281} 1282 1283/* See header for documentation. */ 1284astcenc_error astcenc_decompress_reset( 1285 astcenc_context* ctxo 1286) { 1287 ctxo->manage_decompress.reset(); 1288 return ASTCENC_SUCCESS; 1289} 1290 1291/* See header for documentation. 
*/ 1292astcenc_error astcenc_get_block_info( 1293 astcenc_context* ctxo, 1294 const uint8_t data[16], 1295 astcenc_block_info* info 1296) { 1297#if defined(ASTCENC_DECOMPRESS_ONLY) 1298 (void)ctxo; 1299 (void)data; 1300 (void)info; 1301 return ASTCENC_ERR_BAD_CONTEXT; 1302#else 1303 astcenc_contexti* ctx = &ctxo->context; 1304 1305 // Decode the compressed data into a symbolic form 1306 symbolic_compressed_block scb; 1307 physical_to_symbolic(*ctx->bsd, data, scb); 1308 1309 // Fetch the appropriate partition and decimation tables 1310 block_size_descriptor& bsd = *ctx->bsd; 1311 1312 // Start from a clean slate 1313 memset(info, 0, sizeof(*info)); 1314 1315 // Basic info we can always populate 1316 info->profile = ctx->config.profile; 1317 1318 info->block_x = ctx->config.block_x; 1319 info->block_y = ctx->config.block_y; 1320 info->block_z = ctx->config.block_z; 1321 info->texel_count = bsd.texel_count; 1322 1323 // Check for error blocks first 1324 info->is_error_block = scb.block_type == SYM_BTYPE_ERROR; 1325 if (info->is_error_block) 1326 { 1327 return ASTCENC_SUCCESS; 1328 } 1329 1330 // Check for constant color blocks second 1331 info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 || 1332 scb.block_type == SYM_BTYPE_CONST_U16; 1333 if (info->is_constant_block) 1334 { 1335 return ASTCENC_SUCCESS; 1336 } 1337 1338 // Otherwise handle a full block ; known to be valid after conditions above have been checked 1339 int partition_count = scb.partition_count; 1340 const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); 1341 1342 const block_mode& bm = bsd.get_block_mode(scb.block_mode); 1343 const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode); 1344 1345 info->weight_x = di.weight_x; 1346 info->weight_y = di.weight_y; 1347 info->weight_z = di.weight_z; 1348 1349 info->is_dual_plane_block = bm.is_dual_plane != 0; 1350 1351 info->partition_count = scb.partition_count; 1352 info->partition_index = 
scb.partition_index; 1353 info->dual_plane_component = scb.plane2_component; 1354 1355 info->color_level_count = get_quant_level(scb.get_color_quant_mode()); 1356 info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); 1357 1358 // Unpack color endpoints for each active partition 1359 for (unsigned int i = 0; i < scb.partition_count; i++) 1360 { 1361 bool rgb_hdr; 1362 bool a_hdr; 1363 vint4 endpnt[2]; 1364 1365 unpack_color_endpoints(ctx->config.profile, 1366 scb.color_formats[i], 1367 scb.color_values[i], 1368 rgb_hdr, a_hdr, 1369 endpnt[0], endpnt[1]); 1370 1371 // Store the color endpoint mode info 1372 info->color_endpoint_modes[i] = scb.color_formats[i]; 1373 info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr; 1374 1375 // Store the unpacked and decoded color endpoint 1376 vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr); 1377 for (int j = 0; j < 2; j++) 1378 { 1379 vint4 color_lns = lns_to_sf16(endpnt[j]); 1380 vint4 color_unorm = unorm16_to_sf16(endpnt[j]); 1381 vint4 datai = select(color_unorm, color_lns, hdr_mask); 1382 store(float16_to_float(datai), info->color_endpoints[i][j]); 1383 } 1384 } 1385 1386 // Unpack weights for each texel 1387 int weight_plane1[BLOCK_MAX_TEXELS]; 1388 int weight_plane2[BLOCK_MAX_TEXELS]; 1389 1390 unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2); 1391 for (unsigned int i = 0; i < bsd.texel_count; i++) 1392 { 1393 info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM); 1394 if (info->is_dual_plane_block) 1395 { 1396 info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM); 1397 } 1398 } 1399 1400 // Unpack partition assignments for each texel 1401 for (unsigned int i = 0; i < bsd.texel_count; i++) 1402 { 1403 info->partition_assignment[i] = pi.partition_of_texel[i]; 1404 } 1405 1406 return ASTCENC_SUCCESS; 1407#endif 1408} 1409 1410/* See header for documentation. 
*/ 1411const char* astcenc_get_error_string( 1412 astcenc_error status 1413) { 1414 // Values in this enum are from an external user, so not guaranteed to be 1415 // bounded to the enum values 1416 switch (static_cast<int>(status)) 1417 { 1418 case ASTCENC_SUCCESS: 1419 return "ASTCENC_SUCCESS"; 1420 case ASTCENC_ERR_OUT_OF_MEM: 1421 return "ASTCENC_ERR_OUT_OF_MEM"; 1422 case ASTCENC_ERR_BAD_CPU_FLOAT: 1423 return "ASTCENC_ERR_BAD_CPU_FLOAT"; 1424 case ASTCENC_ERR_BAD_PARAM: 1425 return "ASTCENC_ERR_BAD_PARAM"; 1426 case ASTCENC_ERR_BAD_BLOCK_SIZE: 1427 return "ASTCENC_ERR_BAD_BLOCK_SIZE"; 1428 case ASTCENC_ERR_BAD_PROFILE: 1429 return "ASTCENC_ERR_BAD_PROFILE"; 1430 case ASTCENC_ERR_BAD_QUALITY: 1431 return "ASTCENC_ERR_BAD_QUALITY"; 1432 case ASTCENC_ERR_BAD_FLAGS: 1433 return "ASTCENC_ERR_BAD_FLAGS"; 1434 case ASTCENC_ERR_BAD_SWIZZLE: 1435 return "ASTCENC_ERR_BAD_SWIZZLE"; 1436 case ASTCENC_ERR_BAD_CONTEXT: 1437 return "ASTCENC_ERR_BAD_CONTEXT"; 1438 case ASTCENC_ERR_NOT_IMPLEMENTED: 1439 return "ASTCENC_ERR_NOT_IMPLEMENTED"; 1440 case ASTCENC_ERR_BAD_DECODE_MODE: 1441 return "ASTCENC_ERR_BAD_DECODE_MODE"; 1442#if defined(ASTCENC_DIAGNOSTICS) 1443 case ASTCENC_ERR_DTRACE_FAILURE: 1444 return "ASTCENC_ERR_DTRACE_FAILURE"; 1445#endif 1446 default: 1447 return nullptr; 1448 } 1449} 1450