1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0 2cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 3cc1dc7a3Sopenharmony_ci// Copyright 2011-2024 Arm Limited 4cc1dc7a3Sopenharmony_ci// 5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy 7cc1dc7a3Sopenharmony_ci// of the License at: 8cc1dc7a3Sopenharmony_ci// 9cc1dc7a3Sopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 10cc1dc7a3Sopenharmony_ci// 11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software 12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations 15cc1dc7a3Sopenharmony_ci// under the License. 16cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 17cc1dc7a3Sopenharmony_ci 18cc1dc7a3Sopenharmony_ci/** 19cc1dc7a3Sopenharmony_ci * @brief Functions for the library entrypoint. 20cc1dc7a3Sopenharmony_ci */ 21cc1dc7a3Sopenharmony_ci 22cc1dc7a3Sopenharmony_ci#include <array> 23cc1dc7a3Sopenharmony_ci#include <cstring> 24cc1dc7a3Sopenharmony_ci#include <new> 25cc1dc7a3Sopenharmony_ci 26cc1dc7a3Sopenharmony_ci#include "astcenc.h" 27cc1dc7a3Sopenharmony_ci#include "astcenc_internal_entry.h" 28cc1dc7a3Sopenharmony_ci#include "astcenc_diagnostic_trace.h" 29cc1dc7a3Sopenharmony_ci 30cc1dc7a3Sopenharmony_ci/** 31cc1dc7a3Sopenharmony_ci * @brief Record of the quality tuning parameter values. 32cc1dc7a3Sopenharmony_ci * 33cc1dc7a3Sopenharmony_ci * See the @c astcenc_config structure for detailed parameter documentation. 
 *
 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
 * for the more thorough search presets because the underlying db_limit is so much higher.
 */
struct astcenc_preset_config
{
	/** @brief The quality level this row is keyed on when presets are selected or interpolated. */
	float quality;

	// Integer search limits. Each one is transferred (directly, or interpolated and rounded) into
	// the astcenc_config field of the same name by astcenc_config_init().
	unsigned int tune_partition_count_limit;
	unsigned int tune_2partition_index_limit;
	unsigned int tune_3partition_index_limit;
	unsigned int tune_4partition_index_limit;
	unsigned int tune_block_mode_limit;
	unsigned int tune_refinement_limit;
	unsigned int tune_candidate_limit;
	unsigned int tune_2partitioning_candidate_limit;
	unsigned int tune_3partitioning_candidate_limit;
	unsigned int tune_4partitioning_candidate_limit;

	// Base values used to derive the config tune_db_limit: astcenc_config_init() subtracts a term
	// scaled by log10 of the block texel count from each base and keeps the larger result.
	float tune_db_limit_a_base;
	float tune_db_limit_b_base;

	// Floating-point thresholds, transferred or linearly interpolated into the astcenc_config
	// field of the same name. The mode0 enable is stored as a float like the other thresholds.
	float tune_mse_overshoot;
	float tune_2partition_early_out_limit_factor;
	float tune_3partition_early_out_limit_factor;
	float tune_2plane_early_out_limit_correlation;
	float tune_search_mode0_enable;
};

/**
 * @brief The static presets for high bandwidth encodings (x < 25 texels per block).
63cc1dc7a3Sopenharmony_ci */ 64cc1dc7a3Sopenharmony_cistatic const std::array<astcenc_preset_config, 6> preset_configs_high {{ 65cc1dc7a3Sopenharmony_ci { 66cc1dc7a3Sopenharmony_ci ASTCENC_PRE_FASTEST, 67cc1dc7a3Sopenharmony_ci 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f 68cc1dc7a3Sopenharmony_ci }, { 69cc1dc7a3Sopenharmony_ci ASTCENC_PRE_FAST, 70cc1dc7a3Sopenharmony_ci 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f 71cc1dc7a3Sopenharmony_ci }, { 72cc1dc7a3Sopenharmony_ci ASTCENC_PRE_MEDIUM, 73cc1dc7a3Sopenharmony_ci 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f 74cc1dc7a3Sopenharmony_ci }, { 75cc1dc7a3Sopenharmony_ci ASTCENC_PRE_THOROUGH, 76cc1dc7a3Sopenharmony_ci 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f 77cc1dc7a3Sopenharmony_ci }, { 78cc1dc7a3Sopenharmony_ci ASTCENC_PRE_VERYTHOROUGH, 79cc1dc7a3Sopenharmony_ci 4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f 80cc1dc7a3Sopenharmony_ci }, { 81cc1dc7a3Sopenharmony_ci ASTCENC_PRE_EXHAUSTIVE, 82cc1dc7a3Sopenharmony_ci 4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f 83cc1dc7a3Sopenharmony_ci } 84cc1dc7a3Sopenharmony_ci}}; 85cc1dc7a3Sopenharmony_ci 86cc1dc7a3Sopenharmony_ci/** 87cc1dc7a3Sopenharmony_ci * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block). 
88cc1dc7a3Sopenharmony_ci */ 89cc1dc7a3Sopenharmony_cistatic const std::array<astcenc_preset_config, 6> preset_configs_mid {{ 90cc1dc7a3Sopenharmony_ci { 91cc1dc7a3Sopenharmony_ci ASTCENC_PRE_FASTEST, 92cc1dc7a3Sopenharmony_ci 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f 93cc1dc7a3Sopenharmony_ci }, { 94cc1dc7a3Sopenharmony_ci ASTCENC_PRE_FAST, 95cc1dc7a3Sopenharmony_ci 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f 96cc1dc7a3Sopenharmony_ci }, { 97cc1dc7a3Sopenharmony_ci ASTCENC_PRE_MEDIUM, 98cc1dc7a3Sopenharmony_ci 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f 99cc1dc7a3Sopenharmony_ci }, { 100cc1dc7a3Sopenharmony_ci ASTCENC_PRE_THOROUGH, 101cc1dc7a3Sopenharmony_ci 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f 102cc1dc7a3Sopenharmony_ci }, { 103cc1dc7a3Sopenharmony_ci ASTCENC_PRE_VERYTHOROUGH, 104cc1dc7a3Sopenharmony_ci 4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f 105cc1dc7a3Sopenharmony_ci }, { 106cc1dc7a3Sopenharmony_ci ASTCENC_PRE_EXHAUSTIVE, 107cc1dc7a3Sopenharmony_ci 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f 108cc1dc7a3Sopenharmony_ci } 109cc1dc7a3Sopenharmony_ci}}; 110cc1dc7a3Sopenharmony_ci 111cc1dc7a3Sopenharmony_ci/** 112cc1dc7a3Sopenharmony_ci * @brief The static presets for low bandwidth encodings (64 <= x texels per block). 
113cc1dc7a3Sopenharmony_ci */ 114cc1dc7a3Sopenharmony_cistatic const std::array<astcenc_preset_config, 6> preset_configs_low {{ 115cc1dc7a3Sopenharmony_ci { 116cc1dc7a3Sopenharmony_ci ASTCENC_PRE_FASTEST, 117cc1dc7a3Sopenharmony_ci 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f 118cc1dc7a3Sopenharmony_ci }, { 119cc1dc7a3Sopenharmony_ci ASTCENC_PRE_FAST, 120cc1dc7a3Sopenharmony_ci 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f 121cc1dc7a3Sopenharmony_ci }, { 122cc1dc7a3Sopenharmony_ci ASTCENC_PRE_MEDIUM, 123cc1dc7a3Sopenharmony_ci 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f 124cc1dc7a3Sopenharmony_ci }, { 125cc1dc7a3Sopenharmony_ci ASTCENC_PRE_THOROUGH, 126cc1dc7a3Sopenharmony_ci 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f 127cc1dc7a3Sopenharmony_ci }, { 128cc1dc7a3Sopenharmony_ci ASTCENC_PRE_VERYTHOROUGH, 129cc1dc7a3Sopenharmony_ci 4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f 130cc1dc7a3Sopenharmony_ci }, { 131cc1dc7a3Sopenharmony_ci ASTCENC_PRE_EXHAUSTIVE, 132cc1dc7a3Sopenharmony_ci 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f 133cc1dc7a3Sopenharmony_ci } 134cc1dc7a3Sopenharmony_ci}}; 135cc1dc7a3Sopenharmony_ci 136cc1dc7a3Sopenharmony_ci/** 137cc1dc7a3Sopenharmony_ci * @brief Validate CPU floating point meets assumptions made in the codec. 138cc1dc7a3Sopenharmony_ci * 139cc1dc7a3Sopenharmony_ci * The codec is written with the assumption that a float threaded through the @c if32 union will be 140cc1dc7a3Sopenharmony_ci * stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. This is always the 141cc1dc7a3Sopenharmony_ci * case in an IEEE-754 compliant system, however not every system or compilation mode is actually 142cc1dc7a3Sopenharmony_ci * IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled. 
143cc1dc7a3Sopenharmony_ci * 144cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 145cc1dc7a3Sopenharmony_ci */ 146cc1dc7a3Sopenharmony_cistatic astcenc_error validate_cpu_float() 147cc1dc7a3Sopenharmony_ci{ 148cc1dc7a3Sopenharmony_ci if32 p; 149cc1dc7a3Sopenharmony_ci volatile float xprec_testval = 2.51f; 150cc1dc7a3Sopenharmony_ci p.f = xprec_testval + 12582912.0f; 151cc1dc7a3Sopenharmony_ci float q = p.f - 12582912.0f; 152cc1dc7a3Sopenharmony_ci 153cc1dc7a3Sopenharmony_ci if (q != 3.0f) 154cc1dc7a3Sopenharmony_ci { 155cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_CPU_FLOAT; 156cc1dc7a3Sopenharmony_ci } 157cc1dc7a3Sopenharmony_ci 158cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 159cc1dc7a3Sopenharmony_ci} 160cc1dc7a3Sopenharmony_ci 161cc1dc7a3Sopenharmony_ci/** 162cc1dc7a3Sopenharmony_ci * @brief Validate config profile. 163cc1dc7a3Sopenharmony_ci * 164cc1dc7a3Sopenharmony_ci * @param profile The profile to check. 165cc1dc7a3Sopenharmony_ci * 166cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
167cc1dc7a3Sopenharmony_ci */ 168cc1dc7a3Sopenharmony_cistatic astcenc_error validate_profile( 169cc1dc7a3Sopenharmony_ci astcenc_profile profile 170cc1dc7a3Sopenharmony_ci) { 171cc1dc7a3Sopenharmony_ci // Values in this enum are from an external user, so not guaranteed to be 172cc1dc7a3Sopenharmony_ci // bounded to the enum values 173cc1dc7a3Sopenharmony_ci switch (static_cast<int>(profile)) 174cc1dc7a3Sopenharmony_ci { 175cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_LDR_SRGB: 176cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_LDR: 177cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_HDR_RGB_LDR_A: 178cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_HDR: 179cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 180cc1dc7a3Sopenharmony_ci default: 181cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PROFILE; 182cc1dc7a3Sopenharmony_ci } 183cc1dc7a3Sopenharmony_ci} 184cc1dc7a3Sopenharmony_ci 185cc1dc7a3Sopenharmony_ci/** 186cc1dc7a3Sopenharmony_ci * @brief Validate block size. 187cc1dc7a3Sopenharmony_ci * 188cc1dc7a3Sopenharmony_ci * @param block_x The block x dimensions. 189cc1dc7a3Sopenharmony_ci * @param block_y The block y dimensions. 190cc1dc7a3Sopenharmony_ci * @param block_z The block z dimensions. 191cc1dc7a3Sopenharmony_ci * 192cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
193cc1dc7a3Sopenharmony_ci */ 194cc1dc7a3Sopenharmony_cistatic astcenc_error validate_block_size( 195cc1dc7a3Sopenharmony_ci unsigned int block_x, 196cc1dc7a3Sopenharmony_ci unsigned int block_y, 197cc1dc7a3Sopenharmony_ci unsigned int block_z 198cc1dc7a3Sopenharmony_ci) { 199cc1dc7a3Sopenharmony_ci // Test if this is a legal block size at all 200cc1dc7a3Sopenharmony_ci bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) || 201cc1dc7a3Sopenharmony_ci ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z))); 202cc1dc7a3Sopenharmony_ci if (!is_legal) 203cc1dc7a3Sopenharmony_ci { 204cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_BLOCK_SIZE; 205cc1dc7a3Sopenharmony_ci } 206cc1dc7a3Sopenharmony_ci 207cc1dc7a3Sopenharmony_ci // Test if this build has sufficient capacity for this block size 208cc1dc7a3Sopenharmony_ci bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS; 209cc1dc7a3Sopenharmony_ci if (!have_capacity) 210cc1dc7a3Sopenharmony_ci { 211cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_NOT_IMPLEMENTED; 212cc1dc7a3Sopenharmony_ci } 213cc1dc7a3Sopenharmony_ci 214cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 215cc1dc7a3Sopenharmony_ci} 216cc1dc7a3Sopenharmony_ci 217cc1dc7a3Sopenharmony_ci/** 218cc1dc7a3Sopenharmony_ci * @brief Validate flags. 219cc1dc7a3Sopenharmony_ci * 220cc1dc7a3Sopenharmony_ci * @param profile The profile to check. 221cc1dc7a3Sopenharmony_ci * @param flags The flags to check. 222cc1dc7a3Sopenharmony_ci * 223cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
224cc1dc7a3Sopenharmony_ci */ 225cc1dc7a3Sopenharmony_cistatic astcenc_error validate_flags( 226cc1dc7a3Sopenharmony_ci astcenc_profile profile, 227cc1dc7a3Sopenharmony_ci unsigned int flags 228cc1dc7a3Sopenharmony_ci) { 229cc1dc7a3Sopenharmony_ci // Flags field must not contain any unknown flag bits 230cc1dc7a3Sopenharmony_ci unsigned int exMask = ~ASTCENC_ALL_FLAGS; 231cc1dc7a3Sopenharmony_ci if (popcount(flags & exMask) != 0) 232cc1dc7a3Sopenharmony_ci { 233cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_FLAGS; 234cc1dc7a3Sopenharmony_ci } 235cc1dc7a3Sopenharmony_ci 236cc1dc7a3Sopenharmony_ci // Flags field must only contain at most a single map type 237cc1dc7a3Sopenharmony_ci exMask = ASTCENC_FLG_MAP_NORMAL 238cc1dc7a3Sopenharmony_ci | ASTCENC_FLG_MAP_RGBM; 239cc1dc7a3Sopenharmony_ci if (popcount(flags & exMask) > 1) 240cc1dc7a3Sopenharmony_ci { 241cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_FLAGS; 242cc1dc7a3Sopenharmony_ci } 243cc1dc7a3Sopenharmony_ci 244cc1dc7a3Sopenharmony_ci // Decode_unorm8 must only be used with an LDR profile 245cc1dc7a3Sopenharmony_ci bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8; 246cc1dc7a3Sopenharmony_ci bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A); 247cc1dc7a3Sopenharmony_ci if (is_unorm8 && is_hdr) 248cc1dc7a3Sopenharmony_ci { 249cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_DECODE_MODE; 250cc1dc7a3Sopenharmony_ci } 251cc1dc7a3Sopenharmony_ci 252cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 253cc1dc7a3Sopenharmony_ci} 254cc1dc7a3Sopenharmony_ci 255cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 256cc1dc7a3Sopenharmony_ci 257cc1dc7a3Sopenharmony_ci/** 258cc1dc7a3Sopenharmony_ci * @brief Validate single channel compression swizzle. 259cc1dc7a3Sopenharmony_ci * 260cc1dc7a3Sopenharmony_ci * @param swizzle The swizzle to check. 261cc1dc7a3Sopenharmony_ci * 262cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
263cc1dc7a3Sopenharmony_ci */ 264cc1dc7a3Sopenharmony_cistatic astcenc_error validate_compression_swz( 265cc1dc7a3Sopenharmony_ci astcenc_swz swizzle 266cc1dc7a3Sopenharmony_ci) { 267cc1dc7a3Sopenharmony_ci // Not all enum values are handled; SWZ_Z is invalid for compression 268cc1dc7a3Sopenharmony_ci switch (static_cast<int>(swizzle)) 269cc1dc7a3Sopenharmony_ci { 270cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_R: 271cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_G: 272cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_B: 273cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_A: 274cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_0: 275cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_1: 276cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 277cc1dc7a3Sopenharmony_ci default: 278cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_SWIZZLE; 279cc1dc7a3Sopenharmony_ci } 280cc1dc7a3Sopenharmony_ci} 281cc1dc7a3Sopenharmony_ci 282cc1dc7a3Sopenharmony_ci/** 283cc1dc7a3Sopenharmony_ci * @brief Validate overall compression swizzle. 284cc1dc7a3Sopenharmony_ci * 285cc1dc7a3Sopenharmony_ci * @param swizzle The swizzle to check. 286cc1dc7a3Sopenharmony_ci * 287cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
288cc1dc7a3Sopenharmony_ci */ 289cc1dc7a3Sopenharmony_cistatic astcenc_error validate_compression_swizzle( 290cc1dc7a3Sopenharmony_ci const astcenc_swizzle& swizzle 291cc1dc7a3Sopenharmony_ci) { 292cc1dc7a3Sopenharmony_ci if (validate_compression_swz(swizzle.r) || 293cc1dc7a3Sopenharmony_ci validate_compression_swz(swizzle.g) || 294cc1dc7a3Sopenharmony_ci validate_compression_swz(swizzle.b) || 295cc1dc7a3Sopenharmony_ci validate_compression_swz(swizzle.a)) 296cc1dc7a3Sopenharmony_ci { 297cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_SWIZZLE; 298cc1dc7a3Sopenharmony_ci } 299cc1dc7a3Sopenharmony_ci 300cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 301cc1dc7a3Sopenharmony_ci} 302cc1dc7a3Sopenharmony_ci#endif 303cc1dc7a3Sopenharmony_ci 304cc1dc7a3Sopenharmony_ci/** 305cc1dc7a3Sopenharmony_ci * @brief Validate single channel decompression swizzle. 306cc1dc7a3Sopenharmony_ci * 307cc1dc7a3Sopenharmony_ci * @param swizzle The swizzle to check. 308cc1dc7a3Sopenharmony_ci * 309cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
310cc1dc7a3Sopenharmony_ci */ 311cc1dc7a3Sopenharmony_cistatic astcenc_error validate_decompression_swz( 312cc1dc7a3Sopenharmony_ci astcenc_swz swizzle 313cc1dc7a3Sopenharmony_ci) { 314cc1dc7a3Sopenharmony_ci // Values in this enum are from an external user, so not guaranteed to be 315cc1dc7a3Sopenharmony_ci // bounded to the enum values 316cc1dc7a3Sopenharmony_ci switch (static_cast<int>(swizzle)) 317cc1dc7a3Sopenharmony_ci { 318cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_R: 319cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_G: 320cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_B: 321cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_A: 322cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_0: 323cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_1: 324cc1dc7a3Sopenharmony_ci case ASTCENC_SWZ_Z: 325cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 326cc1dc7a3Sopenharmony_ci default: 327cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_SWIZZLE; 328cc1dc7a3Sopenharmony_ci } 329cc1dc7a3Sopenharmony_ci} 330cc1dc7a3Sopenharmony_ci 331cc1dc7a3Sopenharmony_ci/** 332cc1dc7a3Sopenharmony_ci * @brief Validate overall decompression swizzle. 333cc1dc7a3Sopenharmony_ci * 334cc1dc7a3Sopenharmony_ci * @param swizzle The swizzle to check. 335cc1dc7a3Sopenharmony_ci * 336cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
337cc1dc7a3Sopenharmony_ci */ 338cc1dc7a3Sopenharmony_cistatic astcenc_error validate_decompression_swizzle( 339cc1dc7a3Sopenharmony_ci const astcenc_swizzle& swizzle 340cc1dc7a3Sopenharmony_ci) { 341cc1dc7a3Sopenharmony_ci if (validate_decompression_swz(swizzle.r) || 342cc1dc7a3Sopenharmony_ci validate_decompression_swz(swizzle.g) || 343cc1dc7a3Sopenharmony_ci validate_decompression_swz(swizzle.b) || 344cc1dc7a3Sopenharmony_ci validate_decompression_swz(swizzle.a)) 345cc1dc7a3Sopenharmony_ci { 346cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_SWIZZLE; 347cc1dc7a3Sopenharmony_ci } 348cc1dc7a3Sopenharmony_ci 349cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 350cc1dc7a3Sopenharmony_ci} 351cc1dc7a3Sopenharmony_ci 352cc1dc7a3Sopenharmony_ci/** 353cc1dc7a3Sopenharmony_ci * Validate that an incoming configuration is in-spec. 354cc1dc7a3Sopenharmony_ci * 355cc1dc7a3Sopenharmony_ci * This function can respond in two ways: 356cc1dc7a3Sopenharmony_ci * 357cc1dc7a3Sopenharmony_ci * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown 358cc1dc7a3Sopenharmony_ci * for out-of-range inputs in this case. 359cc1dc7a3Sopenharmony_ci * * Numerical inputs and logic inputs are are logically invalid and which make no sense 360cc1dc7a3Sopenharmony_ci * algorithmically will return an error. 361cc1dc7a3Sopenharmony_ci * 362cc1dc7a3Sopenharmony_ci * @param[in,out] config The input compressor configuration. 363cc1dc7a3Sopenharmony_ci * 364cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. 
365cc1dc7a3Sopenharmony_ci */ 366cc1dc7a3Sopenharmony_cistatic astcenc_error validate_config( 367cc1dc7a3Sopenharmony_ci astcenc_config &config 368cc1dc7a3Sopenharmony_ci) { 369cc1dc7a3Sopenharmony_ci astcenc_error status; 370cc1dc7a3Sopenharmony_ci 371cc1dc7a3Sopenharmony_ci status = validate_profile(config.profile); 372cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 373cc1dc7a3Sopenharmony_ci { 374cc1dc7a3Sopenharmony_ci return status; 375cc1dc7a3Sopenharmony_ci } 376cc1dc7a3Sopenharmony_ci 377cc1dc7a3Sopenharmony_ci status = validate_flags(config.profile, config.flags); 378cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 379cc1dc7a3Sopenharmony_ci { 380cc1dc7a3Sopenharmony_ci return status; 381cc1dc7a3Sopenharmony_ci } 382cc1dc7a3Sopenharmony_ci 383cc1dc7a3Sopenharmony_ci status = validate_block_size(config.block_x, config.block_y, config.block_z); 384cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 385cc1dc7a3Sopenharmony_ci { 386cc1dc7a3Sopenharmony_ci return status; 387cc1dc7a3Sopenharmony_ci } 388cc1dc7a3Sopenharmony_ci 389cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY) 390cc1dc7a3Sopenharmony_ci // Decompress-only builds only support decompress-only contexts 391cc1dc7a3Sopenharmony_ci if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) 392cc1dc7a3Sopenharmony_ci { 393cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PARAM; 394cc1dc7a3Sopenharmony_ci } 395cc1dc7a3Sopenharmony_ci#endif 396cc1dc7a3Sopenharmony_ci 397cc1dc7a3Sopenharmony_ci config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); 398cc1dc7a3Sopenharmony_ci 399cc1dc7a3Sopenharmony_ci config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); 400cc1dc7a3Sopenharmony_ci config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); 401cc1dc7a3Sopenharmony_ci config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); 
402cc1dc7a3Sopenharmony_ci config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); 403cc1dc7a3Sopenharmony_ci config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); 404cc1dc7a3Sopenharmony_ci config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); 405cc1dc7a3Sopenharmony_ci config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); 406cc1dc7a3Sopenharmony_ci config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); 407cc1dc7a3Sopenharmony_ci config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); 408cc1dc7a3Sopenharmony_ci config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); 409cc1dc7a3Sopenharmony_ci config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); 410cc1dc7a3Sopenharmony_ci config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f); 411cc1dc7a3Sopenharmony_ci config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f); 412cc1dc7a3Sopenharmony_ci config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f); 413cc1dc7a3Sopenharmony_ci config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f); 414cc1dc7a3Sopenharmony_ci 415cc1dc7a3Sopenharmony_ci // Specifying a zero weight color component is not allowed; force to small value 416cc1dc7a3Sopenharmony_ci float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight), 417cc1dc7a3Sopenharmony_ci astc::max(config.cw_b_weight, config.cw_a_weight)); 418cc1dc7a3Sopenharmony_ci if (max_weight > 0.0f) 419cc1dc7a3Sopenharmony_ci { 
420cc1dc7a3Sopenharmony_ci max_weight /= 1000.0f; 421cc1dc7a3Sopenharmony_ci config.cw_r_weight = astc::max(config.cw_r_weight, max_weight); 422cc1dc7a3Sopenharmony_ci config.cw_g_weight = astc::max(config.cw_g_weight, max_weight); 423cc1dc7a3Sopenharmony_ci config.cw_b_weight = astc::max(config.cw_b_weight, max_weight); 424cc1dc7a3Sopenharmony_ci config.cw_a_weight = astc::max(config.cw_a_weight, max_weight); 425cc1dc7a3Sopenharmony_ci } 426cc1dc7a3Sopenharmony_ci // If all color components error weights are zero then return an error 427cc1dc7a3Sopenharmony_ci else 428cc1dc7a3Sopenharmony_ci { 429cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PARAM; 430cc1dc7a3Sopenharmony_ci } 431cc1dc7a3Sopenharmony_ci 432cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 433cc1dc7a3Sopenharmony_ci} 434cc1dc7a3Sopenharmony_ci 435cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 436cc1dc7a3Sopenharmony_ciastcenc_error astcenc_config_init( 437cc1dc7a3Sopenharmony_ci astcenc_profile profile, 438cc1dc7a3Sopenharmony_ci unsigned int block_x, 439cc1dc7a3Sopenharmony_ci unsigned int block_y, 440cc1dc7a3Sopenharmony_ci unsigned int block_z, 441cc1dc7a3Sopenharmony_ci float quality, 442cc1dc7a3Sopenharmony_ci unsigned int flags, 443cc1dc7a3Sopenharmony_ci astcenc_config* configp 444cc1dc7a3Sopenharmony_ci) { 445cc1dc7a3Sopenharmony_ci astcenc_error status; 446cc1dc7a3Sopenharmony_ci 447cc1dc7a3Sopenharmony_ci status = validate_cpu_float(); 448cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 449cc1dc7a3Sopenharmony_ci { 450cc1dc7a3Sopenharmony_ci return status; 451cc1dc7a3Sopenharmony_ci } 452cc1dc7a3Sopenharmony_ci 453cc1dc7a3Sopenharmony_ci // Zero init all config fields; although most of will be over written 454cc1dc7a3Sopenharmony_ci astcenc_config& config = *configp; 455cc1dc7a3Sopenharmony_ci std::memset(&config, 0, sizeof(config)); 456cc1dc7a3Sopenharmony_ci 457cc1dc7a3Sopenharmony_ci // Process the block size 458cc1dc7a3Sopenharmony_ci block_z = astc::max(block_z, 1u); 
// For 2D blocks Z==0 is accepted, but convert to 1 459cc1dc7a3Sopenharmony_ci status = validate_block_size(block_x, block_y, block_z); 460cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 461cc1dc7a3Sopenharmony_ci { 462cc1dc7a3Sopenharmony_ci return status; 463cc1dc7a3Sopenharmony_ci } 464cc1dc7a3Sopenharmony_ci 465cc1dc7a3Sopenharmony_ci config.block_x = block_x; 466cc1dc7a3Sopenharmony_ci config.block_y = block_y; 467cc1dc7a3Sopenharmony_ci config.block_z = block_z; 468cc1dc7a3Sopenharmony_ci 469cc1dc7a3Sopenharmony_ci float texels = static_cast<float>(block_x * block_y * block_z); 470cc1dc7a3Sopenharmony_ci float ltexels = logf(texels) / logf(10.0f); 471cc1dc7a3Sopenharmony_ci 472cc1dc7a3Sopenharmony_ci // Process the performance quality level or preset; note that this must be done before we 473cc1dc7a3Sopenharmony_ci // process any additional settings, such as color profile and flags, which may replace some of 474cc1dc7a3Sopenharmony_ci // these settings with more use case tuned values 475cc1dc7a3Sopenharmony_ci if (quality < ASTCENC_PRE_FASTEST || 476cc1dc7a3Sopenharmony_ci quality > ASTCENC_PRE_EXHAUSTIVE) 477cc1dc7a3Sopenharmony_ci { 478cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_QUALITY; 479cc1dc7a3Sopenharmony_ci } 480cc1dc7a3Sopenharmony_ci 481cc1dc7a3Sopenharmony_ci static const std::array<astcenc_preset_config, 6>* preset_configs; 482cc1dc7a3Sopenharmony_ci int texels_int = block_x * block_y * block_z; 483cc1dc7a3Sopenharmony_ci if (texels_int < 25) 484cc1dc7a3Sopenharmony_ci { 485cc1dc7a3Sopenharmony_ci preset_configs = &preset_configs_high; 486cc1dc7a3Sopenharmony_ci } 487cc1dc7a3Sopenharmony_ci else if (texels_int < 64) 488cc1dc7a3Sopenharmony_ci { 489cc1dc7a3Sopenharmony_ci preset_configs = &preset_configs_mid; 490cc1dc7a3Sopenharmony_ci } 491cc1dc7a3Sopenharmony_ci else 492cc1dc7a3Sopenharmony_ci { 493cc1dc7a3Sopenharmony_ci preset_configs = &preset_configs_low; 494cc1dc7a3Sopenharmony_ci } 495cc1dc7a3Sopenharmony_ci 
496cc1dc7a3Sopenharmony_ci // Determine which preset to use, or which pair to interpolate 497cc1dc7a3Sopenharmony_ci size_t start; 498cc1dc7a3Sopenharmony_ci size_t end; 499cc1dc7a3Sopenharmony_ci for (end = 0; end < preset_configs->size(); end++) 500cc1dc7a3Sopenharmony_ci { 501cc1dc7a3Sopenharmony_ci if ((*preset_configs)[end].quality >= quality) 502cc1dc7a3Sopenharmony_ci { 503cc1dc7a3Sopenharmony_ci break; 504cc1dc7a3Sopenharmony_ci } 505cc1dc7a3Sopenharmony_ci } 506cc1dc7a3Sopenharmony_ci 507cc1dc7a3Sopenharmony_ci start = end == 0 ? 0 : end - 1; 508cc1dc7a3Sopenharmony_ci 509cc1dc7a3Sopenharmony_ci // Start and end node are the same - so just transfer the values. 510cc1dc7a3Sopenharmony_ci if (start == end) 511cc1dc7a3Sopenharmony_ci { 512cc1dc7a3Sopenharmony_ci config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit; 513cc1dc7a3Sopenharmony_ci config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit; 514cc1dc7a3Sopenharmony_ci config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit; 515cc1dc7a3Sopenharmony_ci config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit; 516cc1dc7a3Sopenharmony_ci config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit; 517cc1dc7a3Sopenharmony_ci config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit; 518cc1dc7a3Sopenharmony_ci config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit; 519cc1dc7a3Sopenharmony_ci config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit; 520cc1dc7a3Sopenharmony_ci config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit; 521cc1dc7a3Sopenharmony_ci config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit; 522cc1dc7a3Sopenharmony_ci config.tune_db_limit = 
astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels, 523cc1dc7a3Sopenharmony_ci (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels); 524cc1dc7a3Sopenharmony_ci 525cc1dc7a3Sopenharmony_ci config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot; 526cc1dc7a3Sopenharmony_ci 527cc1dc7a3Sopenharmony_ci config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor; 528cc1dc7a3Sopenharmony_ci config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor; 529cc1dc7a3Sopenharmony_ci config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation; 530cc1dc7a3Sopenharmony_ci config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable; 531cc1dc7a3Sopenharmony_ci } 532cc1dc7a3Sopenharmony_ci // Start and end node are not the same - so interpolate between them 533cc1dc7a3Sopenharmony_ci else 534cc1dc7a3Sopenharmony_ci { 535cc1dc7a3Sopenharmony_ci auto& node_a = (*preset_configs)[start]; 536cc1dc7a3Sopenharmony_ci auto& node_b = (*preset_configs)[end]; 537cc1dc7a3Sopenharmony_ci 538cc1dc7a3Sopenharmony_ci float wt_range = node_b.quality - node_a.quality; 539cc1dc7a3Sopenharmony_ci assert(wt_range > 0); 540cc1dc7a3Sopenharmony_ci 541cc1dc7a3Sopenharmony_ci // Compute interpolation factors 542cc1dc7a3Sopenharmony_ci float wt_node_a = (node_b.quality - quality) / wt_range; 543cc1dc7a3Sopenharmony_ci float wt_node_b = (quality - node_a.quality) / wt_range; 544cc1dc7a3Sopenharmony_ci 545cc1dc7a3Sopenharmony_ci #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b)) 546cc1dc7a3Sopenharmony_ci #define LERPI(param) astc::flt2int_rtn(\ 547cc1dc7a3Sopenharmony_ci (static_cast<float>(node_a.param) * wt_node_a) + \ 548cc1dc7a3Sopenharmony_ci (static_cast<float>(node_b.param) * wt_node_b)) 549cc1dc7a3Sopenharmony_ci #define LERPUI(param) 
static_cast<unsigned int>(LERPI(param)) 550cc1dc7a3Sopenharmony_ci 551cc1dc7a3Sopenharmony_ci config.tune_partition_count_limit = LERPI(tune_partition_count_limit); 552cc1dc7a3Sopenharmony_ci config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit); 553cc1dc7a3Sopenharmony_ci config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit); 554cc1dc7a3Sopenharmony_ci config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit); 555cc1dc7a3Sopenharmony_ci config.tune_block_mode_limit = LERPI(tune_block_mode_limit); 556cc1dc7a3Sopenharmony_ci config.tune_refinement_limit = LERPI(tune_refinement_limit); 557cc1dc7a3Sopenharmony_ci config.tune_candidate_limit = LERPUI(tune_candidate_limit); 558cc1dc7a3Sopenharmony_ci config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit); 559cc1dc7a3Sopenharmony_ci config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit); 560cc1dc7a3Sopenharmony_ci config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit); 561cc1dc7a3Sopenharmony_ci config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels, 562cc1dc7a3Sopenharmony_ci LERP(tune_db_limit_b_base) - 19 * ltexels); 563cc1dc7a3Sopenharmony_ci 564cc1dc7a3Sopenharmony_ci config.tune_mse_overshoot = LERP(tune_mse_overshoot); 565cc1dc7a3Sopenharmony_ci 566cc1dc7a3Sopenharmony_ci config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor); 567cc1dc7a3Sopenharmony_ci config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor); 568cc1dc7a3Sopenharmony_ci config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation); 569cc1dc7a3Sopenharmony_ci config.tune_search_mode0_enable = LERP(tune_search_mode0_enable); 570cc1dc7a3Sopenharmony_ci #undef LERP 571cc1dc7a3Sopenharmony_ci #undef LERPI 572cc1dc7a3Sopenharmony_ci #undef LERPUI 573cc1dc7a3Sopenharmony_ci } 
574cc1dc7a3Sopenharmony_ci 575cc1dc7a3Sopenharmony_ci // Set heuristics to the defaults for each color profile 576cc1dc7a3Sopenharmony_ci config.cw_r_weight = 1.0f; 577cc1dc7a3Sopenharmony_ci config.cw_g_weight = 1.0f; 578cc1dc7a3Sopenharmony_ci config.cw_b_weight = 1.0f; 579cc1dc7a3Sopenharmony_ci config.cw_a_weight = 1.0f; 580cc1dc7a3Sopenharmony_ci 581cc1dc7a3Sopenharmony_ci config.a_scale_radius = 0; 582cc1dc7a3Sopenharmony_ci 583cc1dc7a3Sopenharmony_ci config.rgbm_m_scale = 0.0f; 584cc1dc7a3Sopenharmony_ci 585cc1dc7a3Sopenharmony_ci config.profile = profile; 586cc1dc7a3Sopenharmony_ci 587cc1dc7a3Sopenharmony_ci // Values in this enum are from an external user, so not guaranteed to be 588cc1dc7a3Sopenharmony_ci // bounded to the enum values 589cc1dc7a3Sopenharmony_ci switch (static_cast<int>(profile)) 590cc1dc7a3Sopenharmony_ci { 591cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_LDR: 592cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_LDR_SRGB: 593cc1dc7a3Sopenharmony_ci break; 594cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_HDR_RGB_LDR_A: 595cc1dc7a3Sopenharmony_ci case ASTCENC_PRF_HDR: 596cc1dc7a3Sopenharmony_ci config.tune_db_limit = 999.0f; 597cc1dc7a3Sopenharmony_ci config.tune_search_mode0_enable = 0.0f; 598cc1dc7a3Sopenharmony_ci break; 599cc1dc7a3Sopenharmony_ci default: 600cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PROFILE; 601cc1dc7a3Sopenharmony_ci } 602cc1dc7a3Sopenharmony_ci 603cc1dc7a3Sopenharmony_ci // Flags field must not contain any unknown flag bits 604cc1dc7a3Sopenharmony_ci status = validate_flags(profile, flags); 605cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 606cc1dc7a3Sopenharmony_ci { 607cc1dc7a3Sopenharmony_ci return status; 608cc1dc7a3Sopenharmony_ci } 609cc1dc7a3Sopenharmony_ci 610cc1dc7a3Sopenharmony_ci if (flags & ASTCENC_FLG_MAP_NORMAL) 611cc1dc7a3Sopenharmony_ci { 612cc1dc7a3Sopenharmony_ci // Normal map encoding uses L+A blocks, so allow one more partitioning 613cc1dc7a3Sopenharmony_ci // than normal. 
We need need fewer bits for endpoints, so more likely 614cc1dc7a3Sopenharmony_ci // to be able to use more partitions than an RGB/RGBA block 615cc1dc7a3Sopenharmony_ci config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u); 616cc1dc7a3Sopenharmony_ci 617cc1dc7a3Sopenharmony_ci config.cw_g_weight = 0.0f; 618cc1dc7a3Sopenharmony_ci config.cw_b_weight = 0.0f; 619cc1dc7a3Sopenharmony_ci config.tune_2partition_early_out_limit_factor *= 1.5f; 620cc1dc7a3Sopenharmony_ci config.tune_3partition_early_out_limit_factor *= 1.5f; 621cc1dc7a3Sopenharmony_ci config.tune_2plane_early_out_limit_correlation = 0.99f; 622cc1dc7a3Sopenharmony_ci 623cc1dc7a3Sopenharmony_ci // Normals are prone to blocking artifacts on smooth curves 624cc1dc7a3Sopenharmony_ci // so force compressor to try harder here ... 625cc1dc7a3Sopenharmony_ci config.tune_db_limit *= 1.03f; 626cc1dc7a3Sopenharmony_ci } 627cc1dc7a3Sopenharmony_ci else if (flags & ASTCENC_FLG_MAP_RGBM) 628cc1dc7a3Sopenharmony_ci { 629cc1dc7a3Sopenharmony_ci config.rgbm_m_scale = 5.0f; 630cc1dc7a3Sopenharmony_ci config.cw_a_weight = 2.0f * config.rgbm_m_scale; 631cc1dc7a3Sopenharmony_ci } 632cc1dc7a3Sopenharmony_ci else // (This is color data) 633cc1dc7a3Sopenharmony_ci { 634cc1dc7a3Sopenharmony_ci // This is a very basic perceptual metric for RGB color data, which weights error 635cc1dc7a3Sopenharmony_ci // significance by the perceptual luminance contribution of each color channel. For 636cc1dc7a3Sopenharmony_ci // luminance the usual weights to compute luminance from a linear RGB value are as 637cc1dc7a3Sopenharmony_ci // follows: 638cc1dc7a3Sopenharmony_ci // 639cc1dc7a3Sopenharmony_ci // l = r * 0.3 + g * 0.59 + b * 0.11 640cc1dc7a3Sopenharmony_ci // 641cc1dc7a3Sopenharmony_ci // ... but we scale these up to keep a better balance between color and alpha. 
Note 642cc1dc7a3Sopenharmony_ci // that if the content is using alpha we'd recommend using the -a option to weight 643cc1dc7a3Sopenharmony_ci // the color contribution by the alpha transparency. 644cc1dc7a3Sopenharmony_ci if (flags & ASTCENC_FLG_USE_PERCEPTUAL) 645cc1dc7a3Sopenharmony_ci { 646cc1dc7a3Sopenharmony_ci config.cw_r_weight = 0.30f * 2.25f; 647cc1dc7a3Sopenharmony_ci config.cw_g_weight = 0.59f * 2.25f; 648cc1dc7a3Sopenharmony_ci config.cw_b_weight = 0.11f * 2.25f; 649cc1dc7a3Sopenharmony_ci } 650cc1dc7a3Sopenharmony_ci } 651cc1dc7a3Sopenharmony_ci config.flags = flags; 652cc1dc7a3Sopenharmony_ci 653cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 654cc1dc7a3Sopenharmony_ci} 655cc1dc7a3Sopenharmony_ci 656cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 657cc1dc7a3Sopenharmony_ciastcenc_error astcenc_context_alloc( 658cc1dc7a3Sopenharmony_ci const astcenc_config* configp, 659cc1dc7a3Sopenharmony_ci unsigned int thread_count, 660cc1dc7a3Sopenharmony_ci astcenc_context** context 661cc1dc7a3Sopenharmony_ci) { 662cc1dc7a3Sopenharmony_ci astcenc_error status; 663cc1dc7a3Sopenharmony_ci const astcenc_config& config = *configp; 664cc1dc7a3Sopenharmony_ci 665cc1dc7a3Sopenharmony_ci status = validate_cpu_float(); 666cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 667cc1dc7a3Sopenharmony_ci { 668cc1dc7a3Sopenharmony_ci return status; 669cc1dc7a3Sopenharmony_ci } 670cc1dc7a3Sopenharmony_ci 671cc1dc7a3Sopenharmony_ci if (thread_count == 0) 672cc1dc7a3Sopenharmony_ci { 673cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PARAM; 674cc1dc7a3Sopenharmony_ci } 675cc1dc7a3Sopenharmony_ci 676cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS) 677cc1dc7a3Sopenharmony_ci // Force single threaded compressor use in diagnostic mode. 
678cc1dc7a3Sopenharmony_ci if (thread_count != 1) 679cc1dc7a3Sopenharmony_ci { 680cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PARAM; 681cc1dc7a3Sopenharmony_ci } 682cc1dc7a3Sopenharmony_ci#endif 683cc1dc7a3Sopenharmony_ci 684cc1dc7a3Sopenharmony_ci#ifndef ASTC_CUSTOMIZED_ENABLE 685cc1dc7a3Sopenharmony_ci if (config.privateProfile == CUSTOMIZED_PROFILE) 686cc1dc7a3Sopenharmony_ci { 687cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PARAM; 688cc1dc7a3Sopenharmony_ci } 689cc1dc7a3Sopenharmony_ci#endif 690cc1dc7a3Sopenharmony_ci 691cc1dc7a3Sopenharmony_ci astcenc_context* ctxo = new astcenc_context; 692cc1dc7a3Sopenharmony_ci astcenc_contexti* ctx = &ctxo->context; 693cc1dc7a3Sopenharmony_ci ctx->thread_count = thread_count; 694cc1dc7a3Sopenharmony_ci ctx->config = config; 695cc1dc7a3Sopenharmony_ci ctx->working_buffers = nullptr; 696cc1dc7a3Sopenharmony_ci 697cc1dc7a3Sopenharmony_ci // These are allocated per-compress, as they depend on image size 698cc1dc7a3Sopenharmony_ci ctx->input_alpha_averages = nullptr; 699cc1dc7a3Sopenharmony_ci 700cc1dc7a3Sopenharmony_ci // Copy the config first and validate the copy (we may modify it) 701cc1dc7a3Sopenharmony_ci status = validate_config(ctx->config); 702cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 703cc1dc7a3Sopenharmony_ci { 704cc1dc7a3Sopenharmony_ci delete ctxo; 705cc1dc7a3Sopenharmony_ci return status; 706cc1dc7a3Sopenharmony_ci } 707cc1dc7a3Sopenharmony_ci 708cc1dc7a3Sopenharmony_ci ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN); 709cc1dc7a3Sopenharmony_ci if (!ctx->bsd) 710cc1dc7a3Sopenharmony_ci { 711cc1dc7a3Sopenharmony_ci delete ctxo; 712cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_OUT_OF_MEM; 713cc1dc7a3Sopenharmony_ci } 714cc1dc7a3Sopenharmony_ci 715cc1dc7a3Sopenharmony_ci bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); 716cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE 717cc1dc7a3Sopenharmony_ci if 
(!init_block_size_descriptor(ctx->config.privateProfile, config.block_x, config.block_y, config.block_z, 718cc1dc7a3Sopenharmony_ci can_omit_modes, 719cc1dc7a3Sopenharmony_ci config.tune_partition_count_limit, 720cc1dc7a3Sopenharmony_ci static_cast<float>(config.tune_block_mode_limit) / 100.0f, 721cc1dc7a3Sopenharmony_ci *ctx->bsd)) 722cc1dc7a3Sopenharmony_ci { 723cc1dc7a3Sopenharmony_ci aligned_free<block_size_descriptor>(ctx->bsd); 724cc1dc7a3Sopenharmony_ci delete ctxo; 725cc1dc7a3Sopenharmony_ci *context = nullptr; 726cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_DLOPEN_FAILED; 727cc1dc7a3Sopenharmony_ci } 728cc1dc7a3Sopenharmony_ci#else 729cc1dc7a3Sopenharmony_ci init_block_size_descriptor(ctx->config.privateProfile, config.block_x, config.block_y, config.block_z, 730cc1dc7a3Sopenharmony_ci can_omit_modes, 731cc1dc7a3Sopenharmony_ci config.tune_partition_count_limit, 732cc1dc7a3Sopenharmony_ci static_cast<float>(config.tune_block_mode_limit) / 100.0f, 733cc1dc7a3Sopenharmony_ci *ctx->bsd); 734cc1dc7a3Sopenharmony_ci#endif 735cc1dc7a3Sopenharmony_ci 736cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 737cc1dc7a3Sopenharmony_ci // Do setup only needed by compression 738cc1dc7a3Sopenharmony_ci if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) 739cc1dc7a3Sopenharmony_ci { 740cc1dc7a3Sopenharmony_ci // Turn a dB limit into a per-texel error for faster use later 741cc1dc7a3Sopenharmony_ci if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB)) 742cc1dc7a3Sopenharmony_ci { 743cc1dc7a3Sopenharmony_ci ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; 744cc1dc7a3Sopenharmony_ci } 745cc1dc7a3Sopenharmony_ci else 746cc1dc7a3Sopenharmony_ci { 747cc1dc7a3Sopenharmony_ci ctx->config.tune_db_limit = 0.0f; 748cc1dc7a3Sopenharmony_ci } 749cc1dc7a3Sopenharmony_ci 750cc1dc7a3Sopenharmony_ci size_t worksize = sizeof(compression_working_buffers) * thread_count; 
751cc1dc7a3Sopenharmony_ci ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN); 752cc1dc7a3Sopenharmony_ci static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0), 753cc1dc7a3Sopenharmony_ci "compression_working_buffers size must be multiple of vector alignment"); 754cc1dc7a3Sopenharmony_ci if (!ctx->working_buffers) 755cc1dc7a3Sopenharmony_ci { 756cc1dc7a3Sopenharmony_ci aligned_free<block_size_descriptor>(ctx->bsd); 757cc1dc7a3Sopenharmony_ci delete ctxo; 758cc1dc7a3Sopenharmony_ci *context = nullptr; 759cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_OUT_OF_MEM; 760cc1dc7a3Sopenharmony_ci } 761cc1dc7a3Sopenharmony_ci } 762cc1dc7a3Sopenharmony_ci#endif 763cc1dc7a3Sopenharmony_ci 764cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS) 765cc1dc7a3Sopenharmony_ci ctx->trace_log = new TraceLog(ctx->config.trace_file_path); 766cc1dc7a3Sopenharmony_ci if (!ctx->trace_log->m_file) 767cc1dc7a3Sopenharmony_ci { 768cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_DTRACE_FAILURE; 769cc1dc7a3Sopenharmony_ci } 770cc1dc7a3Sopenharmony_ci 771cc1dc7a3Sopenharmony_ci trace_add_data("block_x", config.block_x); 772cc1dc7a3Sopenharmony_ci trace_add_data("block_y", config.block_y); 773cc1dc7a3Sopenharmony_ci trace_add_data("block_z", config.block_z); 774cc1dc7a3Sopenharmony_ci#endif 775cc1dc7a3Sopenharmony_ci 776cc1dc7a3Sopenharmony_ci *context = ctxo; 777cc1dc7a3Sopenharmony_ci 778cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 779cc1dc7a3Sopenharmony_ci prepare_angular_tables(); 780cc1dc7a3Sopenharmony_ci#endif 781cc1dc7a3Sopenharmony_ci 782cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 783cc1dc7a3Sopenharmony_ci} 784cc1dc7a3Sopenharmony_ci 785cc1dc7a3Sopenharmony_ci/* See header dor documentation. 
*/ 786cc1dc7a3Sopenharmony_civoid astcenc_context_free( 787cc1dc7a3Sopenharmony_ci astcenc_context* ctxo 788cc1dc7a3Sopenharmony_ci) { 789cc1dc7a3Sopenharmony_ci if (ctxo) 790cc1dc7a3Sopenharmony_ci { 791cc1dc7a3Sopenharmony_ci astcenc_contexti* ctx = &ctxo->context; 792cc1dc7a3Sopenharmony_ci if (ctx->working_buffers) 793cc1dc7a3Sopenharmony_ci { 794cc1dc7a3Sopenharmony_ci aligned_free<compression_working_buffers>(ctx->working_buffers); 795cc1dc7a3Sopenharmony_ci } 796cc1dc7a3Sopenharmony_ci else 797cc1dc7a3Sopenharmony_ci { 798cc1dc7a3Sopenharmony_ci printf("ctx->working_buffers is nullptr !!\n"); 799cc1dc7a3Sopenharmony_ci } 800cc1dc7a3Sopenharmony_ci if (ctx->bsd) 801cc1dc7a3Sopenharmony_ci { 802cc1dc7a3Sopenharmony_ci aligned_free<block_size_descriptor>(ctx->bsd); 803cc1dc7a3Sopenharmony_ci } 804cc1dc7a3Sopenharmony_ci else 805cc1dc7a3Sopenharmony_ci { 806cc1dc7a3Sopenharmony_ci printf("ctx->bsd is nullptr !!\n"); 807cc1dc7a3Sopenharmony_ci } 808cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS) 809cc1dc7a3Sopenharmony_ci delete ctx->trace_log; 810cc1dc7a3Sopenharmony_ci#endif 811cc1dc7a3Sopenharmony_ci delete ctxo; 812cc1dc7a3Sopenharmony_ci } 813cc1dc7a3Sopenharmony_ci} 814cc1dc7a3Sopenharmony_ci 815cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 816cc1dc7a3Sopenharmony_ci 817cc1dc7a3Sopenharmony_ci/** 818cc1dc7a3Sopenharmony_ci * @brief Compress an image, after any preflight has completed. 819cc1dc7a3Sopenharmony_ci * 820cc1dc7a3Sopenharmony_ci * @param[out] ctxo The compressor context. 821cc1dc7a3Sopenharmony_ci * @param thread_index The thread index. 822cc1dc7a3Sopenharmony_ci * @param image The intput image. 823cc1dc7a3Sopenharmony_ci * @param swizzle The input swizzle. 824cc1dc7a3Sopenharmony_ci * @param[out] buffer The output array for the compressed data. 
825cc1dc7a3Sopenharmony_ci */ 826cc1dc7a3Sopenharmony_cistatic void compress_image( 827cc1dc7a3Sopenharmony_ci astcenc_context& ctxo, 828cc1dc7a3Sopenharmony_ci unsigned int thread_index, 829cc1dc7a3Sopenharmony_ci const astcenc_image& image, 830cc1dc7a3Sopenharmony_ci const astcenc_swizzle& swizzle, 831cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL 832cc1dc7a3Sopenharmony_ci uint8_t* buffer, 833cc1dc7a3Sopenharmony_ci bool calQualityEnable, 834cc1dc7a3Sopenharmony_ci int32_t *mse[RGBA_COM] 835cc1dc7a3Sopenharmony_ci#else 836cc1dc7a3Sopenharmony_ci uint8_t* buffer 837cc1dc7a3Sopenharmony_ci#endif 838cc1dc7a3Sopenharmony_ci) { 839cc1dc7a3Sopenharmony_ci astcenc_contexti& ctx = ctxo.context; 840cc1dc7a3Sopenharmony_ci const block_size_descriptor& bsd = *ctx.bsd; 841cc1dc7a3Sopenharmony_ci astcenc_profile decode_mode = ctx.config.profile; 842cc1dc7a3Sopenharmony_ci 843cc1dc7a3Sopenharmony_ci image_block blk; 844cc1dc7a3Sopenharmony_ci 845cc1dc7a3Sopenharmony_ci int block_x = bsd.xdim; 846cc1dc7a3Sopenharmony_ci int block_y = bsd.ydim; 847cc1dc7a3Sopenharmony_ci int block_z = bsd.zdim; 848cc1dc7a3Sopenharmony_ci blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); 849cc1dc7a3Sopenharmony_ci 850cc1dc7a3Sopenharmony_ci int dim_x = image.dim_x; 851cc1dc7a3Sopenharmony_ci int dim_y = image.dim_y; 852cc1dc7a3Sopenharmony_ci int dim_z = image.dim_z; 853cc1dc7a3Sopenharmony_ci 854cc1dc7a3Sopenharmony_ci int xblocks = (dim_x + block_x - 1) / block_x; 855cc1dc7a3Sopenharmony_ci int yblocks = (dim_y + block_y - 1) / block_y; 856cc1dc7a3Sopenharmony_ci int zblocks = (dim_z + block_z - 1) / block_z; 857cc1dc7a3Sopenharmony_ci int block_count = zblocks * yblocks * xblocks; 858cc1dc7a3Sopenharmony_ci 859cc1dc7a3Sopenharmony_ci int row_blocks = xblocks; 860cc1dc7a3Sopenharmony_ci int plane_blocks = xblocks * yblocks; 861cc1dc7a3Sopenharmony_ci 862cc1dc7a3Sopenharmony_ci blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8; 
863cc1dc7a3Sopenharmony_ci 864cc1dc7a3Sopenharmony_ci // Populate the block channel weights 865cc1dc7a3Sopenharmony_ci blk.channel_weight = vfloat4(ctx.config.cw_r_weight, 866cc1dc7a3Sopenharmony_ci ctx.config.cw_g_weight, 867cc1dc7a3Sopenharmony_ci ctx.config.cw_b_weight, 868cc1dc7a3Sopenharmony_ci ctx.config.cw_a_weight); 869cc1dc7a3Sopenharmony_ci 870cc1dc7a3Sopenharmony_ci // Use preallocated scratch buffer 871cc1dc7a3Sopenharmony_ci auto& temp_buffers = ctx.working_buffers[thread_index]; 872cc1dc7a3Sopenharmony_ci 873cc1dc7a3Sopenharmony_ci // Only the first thread actually runs the initializer 874cc1dc7a3Sopenharmony_ci ctxo.manage_compress.init(block_count, ctx.config.progress_callback); 875cc1dc7a3Sopenharmony_ci 876cc1dc7a3Sopenharmony_ci // Determine if we can use an optimized load function 877cc1dc7a3Sopenharmony_ci bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) || 878cc1dc7a3Sopenharmony_ci (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A); 879cc1dc7a3Sopenharmony_ci 880cc1dc7a3Sopenharmony_ci bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) || 881cc1dc7a3Sopenharmony_ci (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A); 882cc1dc7a3Sopenharmony_ci 883cc1dc7a3Sopenharmony_ci bool use_fast_load = !needs_swz && !needs_hdr && 884cc1dc7a3Sopenharmony_ci block_z == 1 && image.data_type == ASTCENC_TYPE_U8; 885cc1dc7a3Sopenharmony_ci 886cc1dc7a3Sopenharmony_ci auto load_func = load_image_block; 887cc1dc7a3Sopenharmony_ci if (use_fast_load) 888cc1dc7a3Sopenharmony_ci { 889cc1dc7a3Sopenharmony_ci load_func = load_image_block_fast_ldr; 890cc1dc7a3Sopenharmony_ci } 891cc1dc7a3Sopenharmony_ci 892cc1dc7a3Sopenharmony_ci // All threads run this processing loop until there is no work remaining 893cc1dc7a3Sopenharmony_ci while (true) 894cc1dc7a3Sopenharmony_ci { 895cc1dc7a3Sopenharmony_ci unsigned int count; 896cc1dc7a3Sopenharmony_ci unsigned int base = ctxo.manage_compress.get_task_assignment(16, count); 897cc1dc7a3Sopenharmony_ci 
if (!count) 898cc1dc7a3Sopenharmony_ci { 899cc1dc7a3Sopenharmony_ci break; 900cc1dc7a3Sopenharmony_ci } 901cc1dc7a3Sopenharmony_ci 902cc1dc7a3Sopenharmony_ci for (unsigned int i = base; i < base + count; i++) 903cc1dc7a3Sopenharmony_ci { 904cc1dc7a3Sopenharmony_ci // Decode i into x, y, z block indices 905cc1dc7a3Sopenharmony_ci int z = i / plane_blocks; 906cc1dc7a3Sopenharmony_ci unsigned int rem = i - (z * plane_blocks); 907cc1dc7a3Sopenharmony_ci int y = rem / row_blocks; 908cc1dc7a3Sopenharmony_ci int x = rem - (y * row_blocks); 909cc1dc7a3Sopenharmony_ci 910cc1dc7a3Sopenharmony_ci // Test if we can apply some basic alpha-scale RDO 911cc1dc7a3Sopenharmony_ci bool use_full_block = true; 912cc1dc7a3Sopenharmony_ci if (ctx.config.a_scale_radius != 0 && block_z == 1) 913cc1dc7a3Sopenharmony_ci { 914cc1dc7a3Sopenharmony_ci int start_x = x * block_x; 915cc1dc7a3Sopenharmony_ci int end_x = astc::min(dim_x, start_x + block_x); 916cc1dc7a3Sopenharmony_ci 917cc1dc7a3Sopenharmony_ci int start_y = y * block_y; 918cc1dc7a3Sopenharmony_ci int end_y = astc::min(dim_y, start_y + block_y); 919cc1dc7a3Sopenharmony_ci 920cc1dc7a3Sopenharmony_ci // SATs accumulate error, so don't test exactly zero. Test for 921cc1dc7a3Sopenharmony_ci // less than 1 alpha in the expanded block footprint that 922cc1dc7a3Sopenharmony_ci // includes the alpha radius. 923cc1dc7a3Sopenharmony_ci int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1); 924cc1dc7a3Sopenharmony_ci 925cc1dc7a3Sopenharmony_ci int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1); 926cc1dc7a3Sopenharmony_ci 927cc1dc7a3Sopenharmony_ci float footprint = static_cast<float>(x_footprint * y_footprint); 928cc1dc7a3Sopenharmony_ci float threshold = 0.9f / (255.0f * footprint); 929cc1dc7a3Sopenharmony_ci 930cc1dc7a3Sopenharmony_ci // Do we have any alpha values? 
931cc1dc7a3Sopenharmony_ci use_full_block = false; 932cc1dc7a3Sopenharmony_ci for (int ay = start_y; ay < end_y; ay++) 933cc1dc7a3Sopenharmony_ci { 934cc1dc7a3Sopenharmony_ci for (int ax = start_x; ax < end_x; ax++) 935cc1dc7a3Sopenharmony_ci { 936cc1dc7a3Sopenharmony_ci float a_avg = ctx.input_alpha_averages[ay * dim_x + ax]; 937cc1dc7a3Sopenharmony_ci if (a_avg > threshold) 938cc1dc7a3Sopenharmony_ci { 939cc1dc7a3Sopenharmony_ci use_full_block = true; 940cc1dc7a3Sopenharmony_ci ax = end_x; 941cc1dc7a3Sopenharmony_ci ay = end_y; 942cc1dc7a3Sopenharmony_ci } 943cc1dc7a3Sopenharmony_ci } 944cc1dc7a3Sopenharmony_ci } 945cc1dc7a3Sopenharmony_ci } 946cc1dc7a3Sopenharmony_ci 947cc1dc7a3Sopenharmony_ci // Fetch the full block for compression 948cc1dc7a3Sopenharmony_ci if (use_full_block) 949cc1dc7a3Sopenharmony_ci { 950cc1dc7a3Sopenharmony_ci load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle); 951cc1dc7a3Sopenharmony_ci 952cc1dc7a3Sopenharmony_ci // Scale RGB error contribution by the maximum alpha in the block 953cc1dc7a3Sopenharmony_ci // This encourages preserving alpha accuracy in regions with high 954cc1dc7a3Sopenharmony_ci // transparency, and can buy up to 0.5 dB PSNR. 
955cc1dc7a3Sopenharmony_ci if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT) 956cc1dc7a3Sopenharmony_ci { 957cc1dc7a3Sopenharmony_ci float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f); 958cc1dc7a3Sopenharmony_ci blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale, 959cc1dc7a3Sopenharmony_ci ctx.config.cw_g_weight * alpha_scale, 960cc1dc7a3Sopenharmony_ci ctx.config.cw_b_weight * alpha_scale, 961cc1dc7a3Sopenharmony_ci ctx.config.cw_a_weight); 962cc1dc7a3Sopenharmony_ci } 963cc1dc7a3Sopenharmony_ci } 964cc1dc7a3Sopenharmony_ci // Apply alpha scale RDO - substitute constant color block 965cc1dc7a3Sopenharmony_ci else 966cc1dc7a3Sopenharmony_ci { 967cc1dc7a3Sopenharmony_ci blk.origin_texel = vfloat4::zero(); 968cc1dc7a3Sopenharmony_ci blk.data_min = vfloat4::zero(); 969cc1dc7a3Sopenharmony_ci blk.data_mean = vfloat4::zero(); 970cc1dc7a3Sopenharmony_ci blk.data_max = vfloat4::zero(); 971cc1dc7a3Sopenharmony_ci blk.grayscale = true; 972cc1dc7a3Sopenharmony_ci } 973cc1dc7a3Sopenharmony_ci 974cc1dc7a3Sopenharmony_ci int offset = ((z * yblocks + y) * xblocks + x) * 16; 975cc1dc7a3Sopenharmony_ci uint8_t *bp = buffer + offset; 976cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL 977cc1dc7a3Sopenharmony_ci int32_t *mseBlock[RGBA_COM] = {nullptr, nullptr, nullptr, nullptr}; 978cc1dc7a3Sopenharmony_ci if (calQualityEnable) { 979cc1dc7a3Sopenharmony_ci offset = (z * yblocks + y) * xblocks + x; 980cc1dc7a3Sopenharmony_ci mseBlock[R_COM] = mse[R_COM] + offset; 981cc1dc7a3Sopenharmony_ci mseBlock[G_COM] = mse[G_COM] + offset; 982cc1dc7a3Sopenharmony_ci mseBlock[B_COM] = mse[B_COM] + offset; 983cc1dc7a3Sopenharmony_ci mseBlock[A_COM] = mse[A_COM] + offset; 984cc1dc7a3Sopenharmony_ci } 985cc1dc7a3Sopenharmony_ci compress_block(ctx, blk, bp, temp_buffers, calQualityEnable, mseBlock); 986cc1dc7a3Sopenharmony_ci#else 987cc1dc7a3Sopenharmony_ci compress_block(ctx, blk, bp, temp_buffers); 988cc1dc7a3Sopenharmony_ci#endif 989cc1dc7a3Sopenharmony_ci } 
990cc1dc7a3Sopenharmony_ci 991cc1dc7a3Sopenharmony_ci ctxo.manage_compress.complete_task_assignment(count); 992cc1dc7a3Sopenharmony_ci } 993cc1dc7a3Sopenharmony_ci} 994cc1dc7a3Sopenharmony_ci 995cc1dc7a3Sopenharmony_ci/** 996cc1dc7a3Sopenharmony_ci * @brief Compute regional averages in an image. 997cc1dc7a3Sopenharmony_ci * 998cc1dc7a3Sopenharmony_ci * This function can be called by multiple threads, but only after a single 999cc1dc7a3Sopenharmony_ci * thread calls the setup function @c init_compute_averages(). 1000cc1dc7a3Sopenharmony_ci * 1001cc1dc7a3Sopenharmony_ci * Results are written back into @c img->input_alpha_averages. 1002cc1dc7a3Sopenharmony_ci * 1003cc1dc7a3Sopenharmony_ci * @param[out] ctx The context. 1004cc1dc7a3Sopenharmony_ci * @param ag The average and variance arguments created during setup. 1005cc1dc7a3Sopenharmony_ci */ 1006cc1dc7a3Sopenharmony_cistatic void compute_averages( 1007cc1dc7a3Sopenharmony_ci astcenc_context& ctx, 1008cc1dc7a3Sopenharmony_ci const avg_args &ag 1009cc1dc7a3Sopenharmony_ci) { 1010cc1dc7a3Sopenharmony_ci pixel_region_args arg = ag.arg; 1011cc1dc7a3Sopenharmony_ci arg.work_memory = new vfloat4[ag.work_memory_size]; 1012cc1dc7a3Sopenharmony_ci 1013cc1dc7a3Sopenharmony_ci int size_x = ag.img_size_x; 1014cc1dc7a3Sopenharmony_ci int size_y = ag.img_size_y; 1015cc1dc7a3Sopenharmony_ci int size_z = ag.img_size_z; 1016cc1dc7a3Sopenharmony_ci 1017cc1dc7a3Sopenharmony_ci int step_xy = ag.blk_size_xy; 1018cc1dc7a3Sopenharmony_ci int step_z = ag.blk_size_z; 1019cc1dc7a3Sopenharmony_ci 1020cc1dc7a3Sopenharmony_ci int y_tasks = (size_y + step_xy - 1) / step_xy; 1021cc1dc7a3Sopenharmony_ci 1022cc1dc7a3Sopenharmony_ci // All threads run this processing loop until there is no work remaining 1023cc1dc7a3Sopenharmony_ci while (true) 1024cc1dc7a3Sopenharmony_ci { 1025cc1dc7a3Sopenharmony_ci unsigned int count; 1026cc1dc7a3Sopenharmony_ci unsigned int base = ctx.manage_avg.get_task_assignment(16, count); 1027cc1dc7a3Sopenharmony_ci if 
(!count) 1028cc1dc7a3Sopenharmony_ci { 1029cc1dc7a3Sopenharmony_ci break; 1030cc1dc7a3Sopenharmony_ci } 1031cc1dc7a3Sopenharmony_ci 1032cc1dc7a3Sopenharmony_ci for (unsigned int i = base; i < base + count; i++) 1033cc1dc7a3Sopenharmony_ci { 1034cc1dc7a3Sopenharmony_ci int z = (i / (y_tasks)) * step_z; 1035cc1dc7a3Sopenharmony_ci int y = (i - (z * y_tasks)) * step_xy; 1036cc1dc7a3Sopenharmony_ci 1037cc1dc7a3Sopenharmony_ci arg.size_z = astc::min(step_z, size_z - z); 1038cc1dc7a3Sopenharmony_ci arg.offset_z = z; 1039cc1dc7a3Sopenharmony_ci 1040cc1dc7a3Sopenharmony_ci arg.size_y = astc::min(step_xy, size_y - y); 1041cc1dc7a3Sopenharmony_ci arg.offset_y = y; 1042cc1dc7a3Sopenharmony_ci 1043cc1dc7a3Sopenharmony_ci for (int x = 0; x < size_x; x += step_xy) 1044cc1dc7a3Sopenharmony_ci { 1045cc1dc7a3Sopenharmony_ci arg.size_x = astc::min(step_xy, size_x - x); 1046cc1dc7a3Sopenharmony_ci arg.offset_x = x; 1047cc1dc7a3Sopenharmony_ci compute_pixel_region_variance(ctx.context, arg); 1048cc1dc7a3Sopenharmony_ci } 1049cc1dc7a3Sopenharmony_ci } 1050cc1dc7a3Sopenharmony_ci 1051cc1dc7a3Sopenharmony_ci ctx.manage_avg.complete_task_assignment(count); 1052cc1dc7a3Sopenharmony_ci } 1053cc1dc7a3Sopenharmony_ci 1054cc1dc7a3Sopenharmony_ci delete[] arg.work_memory; 1055cc1dc7a3Sopenharmony_ci} 1056cc1dc7a3Sopenharmony_ci 1057cc1dc7a3Sopenharmony_ci#endif 1058cc1dc7a3Sopenharmony_ci 1059cc1dc7a3Sopenharmony_ci/* See header for documentation. 
*/ 1060cc1dc7a3Sopenharmony_ciastcenc_error astcenc_compress_image( 1061cc1dc7a3Sopenharmony_ci astcenc_context* ctxo, 1062cc1dc7a3Sopenharmony_ci astcenc_image* imagep, 1063cc1dc7a3Sopenharmony_ci const astcenc_swizzle* swizzle, 1064cc1dc7a3Sopenharmony_ci uint8_t* data_out, 1065cc1dc7a3Sopenharmony_ci size_t data_len, 1066cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL 1067cc1dc7a3Sopenharmony_ci bool calQualityEnable, 1068cc1dc7a3Sopenharmony_ci int32_t *mse[RGBA_COM], 1069cc1dc7a3Sopenharmony_ci#endif 1070cc1dc7a3Sopenharmony_ci unsigned int thread_index 1071cc1dc7a3Sopenharmony_ci) { 1072cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY) 1073cc1dc7a3Sopenharmony_ci (void)ctxo; 1074cc1dc7a3Sopenharmony_ci (void)imagep; 1075cc1dc7a3Sopenharmony_ci (void)swizzle; 1076cc1dc7a3Sopenharmony_ci (void)data_out; 1077cc1dc7a3Sopenharmony_ci (void)data_len; 1078cc1dc7a3Sopenharmony_ci (void)thread_index; 1079cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_CONTEXT; 1080cc1dc7a3Sopenharmony_ci#else 1081cc1dc7a3Sopenharmony_ci astcenc_contexti* ctx = &ctxo->context; 1082cc1dc7a3Sopenharmony_ci astcenc_error status; 1083cc1dc7a3Sopenharmony_ci astcenc_image& image = *imagep; 1084cc1dc7a3Sopenharmony_ci 1085cc1dc7a3Sopenharmony_ci if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) 1086cc1dc7a3Sopenharmony_ci { 1087cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_CONTEXT; 1088cc1dc7a3Sopenharmony_ci } 1089cc1dc7a3Sopenharmony_ci 1090cc1dc7a3Sopenharmony_ci status = validate_compression_swizzle(*swizzle); 1091cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 1092cc1dc7a3Sopenharmony_ci { 1093cc1dc7a3Sopenharmony_ci return status; 1094cc1dc7a3Sopenharmony_ci } 1095cc1dc7a3Sopenharmony_ci 1096cc1dc7a3Sopenharmony_ci if (thread_index >= ctx->thread_count) 1097cc1dc7a3Sopenharmony_ci { 1098cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PARAM; 1099cc1dc7a3Sopenharmony_ci } 1100cc1dc7a3Sopenharmony_ci 1101cc1dc7a3Sopenharmony_ci unsigned int block_x = ctx->config.block_x; 
1102cc1dc7a3Sopenharmony_ci unsigned int block_y = ctx->config.block_y; 1103cc1dc7a3Sopenharmony_ci unsigned int block_z = ctx->config.block_z; 1104cc1dc7a3Sopenharmony_ci 1105cc1dc7a3Sopenharmony_ci unsigned int xblocks = (image.dim_x + block_x - 1) / block_x; 1106cc1dc7a3Sopenharmony_ci unsigned int yblocks = (image.dim_y + block_y - 1) / block_y; 1107cc1dc7a3Sopenharmony_ci unsigned int zblocks = (image.dim_z + block_z - 1) / block_z; 1108cc1dc7a3Sopenharmony_ci 1109cc1dc7a3Sopenharmony_ci // Check we have enough output space (16 bytes per block) 1110cc1dc7a3Sopenharmony_ci size_t size_needed = xblocks * yblocks * zblocks * 16; 1111cc1dc7a3Sopenharmony_ci if (data_len < size_needed) 1112cc1dc7a3Sopenharmony_ci { 1113cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_OUT_OF_MEM; 1114cc1dc7a3Sopenharmony_ci } 1115cc1dc7a3Sopenharmony_ci 1116cc1dc7a3Sopenharmony_ci // If context thread count is one then implicitly reset 1117cc1dc7a3Sopenharmony_ci if (ctx->thread_count == 1) 1118cc1dc7a3Sopenharmony_ci { 1119cc1dc7a3Sopenharmony_ci astcenc_compress_reset(ctxo); 1120cc1dc7a3Sopenharmony_ci } 1121cc1dc7a3Sopenharmony_ci 1122cc1dc7a3Sopenharmony_ci if (ctx->config.a_scale_radius != 0) 1123cc1dc7a3Sopenharmony_ci { 1124cc1dc7a3Sopenharmony_ci // First thread to enter will do setup, other threads will subsequently 1125cc1dc7a3Sopenharmony_ci // enter the critical section but simply skip over the initialization 1126cc1dc7a3Sopenharmony_ci auto init_avg = [ctx, &image, swizzle]() { 1127cc1dc7a3Sopenharmony_ci // Perform memory allocations for the destination buffers 1128cc1dc7a3Sopenharmony_ci size_t texel_count = image.dim_x * image.dim_y * image.dim_z; 1129cc1dc7a3Sopenharmony_ci ctx->input_alpha_averages = new float[texel_count]; 1130cc1dc7a3Sopenharmony_ci 1131cc1dc7a3Sopenharmony_ci return init_compute_averages( 1132cc1dc7a3Sopenharmony_ci image, ctx->config.a_scale_radius, *swizzle, 1133cc1dc7a3Sopenharmony_ci ctx->avg_preprocess_args); 1134cc1dc7a3Sopenharmony_ci }; 
1135cc1dc7a3Sopenharmony_ci 1136cc1dc7a3Sopenharmony_ci // Only the first thread actually runs the initializer 1137cc1dc7a3Sopenharmony_ci ctxo->manage_avg.init(init_avg); 1138cc1dc7a3Sopenharmony_ci 1139cc1dc7a3Sopenharmony_ci // All threads will enter this function and dynamically grab work 1140cc1dc7a3Sopenharmony_ci compute_averages(*ctxo, ctx->avg_preprocess_args); 1141cc1dc7a3Sopenharmony_ci } 1142cc1dc7a3Sopenharmony_ci 1143cc1dc7a3Sopenharmony_ci // Wait for compute_averages to complete before compressing 1144cc1dc7a3Sopenharmony_ci ctxo->manage_avg.wait(); 1145cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL 1146cc1dc7a3Sopenharmony_ci compress_image(*ctxo, thread_index, image, *swizzle, data_out, calQualityEnable, mse); 1147cc1dc7a3Sopenharmony_ci#else 1148cc1dc7a3Sopenharmony_ci compress_image(*ctxo, thread_index, image, *swizzle, data_out); 1149cc1dc7a3Sopenharmony_ci#endif 1150cc1dc7a3Sopenharmony_ci // Wait for compress to complete before freeing memory 1151cc1dc7a3Sopenharmony_ci ctxo->manage_compress.wait(); 1152cc1dc7a3Sopenharmony_ci 1153cc1dc7a3Sopenharmony_ci auto term_compress = [ctx]() { 1154cc1dc7a3Sopenharmony_ci delete[] ctx->input_alpha_averages; 1155cc1dc7a3Sopenharmony_ci ctx->input_alpha_averages = nullptr; 1156cc1dc7a3Sopenharmony_ci }; 1157cc1dc7a3Sopenharmony_ci 1158cc1dc7a3Sopenharmony_ci // Only the first thread to arrive actually runs the term 1159cc1dc7a3Sopenharmony_ci ctxo->manage_compress.term(term_compress); 1160cc1dc7a3Sopenharmony_ci 1161cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 1162cc1dc7a3Sopenharmony_ci#endif 1163cc1dc7a3Sopenharmony_ci} 1164cc1dc7a3Sopenharmony_ci 1165cc1dc7a3Sopenharmony_ci/* See header for documentation. 
*/ 1166cc1dc7a3Sopenharmony_ciastcenc_error astcenc_compress_reset( 1167cc1dc7a3Sopenharmony_ci astcenc_context* ctxo 1168cc1dc7a3Sopenharmony_ci) { 1169cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY) 1170cc1dc7a3Sopenharmony_ci (void)ctxo; 1171cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_CONTEXT; 1172cc1dc7a3Sopenharmony_ci#else 1173cc1dc7a3Sopenharmony_ci astcenc_contexti* ctx = &ctxo->context; 1174cc1dc7a3Sopenharmony_ci if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) 1175cc1dc7a3Sopenharmony_ci { 1176cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_CONTEXT; 1177cc1dc7a3Sopenharmony_ci } 1178cc1dc7a3Sopenharmony_ci 1179cc1dc7a3Sopenharmony_ci ctxo->manage_avg.reset(); 1180cc1dc7a3Sopenharmony_ci ctxo->manage_compress.reset(); 1181cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 1182cc1dc7a3Sopenharmony_ci#endif 1183cc1dc7a3Sopenharmony_ci} 1184cc1dc7a3Sopenharmony_ci 1185cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 1186cc1dc7a3Sopenharmony_ciastcenc_error astcenc_decompress_image( 1187cc1dc7a3Sopenharmony_ci astcenc_context* ctxo, 1188cc1dc7a3Sopenharmony_ci const uint8_t* data, 1189cc1dc7a3Sopenharmony_ci size_t data_len, 1190cc1dc7a3Sopenharmony_ci astcenc_image* image_outp, 1191cc1dc7a3Sopenharmony_ci const astcenc_swizzle* swizzle, 1192cc1dc7a3Sopenharmony_ci unsigned int thread_index 1193cc1dc7a3Sopenharmony_ci) { 1194cc1dc7a3Sopenharmony_ci astcenc_error status; 1195cc1dc7a3Sopenharmony_ci astcenc_image& image_out = *image_outp; 1196cc1dc7a3Sopenharmony_ci astcenc_contexti* ctx = &ctxo->context; 1197cc1dc7a3Sopenharmony_ci 1198cc1dc7a3Sopenharmony_ci // Today this doesn't matter (working set on stack) but might in future ... 
1199cc1dc7a3Sopenharmony_ci if (thread_index >= ctx->thread_count) 1200cc1dc7a3Sopenharmony_ci { 1201cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_PARAM; 1202cc1dc7a3Sopenharmony_ci } 1203cc1dc7a3Sopenharmony_ci 1204cc1dc7a3Sopenharmony_ci status = validate_decompression_swizzle(*swizzle); 1205cc1dc7a3Sopenharmony_ci if (status != ASTCENC_SUCCESS) 1206cc1dc7a3Sopenharmony_ci { 1207cc1dc7a3Sopenharmony_ci return status; 1208cc1dc7a3Sopenharmony_ci } 1209cc1dc7a3Sopenharmony_ci 1210cc1dc7a3Sopenharmony_ci unsigned int block_x = ctx->config.block_x; 1211cc1dc7a3Sopenharmony_ci unsigned int block_y = ctx->config.block_y; 1212cc1dc7a3Sopenharmony_ci unsigned int block_z = ctx->config.block_z; 1213cc1dc7a3Sopenharmony_ci 1214cc1dc7a3Sopenharmony_ci unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; 1215cc1dc7a3Sopenharmony_ci unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; 1216cc1dc7a3Sopenharmony_ci unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; 1217cc1dc7a3Sopenharmony_ci unsigned int block_count = zblocks * yblocks * xblocks; 1218cc1dc7a3Sopenharmony_ci 1219cc1dc7a3Sopenharmony_ci int row_blocks = xblocks; 1220cc1dc7a3Sopenharmony_ci int plane_blocks = xblocks * yblocks; 1221cc1dc7a3Sopenharmony_ci 1222cc1dc7a3Sopenharmony_ci // Check we have enough output space (16 bytes per block) 1223cc1dc7a3Sopenharmony_ci size_t size_needed = xblocks * yblocks * zblocks * 16; 1224cc1dc7a3Sopenharmony_ci if (data_len < size_needed) 1225cc1dc7a3Sopenharmony_ci { 1226cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_OUT_OF_MEM; 1227cc1dc7a3Sopenharmony_ci } 1228cc1dc7a3Sopenharmony_ci 1229cc1dc7a3Sopenharmony_ci image_block blk; 1230cc1dc7a3Sopenharmony_ci blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); 1231cc1dc7a3Sopenharmony_ci 1232cc1dc7a3Sopenharmony_ci // Decode mode inferred from the output data type 1233cc1dc7a3Sopenharmony_ci blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8; 
1234cc1dc7a3Sopenharmony_ci 1235cc1dc7a3Sopenharmony_ci // If context thread count is one then implicitly reset 1236cc1dc7a3Sopenharmony_ci if (ctx->thread_count == 1) 1237cc1dc7a3Sopenharmony_ci { 1238cc1dc7a3Sopenharmony_ci astcenc_decompress_reset(ctxo); 1239cc1dc7a3Sopenharmony_ci } 1240cc1dc7a3Sopenharmony_ci 1241cc1dc7a3Sopenharmony_ci // Only the first thread actually runs the initializer 1242cc1dc7a3Sopenharmony_ci ctxo->manage_decompress.init(block_count, nullptr); 1243cc1dc7a3Sopenharmony_ci 1244cc1dc7a3Sopenharmony_ci // All threads run this processing loop until there is no work remaining 1245cc1dc7a3Sopenharmony_ci while (true) 1246cc1dc7a3Sopenharmony_ci { 1247cc1dc7a3Sopenharmony_ci unsigned int count; 1248cc1dc7a3Sopenharmony_ci unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count); 1249cc1dc7a3Sopenharmony_ci if (!count) 1250cc1dc7a3Sopenharmony_ci { 1251cc1dc7a3Sopenharmony_ci break; 1252cc1dc7a3Sopenharmony_ci } 1253cc1dc7a3Sopenharmony_ci 1254cc1dc7a3Sopenharmony_ci for (unsigned int i = base; i < base + count; i++) 1255cc1dc7a3Sopenharmony_ci { 1256cc1dc7a3Sopenharmony_ci // Decode i into x, y, z block indices 1257cc1dc7a3Sopenharmony_ci int z = i / plane_blocks; 1258cc1dc7a3Sopenharmony_ci unsigned int rem = i - (z * plane_blocks); 1259cc1dc7a3Sopenharmony_ci int y = rem / row_blocks; 1260cc1dc7a3Sopenharmony_ci int x = rem - (y * row_blocks); 1261cc1dc7a3Sopenharmony_ci 1262cc1dc7a3Sopenharmony_ci unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; 1263cc1dc7a3Sopenharmony_ci const uint8_t* bp = data + offset; 1264cc1dc7a3Sopenharmony_ci 1265cc1dc7a3Sopenharmony_ci symbolic_compressed_block scb; 1266cc1dc7a3Sopenharmony_ci 1267cc1dc7a3Sopenharmony_ci physical_to_symbolic(*ctx->bsd, bp, scb); 1268cc1dc7a3Sopenharmony_ci 1269cc1dc7a3Sopenharmony_ci decompress_symbolic_block(ctx->config.profile, *ctx->bsd, 1270cc1dc7a3Sopenharmony_ci x * block_x, y * block_y, z * block_z, 1271cc1dc7a3Sopenharmony_ci scb, blk); 
1272cc1dc7a3Sopenharmony_ci 1273cc1dc7a3Sopenharmony_ci store_image_block(image_out, blk, *ctx->bsd, 1274cc1dc7a3Sopenharmony_ci x * block_x, y * block_y, z * block_z, *swizzle); 1275cc1dc7a3Sopenharmony_ci } 1276cc1dc7a3Sopenharmony_ci 1277cc1dc7a3Sopenharmony_ci ctxo->manage_decompress.complete_task_assignment(count); 1278cc1dc7a3Sopenharmony_ci } 1279cc1dc7a3Sopenharmony_ci 1280cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 1281cc1dc7a3Sopenharmony_ci} 1282cc1dc7a3Sopenharmony_ci 1283cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 1284cc1dc7a3Sopenharmony_ciastcenc_error astcenc_decompress_reset( 1285cc1dc7a3Sopenharmony_ci astcenc_context* ctxo 1286cc1dc7a3Sopenharmony_ci) { 1287cc1dc7a3Sopenharmony_ci ctxo->manage_decompress.reset(); 1288cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 1289cc1dc7a3Sopenharmony_ci} 1290cc1dc7a3Sopenharmony_ci 1291cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 1292cc1dc7a3Sopenharmony_ciastcenc_error astcenc_get_block_info( 1293cc1dc7a3Sopenharmony_ci astcenc_context* ctxo, 1294cc1dc7a3Sopenharmony_ci const uint8_t data[16], 1295cc1dc7a3Sopenharmony_ci astcenc_block_info* info 1296cc1dc7a3Sopenharmony_ci) { 1297cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY) 1298cc1dc7a3Sopenharmony_ci (void)ctxo; 1299cc1dc7a3Sopenharmony_ci (void)data; 1300cc1dc7a3Sopenharmony_ci (void)info; 1301cc1dc7a3Sopenharmony_ci return ASTCENC_ERR_BAD_CONTEXT; 1302cc1dc7a3Sopenharmony_ci#else 1303cc1dc7a3Sopenharmony_ci astcenc_contexti* ctx = &ctxo->context; 1304cc1dc7a3Sopenharmony_ci 1305cc1dc7a3Sopenharmony_ci // Decode the compressed data into a symbolic form 1306cc1dc7a3Sopenharmony_ci symbolic_compressed_block scb; 1307cc1dc7a3Sopenharmony_ci physical_to_symbolic(*ctx->bsd, data, scb); 1308cc1dc7a3Sopenharmony_ci 1309cc1dc7a3Sopenharmony_ci // Fetch the appropriate partition and decimation tables 1310cc1dc7a3Sopenharmony_ci block_size_descriptor& bsd = *ctx->bsd; 1311cc1dc7a3Sopenharmony_ci 
1312cc1dc7a3Sopenharmony_ci // Start from a clean slate 1313cc1dc7a3Sopenharmony_ci memset(info, 0, sizeof(*info)); 1314cc1dc7a3Sopenharmony_ci 1315cc1dc7a3Sopenharmony_ci // Basic info we can always populate 1316cc1dc7a3Sopenharmony_ci info->profile = ctx->config.profile; 1317cc1dc7a3Sopenharmony_ci 1318cc1dc7a3Sopenharmony_ci info->block_x = ctx->config.block_x; 1319cc1dc7a3Sopenharmony_ci info->block_y = ctx->config.block_y; 1320cc1dc7a3Sopenharmony_ci info->block_z = ctx->config.block_z; 1321cc1dc7a3Sopenharmony_ci info->texel_count = bsd.texel_count; 1322cc1dc7a3Sopenharmony_ci 1323cc1dc7a3Sopenharmony_ci // Check for error blocks first 1324cc1dc7a3Sopenharmony_ci info->is_error_block = scb.block_type == SYM_BTYPE_ERROR; 1325cc1dc7a3Sopenharmony_ci if (info->is_error_block) 1326cc1dc7a3Sopenharmony_ci { 1327cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 1328cc1dc7a3Sopenharmony_ci } 1329cc1dc7a3Sopenharmony_ci 1330cc1dc7a3Sopenharmony_ci // Check for constant color blocks second 1331cc1dc7a3Sopenharmony_ci info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 || 1332cc1dc7a3Sopenharmony_ci scb.block_type == SYM_BTYPE_CONST_U16; 1333cc1dc7a3Sopenharmony_ci if (info->is_constant_block) 1334cc1dc7a3Sopenharmony_ci { 1335cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 1336cc1dc7a3Sopenharmony_ci } 1337cc1dc7a3Sopenharmony_ci 1338cc1dc7a3Sopenharmony_ci // Otherwise handle a full block ; known to be valid after conditions above have been checked 1339cc1dc7a3Sopenharmony_ci int partition_count = scb.partition_count; 1340cc1dc7a3Sopenharmony_ci const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); 1341cc1dc7a3Sopenharmony_ci 1342cc1dc7a3Sopenharmony_ci const block_mode& bm = bsd.get_block_mode(scb.block_mode); 1343cc1dc7a3Sopenharmony_ci const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode); 1344cc1dc7a3Sopenharmony_ci 1345cc1dc7a3Sopenharmony_ci info->weight_x = di.weight_x; 1346cc1dc7a3Sopenharmony_ci 
info->weight_y = di.weight_y; 1347cc1dc7a3Sopenharmony_ci info->weight_z = di.weight_z; 1348cc1dc7a3Sopenharmony_ci 1349cc1dc7a3Sopenharmony_ci info->is_dual_plane_block = bm.is_dual_plane != 0; 1350cc1dc7a3Sopenharmony_ci 1351cc1dc7a3Sopenharmony_ci info->partition_count = scb.partition_count; 1352cc1dc7a3Sopenharmony_ci info->partition_index = scb.partition_index; 1353cc1dc7a3Sopenharmony_ci info->dual_plane_component = scb.plane2_component; 1354cc1dc7a3Sopenharmony_ci 1355cc1dc7a3Sopenharmony_ci info->color_level_count = get_quant_level(scb.get_color_quant_mode()); 1356cc1dc7a3Sopenharmony_ci info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); 1357cc1dc7a3Sopenharmony_ci 1358cc1dc7a3Sopenharmony_ci // Unpack color endpoints for each active partition 1359cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < scb.partition_count; i++) 1360cc1dc7a3Sopenharmony_ci { 1361cc1dc7a3Sopenharmony_ci bool rgb_hdr; 1362cc1dc7a3Sopenharmony_ci bool a_hdr; 1363cc1dc7a3Sopenharmony_ci vint4 endpnt[2]; 1364cc1dc7a3Sopenharmony_ci 1365cc1dc7a3Sopenharmony_ci unpack_color_endpoints(ctx->config.profile, 1366cc1dc7a3Sopenharmony_ci scb.color_formats[i], 1367cc1dc7a3Sopenharmony_ci scb.color_values[i], 1368cc1dc7a3Sopenharmony_ci rgb_hdr, a_hdr, 1369cc1dc7a3Sopenharmony_ci endpnt[0], endpnt[1]); 1370cc1dc7a3Sopenharmony_ci 1371cc1dc7a3Sopenharmony_ci // Store the color endpoint mode info 1372cc1dc7a3Sopenharmony_ci info->color_endpoint_modes[i] = scb.color_formats[i]; 1373cc1dc7a3Sopenharmony_ci info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr; 1374cc1dc7a3Sopenharmony_ci 1375cc1dc7a3Sopenharmony_ci // Store the unpacked and decoded color endpoint 1376cc1dc7a3Sopenharmony_ci vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr); 1377cc1dc7a3Sopenharmony_ci for (int j = 0; j < 2; j++) 1378cc1dc7a3Sopenharmony_ci { 1379cc1dc7a3Sopenharmony_ci vint4 color_lns = lns_to_sf16(endpnt[j]); 1380cc1dc7a3Sopenharmony_ci vint4 color_unorm = unorm16_to_sf16(endpnt[j]); 
1381cc1dc7a3Sopenharmony_ci vint4 datai = select(color_unorm, color_lns, hdr_mask); 1382cc1dc7a3Sopenharmony_ci store(float16_to_float(datai), info->color_endpoints[i][j]); 1383cc1dc7a3Sopenharmony_ci } 1384cc1dc7a3Sopenharmony_ci } 1385cc1dc7a3Sopenharmony_ci 1386cc1dc7a3Sopenharmony_ci // Unpack weights for each texel 1387cc1dc7a3Sopenharmony_ci int weight_plane1[BLOCK_MAX_TEXELS]; 1388cc1dc7a3Sopenharmony_ci int weight_plane2[BLOCK_MAX_TEXELS]; 1389cc1dc7a3Sopenharmony_ci 1390cc1dc7a3Sopenharmony_ci unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2); 1391cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < bsd.texel_count; i++) 1392cc1dc7a3Sopenharmony_ci { 1393cc1dc7a3Sopenharmony_ci info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM); 1394cc1dc7a3Sopenharmony_ci if (info->is_dual_plane_block) 1395cc1dc7a3Sopenharmony_ci { 1396cc1dc7a3Sopenharmony_ci info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM); 1397cc1dc7a3Sopenharmony_ci } 1398cc1dc7a3Sopenharmony_ci } 1399cc1dc7a3Sopenharmony_ci 1400cc1dc7a3Sopenharmony_ci // Unpack partition assignments for each texel 1401cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < bsd.texel_count; i++) 1402cc1dc7a3Sopenharmony_ci { 1403cc1dc7a3Sopenharmony_ci info->partition_assignment[i] = pi.partition_of_texel[i]; 1404cc1dc7a3Sopenharmony_ci } 1405cc1dc7a3Sopenharmony_ci 1406cc1dc7a3Sopenharmony_ci return ASTCENC_SUCCESS; 1407cc1dc7a3Sopenharmony_ci#endif 1408cc1dc7a3Sopenharmony_ci} 1409cc1dc7a3Sopenharmony_ci 1410cc1dc7a3Sopenharmony_ci/* See header for documentation. 
*/ 1411cc1dc7a3Sopenharmony_ciconst char* astcenc_get_error_string( 1412cc1dc7a3Sopenharmony_ci astcenc_error status 1413cc1dc7a3Sopenharmony_ci) { 1414cc1dc7a3Sopenharmony_ci // Values in this enum are from an external user, so not guaranteed to be 1415cc1dc7a3Sopenharmony_ci // bounded to the enum values 1416cc1dc7a3Sopenharmony_ci switch (static_cast<int>(status)) 1417cc1dc7a3Sopenharmony_ci { 1418cc1dc7a3Sopenharmony_ci case ASTCENC_SUCCESS: 1419cc1dc7a3Sopenharmony_ci return "ASTCENC_SUCCESS"; 1420cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_OUT_OF_MEM: 1421cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_OUT_OF_MEM"; 1422cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_CPU_FLOAT: 1423cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_CPU_FLOAT"; 1424cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_PARAM: 1425cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_PARAM"; 1426cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_BLOCK_SIZE: 1427cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_BLOCK_SIZE"; 1428cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_PROFILE: 1429cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_PROFILE"; 1430cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_QUALITY: 1431cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_QUALITY"; 1432cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_FLAGS: 1433cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_FLAGS"; 1434cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_SWIZZLE: 1435cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_SWIZZLE"; 1436cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_CONTEXT: 1437cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_CONTEXT"; 1438cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_NOT_IMPLEMENTED: 1439cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_NOT_IMPLEMENTED"; 1440cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_BAD_DECODE_MODE: 1441cc1dc7a3Sopenharmony_ci return "ASTCENC_ERR_BAD_DECODE_MODE"; 1442cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS) 1443cc1dc7a3Sopenharmony_ci case ASTCENC_ERR_DTRACE_FAILURE: 1444cc1dc7a3Sopenharmony_ci return 
"ASTCENC_ERR_DTRACE_FAILURE"; 1445cc1dc7a3Sopenharmony_ci#endif 1446cc1dc7a3Sopenharmony_ci default: 1447cc1dc7a3Sopenharmony_ci return nullptr; 1448cc1dc7a3Sopenharmony_ci } 1449cc1dc7a3Sopenharmony_ci} 1450