1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0
2cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
3cc1dc7a3Sopenharmony_ci// Copyright 2011-2024 Arm Limited
4cc1dc7a3Sopenharmony_ci//
5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy
7cc1dc7a3Sopenharmony_ci// of the License at:
8cc1dc7a3Sopenharmony_ci//
9cc1dc7a3Sopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
10cc1dc7a3Sopenharmony_ci//
11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software
12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations
15cc1dc7a3Sopenharmony_ci// under the License.
16cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
17cc1dc7a3Sopenharmony_ci
18cc1dc7a3Sopenharmony_ci/**
19cc1dc7a3Sopenharmony_ci * @brief Functions for the library entrypoint.
20cc1dc7a3Sopenharmony_ci */
21cc1dc7a3Sopenharmony_ci
22cc1dc7a3Sopenharmony_ci#include <array>
23cc1dc7a3Sopenharmony_ci#include <cstring>
24cc1dc7a3Sopenharmony_ci#include <new>
25cc1dc7a3Sopenharmony_ci
26cc1dc7a3Sopenharmony_ci#include "astcenc.h"
27cc1dc7a3Sopenharmony_ci#include "astcenc_internal_entry.h"
28cc1dc7a3Sopenharmony_ci#include "astcenc_diagnostic_trace.h"
29cc1dc7a3Sopenharmony_ci
/**
 * @brief One row of quality tuning parameter values for a single preset level.
 *
 * Each tune_* member seeds the @c astcenc_config member of the same name; see the
 * @c astcenc_config structure for detailed parameter documentation. Members must stay in this
 * order because the preset tables use positional aggregate initialization.
 *
 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
 * for the more thorough search presets because the underlying db_limit is so much higher.
 */
struct astcenc_preset_config
{
	/** @brief The quality level that this row of settings corresponds to. */
	float quality;

	/** @brief Preset for the partition count limit. */
	unsigned int tune_partition_count_limit;

	/** @brief Preset for the 2 partition index limit. */
	unsigned int tune_2partition_index_limit;

	/** @brief Preset for the 3 partition index limit. */
	unsigned int tune_3partition_index_limit;

	/** @brief Preset for the 4 partition index limit. */
	unsigned int tune_4partition_index_limit;

	/** @brief Preset for the block mode limit. */
	unsigned int tune_block_mode_limit;

	/** @brief Preset for the refinement limit. */
	unsigned int tune_refinement_limit;

	/** @brief Preset for the trial candidate limit. */
	unsigned int tune_candidate_limit;

	/** @brief Preset for the 2 partition partitioning candidate limit. */
	unsigned int tune_2partitioning_candidate_limit;

	/** @brief Preset for the 3 partition partitioning candidate limit. */
	unsigned int tune_3partitioning_candidate_limit;

	/** @brief Preset for the 4 partition partitioning candidate limit. */
	unsigned int tune_4partitioning_candidate_limit;

	/** @brief First dB limit base; reduced by 35 * log10(texels) when the config is built. */
	float tune_db_limit_a_base;

	/** @brief Second dB limit base; reduced by 19 * log10(texels) when the config is built. */
	float tune_db_limit_b_base;

	/** @brief Preset for the MSE overshoot scaling factor. */
	float tune_mse_overshoot;

	/** @brief Preset for the 2 partition early out limit factor. */
	float tune_2partition_early_out_limit_factor;

	/** @brief Preset for the 3 partition early out limit factor. */
	float tune_3partition_early_out_limit_factor;

	/** @brief Preset for the 2 plane early out correlation limit. */
	float tune_2plane_early_out_limit_correlation;

	/** @brief Preset for the mode0 search enable, stored as a float so it can be interpolated. */
	float tune_search_mode0_enable;
};
60cc1dc7a3Sopenharmony_ci
61cc1dc7a3Sopenharmony_ci/**
62cc1dc7a3Sopenharmony_ci * @brief The static presets for high bandwidth encodings (x < 25 texels per block).
63cc1dc7a3Sopenharmony_ci */
64cc1dc7a3Sopenharmony_cistatic const std::array<astcenc_preset_config, 6> preset_configs_high {{
65cc1dc7a3Sopenharmony_ci	{
66cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_FASTEST,
67cc1dc7a3Sopenharmony_ci		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f
68cc1dc7a3Sopenharmony_ci	}, {
69cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_FAST,
70cc1dc7a3Sopenharmony_ci		3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f
71cc1dc7a3Sopenharmony_ci	}, {
72cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_MEDIUM,
73cc1dc7a3Sopenharmony_ci		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f
74cc1dc7a3Sopenharmony_ci	}, {
75cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_THOROUGH,
76cc1dc7a3Sopenharmony_ci		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f
77cc1dc7a3Sopenharmony_ci	}, {
78cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_VERYTHOROUGH,
79cc1dc7a3Sopenharmony_ci		4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
80cc1dc7a3Sopenharmony_ci	}, {
81cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_EXHAUSTIVE,
82cc1dc7a3Sopenharmony_ci		4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
83cc1dc7a3Sopenharmony_ci	}
84cc1dc7a3Sopenharmony_ci}};
85cc1dc7a3Sopenharmony_ci
86cc1dc7a3Sopenharmony_ci/**
87cc1dc7a3Sopenharmony_ci * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
88cc1dc7a3Sopenharmony_ci */
89cc1dc7a3Sopenharmony_cistatic const std::array<astcenc_preset_config, 6> preset_configs_mid {{
90cc1dc7a3Sopenharmony_ci	{
91cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_FASTEST,
92cc1dc7a3Sopenharmony_ci		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
93cc1dc7a3Sopenharmony_ci	}, {
94cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_FAST,
95cc1dc7a3Sopenharmony_ci		3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
96cc1dc7a3Sopenharmony_ci	}, {
97cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_MEDIUM,
98cc1dc7a3Sopenharmony_ci		3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f
99cc1dc7a3Sopenharmony_ci	}, {
100cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_THOROUGH,
101cc1dc7a3Sopenharmony_ci		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f
102cc1dc7a3Sopenharmony_ci	}, {
103cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_VERYTHOROUGH,
104cc1dc7a3Sopenharmony_ci		4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
105cc1dc7a3Sopenharmony_ci	}, {
106cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_EXHAUSTIVE,
107cc1dc7a3Sopenharmony_ci		4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
108cc1dc7a3Sopenharmony_ci	}
109cc1dc7a3Sopenharmony_ci}};
110cc1dc7a3Sopenharmony_ci
111cc1dc7a3Sopenharmony_ci/**
112cc1dc7a3Sopenharmony_ci * @brief The static presets for low bandwidth encodings (64 <= x texels per block).
113cc1dc7a3Sopenharmony_ci */
114cc1dc7a3Sopenharmony_cistatic const std::array<astcenc_preset_config, 6> preset_configs_low {{
115cc1dc7a3Sopenharmony_ci	{
116cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_FASTEST,
117cc1dc7a3Sopenharmony_ci		2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
118cc1dc7a3Sopenharmony_ci	}, {
119cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_FAST,
120cc1dc7a3Sopenharmony_ci		2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
121cc1dc7a3Sopenharmony_ci	}, {
122cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_MEDIUM,
123cc1dc7a3Sopenharmony_ci		3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f
124cc1dc7a3Sopenharmony_ci	}, {
125cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_THOROUGH,
126cc1dc7a3Sopenharmony_ci		4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f
127cc1dc7a3Sopenharmony_ci	}, {
128cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_VERYTHOROUGH,
129cc1dc7a3Sopenharmony_ci		4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f
130cc1dc7a3Sopenharmony_ci	}, {
131cc1dc7a3Sopenharmony_ci		ASTCENC_PRE_EXHAUSTIVE,
132cc1dc7a3Sopenharmony_ci		4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f
133cc1dc7a3Sopenharmony_ci	}
134cc1dc7a3Sopenharmony_ci}};
135cc1dc7a3Sopenharmony_ci
136cc1dc7a3Sopenharmony_ci/**
137cc1dc7a3Sopenharmony_ci * @brief Validate CPU floating point meets assumptions made in the codec.
138cc1dc7a3Sopenharmony_ci *
139cc1dc7a3Sopenharmony_ci * The codec is written with the assumption that a float threaded through the @c if32 union will be
140cc1dc7a3Sopenharmony_ci * stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. This is always the
141cc1dc7a3Sopenharmony_ci * case in an IEEE-754 compliant system, however not every system or compilation mode is actually
142cc1dc7a3Sopenharmony_ci * IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled.
143cc1dc7a3Sopenharmony_ci *
144cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
145cc1dc7a3Sopenharmony_ci */
146cc1dc7a3Sopenharmony_cistatic astcenc_error validate_cpu_float()
147cc1dc7a3Sopenharmony_ci{
148cc1dc7a3Sopenharmony_ci	if32 p;
149cc1dc7a3Sopenharmony_ci	volatile float xprec_testval = 2.51f;
150cc1dc7a3Sopenharmony_ci	p.f = xprec_testval + 12582912.0f;
151cc1dc7a3Sopenharmony_ci	float q = p.f - 12582912.0f;
152cc1dc7a3Sopenharmony_ci
153cc1dc7a3Sopenharmony_ci	if (q != 3.0f)
154cc1dc7a3Sopenharmony_ci	{
155cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_CPU_FLOAT;
156cc1dc7a3Sopenharmony_ci	}
157cc1dc7a3Sopenharmony_ci
158cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
159cc1dc7a3Sopenharmony_ci}
160cc1dc7a3Sopenharmony_ci
161cc1dc7a3Sopenharmony_ci/**
162cc1dc7a3Sopenharmony_ci * @brief Validate config profile.
163cc1dc7a3Sopenharmony_ci *
164cc1dc7a3Sopenharmony_ci * @param profile   The profile to check.
165cc1dc7a3Sopenharmony_ci *
166cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
167cc1dc7a3Sopenharmony_ci */
168cc1dc7a3Sopenharmony_cistatic astcenc_error validate_profile(
169cc1dc7a3Sopenharmony_ci	astcenc_profile profile
170cc1dc7a3Sopenharmony_ci) {
171cc1dc7a3Sopenharmony_ci	// Values in this enum are from an external user, so not guaranteed to be
172cc1dc7a3Sopenharmony_ci	// bounded to the enum values
173cc1dc7a3Sopenharmony_ci	switch (static_cast<int>(profile))
174cc1dc7a3Sopenharmony_ci	{
175cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_LDR_SRGB:
176cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_LDR:
177cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_HDR_RGB_LDR_A:
178cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_HDR:
179cc1dc7a3Sopenharmony_ci		return ASTCENC_SUCCESS;
180cc1dc7a3Sopenharmony_ci	default:
181cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PROFILE;
182cc1dc7a3Sopenharmony_ci	}
183cc1dc7a3Sopenharmony_ci}
184cc1dc7a3Sopenharmony_ci
185cc1dc7a3Sopenharmony_ci/**
186cc1dc7a3Sopenharmony_ci * @brief Validate block size.
187cc1dc7a3Sopenharmony_ci *
188cc1dc7a3Sopenharmony_ci * @param block_x   The block x dimensions.
189cc1dc7a3Sopenharmony_ci * @param block_y   The block y dimensions.
190cc1dc7a3Sopenharmony_ci * @param block_z   The block z dimensions.
191cc1dc7a3Sopenharmony_ci *
192cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
193cc1dc7a3Sopenharmony_ci */
194cc1dc7a3Sopenharmony_cistatic astcenc_error validate_block_size(
195cc1dc7a3Sopenharmony_ci	unsigned int block_x,
196cc1dc7a3Sopenharmony_ci	unsigned int block_y,
197cc1dc7a3Sopenharmony_ci	unsigned int block_z
198cc1dc7a3Sopenharmony_ci) {
199cc1dc7a3Sopenharmony_ci	// Test if this is a legal block size at all
200cc1dc7a3Sopenharmony_ci	bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) ||
201cc1dc7a3Sopenharmony_ci	                 ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z)));
202cc1dc7a3Sopenharmony_ci	if (!is_legal)
203cc1dc7a3Sopenharmony_ci	{
204cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_BLOCK_SIZE;
205cc1dc7a3Sopenharmony_ci	}
206cc1dc7a3Sopenharmony_ci
207cc1dc7a3Sopenharmony_ci	// Test if this build has sufficient capacity for this block size
208cc1dc7a3Sopenharmony_ci	bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS;
209cc1dc7a3Sopenharmony_ci	if (!have_capacity)
210cc1dc7a3Sopenharmony_ci	{
211cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_NOT_IMPLEMENTED;
212cc1dc7a3Sopenharmony_ci	}
213cc1dc7a3Sopenharmony_ci
214cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
215cc1dc7a3Sopenharmony_ci}
216cc1dc7a3Sopenharmony_ci
217cc1dc7a3Sopenharmony_ci/**
218cc1dc7a3Sopenharmony_ci * @brief Validate flags.
219cc1dc7a3Sopenharmony_ci *
220cc1dc7a3Sopenharmony_ci * @param profile   The profile to check.
221cc1dc7a3Sopenharmony_ci * @param flags     The flags to check.
222cc1dc7a3Sopenharmony_ci *
223cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
224cc1dc7a3Sopenharmony_ci */
225cc1dc7a3Sopenharmony_cistatic astcenc_error validate_flags(
226cc1dc7a3Sopenharmony_ci	astcenc_profile profile,
227cc1dc7a3Sopenharmony_ci	unsigned int flags
228cc1dc7a3Sopenharmony_ci) {
229cc1dc7a3Sopenharmony_ci	// Flags field must not contain any unknown flag bits
230cc1dc7a3Sopenharmony_ci	unsigned int exMask = ~ASTCENC_ALL_FLAGS;
231cc1dc7a3Sopenharmony_ci	if (popcount(flags & exMask) != 0)
232cc1dc7a3Sopenharmony_ci	{
233cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_FLAGS;
234cc1dc7a3Sopenharmony_ci	}
235cc1dc7a3Sopenharmony_ci
236cc1dc7a3Sopenharmony_ci	// Flags field must only contain at most a single map type
237cc1dc7a3Sopenharmony_ci	exMask = ASTCENC_FLG_MAP_NORMAL
238cc1dc7a3Sopenharmony_ci	       | ASTCENC_FLG_MAP_RGBM;
239cc1dc7a3Sopenharmony_ci	if (popcount(flags & exMask) > 1)
240cc1dc7a3Sopenharmony_ci	{
241cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_FLAGS;
242cc1dc7a3Sopenharmony_ci	}
243cc1dc7a3Sopenharmony_ci
244cc1dc7a3Sopenharmony_ci	// Decode_unorm8 must only be used with an LDR profile
245cc1dc7a3Sopenharmony_ci	bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8;
246cc1dc7a3Sopenharmony_ci	bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A);
247cc1dc7a3Sopenharmony_ci	if (is_unorm8 && is_hdr)
248cc1dc7a3Sopenharmony_ci	{
249cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_DECODE_MODE;
250cc1dc7a3Sopenharmony_ci	}
251cc1dc7a3Sopenharmony_ci
252cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
253cc1dc7a3Sopenharmony_ci}
254cc1dc7a3Sopenharmony_ci
255cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
256cc1dc7a3Sopenharmony_ci
257cc1dc7a3Sopenharmony_ci/**
258cc1dc7a3Sopenharmony_ci * @brief Validate single channel compression swizzle.
259cc1dc7a3Sopenharmony_ci *
260cc1dc7a3Sopenharmony_ci * @param swizzle   The swizzle to check.
261cc1dc7a3Sopenharmony_ci *
262cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
263cc1dc7a3Sopenharmony_ci */
264cc1dc7a3Sopenharmony_cistatic astcenc_error validate_compression_swz(
265cc1dc7a3Sopenharmony_ci	astcenc_swz swizzle
266cc1dc7a3Sopenharmony_ci) {
267cc1dc7a3Sopenharmony_ci	// Not all enum values are handled; SWZ_Z is invalid for compression
268cc1dc7a3Sopenharmony_ci	switch (static_cast<int>(swizzle))
269cc1dc7a3Sopenharmony_ci	{
270cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_R:
271cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_G:
272cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_B:
273cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_A:
274cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_0:
275cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_1:
276cc1dc7a3Sopenharmony_ci		return ASTCENC_SUCCESS;
277cc1dc7a3Sopenharmony_ci	default:
278cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_SWIZZLE;
279cc1dc7a3Sopenharmony_ci	}
280cc1dc7a3Sopenharmony_ci}
281cc1dc7a3Sopenharmony_ci
282cc1dc7a3Sopenharmony_ci/**
283cc1dc7a3Sopenharmony_ci * @brief Validate overall compression swizzle.
284cc1dc7a3Sopenharmony_ci *
285cc1dc7a3Sopenharmony_ci * @param swizzle   The swizzle to check.
286cc1dc7a3Sopenharmony_ci *
287cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
288cc1dc7a3Sopenharmony_ci */
289cc1dc7a3Sopenharmony_cistatic astcenc_error validate_compression_swizzle(
290cc1dc7a3Sopenharmony_ci	const astcenc_swizzle& swizzle
291cc1dc7a3Sopenharmony_ci) {
292cc1dc7a3Sopenharmony_ci	if (validate_compression_swz(swizzle.r) ||
293cc1dc7a3Sopenharmony_ci	    validate_compression_swz(swizzle.g) ||
294cc1dc7a3Sopenharmony_ci	    validate_compression_swz(swizzle.b) ||
295cc1dc7a3Sopenharmony_ci	    validate_compression_swz(swizzle.a))
296cc1dc7a3Sopenharmony_ci	{
297cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_SWIZZLE;
298cc1dc7a3Sopenharmony_ci	}
299cc1dc7a3Sopenharmony_ci
300cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
301cc1dc7a3Sopenharmony_ci}
302cc1dc7a3Sopenharmony_ci#endif
303cc1dc7a3Sopenharmony_ci
304cc1dc7a3Sopenharmony_ci/**
305cc1dc7a3Sopenharmony_ci * @brief Validate single channel decompression swizzle.
306cc1dc7a3Sopenharmony_ci *
307cc1dc7a3Sopenharmony_ci * @param swizzle   The swizzle to check.
308cc1dc7a3Sopenharmony_ci *
309cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
310cc1dc7a3Sopenharmony_ci */
311cc1dc7a3Sopenharmony_cistatic astcenc_error validate_decompression_swz(
312cc1dc7a3Sopenharmony_ci	astcenc_swz swizzle
313cc1dc7a3Sopenharmony_ci) {
314cc1dc7a3Sopenharmony_ci	// Values in this enum are from an external user, so not guaranteed to be
315cc1dc7a3Sopenharmony_ci	// bounded to the enum values
316cc1dc7a3Sopenharmony_ci	switch (static_cast<int>(swizzle))
317cc1dc7a3Sopenharmony_ci	{
318cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_R:
319cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_G:
320cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_B:
321cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_A:
322cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_0:
323cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_1:
324cc1dc7a3Sopenharmony_ci	case ASTCENC_SWZ_Z:
325cc1dc7a3Sopenharmony_ci		return ASTCENC_SUCCESS;
326cc1dc7a3Sopenharmony_ci	default:
327cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_SWIZZLE;
328cc1dc7a3Sopenharmony_ci	}
329cc1dc7a3Sopenharmony_ci}
330cc1dc7a3Sopenharmony_ci
331cc1dc7a3Sopenharmony_ci/**
332cc1dc7a3Sopenharmony_ci * @brief Validate overall decompression swizzle.
333cc1dc7a3Sopenharmony_ci *
334cc1dc7a3Sopenharmony_ci * @param swizzle   The swizzle to check.
335cc1dc7a3Sopenharmony_ci *
336cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
337cc1dc7a3Sopenharmony_ci */
338cc1dc7a3Sopenharmony_cistatic astcenc_error validate_decompression_swizzle(
339cc1dc7a3Sopenharmony_ci	const astcenc_swizzle& swizzle
340cc1dc7a3Sopenharmony_ci) {
341cc1dc7a3Sopenharmony_ci	if (validate_decompression_swz(swizzle.r) ||
342cc1dc7a3Sopenharmony_ci	    validate_decompression_swz(swizzle.g) ||
343cc1dc7a3Sopenharmony_ci	    validate_decompression_swz(swizzle.b) ||
344cc1dc7a3Sopenharmony_ci	    validate_decompression_swz(swizzle.a))
345cc1dc7a3Sopenharmony_ci	{
346cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_SWIZZLE;
347cc1dc7a3Sopenharmony_ci	}
348cc1dc7a3Sopenharmony_ci
349cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
350cc1dc7a3Sopenharmony_ci}
351cc1dc7a3Sopenharmony_ci
352cc1dc7a3Sopenharmony_ci/**
353cc1dc7a3Sopenharmony_ci * Validate that an incoming configuration is in-spec.
354cc1dc7a3Sopenharmony_ci *
355cc1dc7a3Sopenharmony_ci * This function can respond in two ways:
356cc1dc7a3Sopenharmony_ci *
357cc1dc7a3Sopenharmony_ci *   * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown
358cc1dc7a3Sopenharmony_ci *     for out-of-range inputs in this case.
359cc1dc7a3Sopenharmony_ci *   * Numerical inputs and logic inputs are are logically invalid and which make no sense
360cc1dc7a3Sopenharmony_ci *     algorithmically will return an error.
361cc1dc7a3Sopenharmony_ci *
362cc1dc7a3Sopenharmony_ci * @param[in,out] config   The input compressor configuration.
363cc1dc7a3Sopenharmony_ci *
364cc1dc7a3Sopenharmony_ci * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
365cc1dc7a3Sopenharmony_ci */
366cc1dc7a3Sopenharmony_cistatic astcenc_error validate_config(
367cc1dc7a3Sopenharmony_ci	astcenc_config &config
368cc1dc7a3Sopenharmony_ci) {
369cc1dc7a3Sopenharmony_ci	astcenc_error status;
370cc1dc7a3Sopenharmony_ci
371cc1dc7a3Sopenharmony_ci	status = validate_profile(config.profile);
372cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
373cc1dc7a3Sopenharmony_ci	{
374cc1dc7a3Sopenharmony_ci		return status;
375cc1dc7a3Sopenharmony_ci	}
376cc1dc7a3Sopenharmony_ci
377cc1dc7a3Sopenharmony_ci	status = validate_flags(config.profile, config.flags);
378cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
379cc1dc7a3Sopenharmony_ci	{
380cc1dc7a3Sopenharmony_ci		return status;
381cc1dc7a3Sopenharmony_ci	}
382cc1dc7a3Sopenharmony_ci
383cc1dc7a3Sopenharmony_ci	status = validate_block_size(config.block_x, config.block_y, config.block_z);
384cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
385cc1dc7a3Sopenharmony_ci	{
386cc1dc7a3Sopenharmony_ci		return status;
387cc1dc7a3Sopenharmony_ci	}
388cc1dc7a3Sopenharmony_ci
389cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY)
390cc1dc7a3Sopenharmony_ci	// Decompress-only builds only support decompress-only contexts
391cc1dc7a3Sopenharmony_ci	if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
392cc1dc7a3Sopenharmony_ci	{
393cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PARAM;
394cc1dc7a3Sopenharmony_ci	}
395cc1dc7a3Sopenharmony_ci#endif
396cc1dc7a3Sopenharmony_ci
397cc1dc7a3Sopenharmony_ci	config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f);
398cc1dc7a3Sopenharmony_ci
399cc1dc7a3Sopenharmony_ci	config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u);
400cc1dc7a3Sopenharmony_ci	config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
401cc1dc7a3Sopenharmony_ci	config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
402cc1dc7a3Sopenharmony_ci	config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
403cc1dc7a3Sopenharmony_ci	config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u);
404cc1dc7a3Sopenharmony_ci	config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u);
405cc1dc7a3Sopenharmony_ci	config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES);
406cc1dc7a3Sopenharmony_ci	config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
407cc1dc7a3Sopenharmony_ci	config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
408cc1dc7a3Sopenharmony_ci	config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
409cc1dc7a3Sopenharmony_ci	config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
410cc1dc7a3Sopenharmony_ci	config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
411cc1dc7a3Sopenharmony_ci	config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f);
412cc1dc7a3Sopenharmony_ci	config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
413cc1dc7a3Sopenharmony_ci	config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);
414cc1dc7a3Sopenharmony_ci
415cc1dc7a3Sopenharmony_ci	// Specifying a zero weight color component is not allowed; force to small value
416cc1dc7a3Sopenharmony_ci	float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
417cc1dc7a3Sopenharmony_ci	                             astc::max(config.cw_b_weight, config.cw_a_weight));
418cc1dc7a3Sopenharmony_ci	if (max_weight > 0.0f)
419cc1dc7a3Sopenharmony_ci	{
420cc1dc7a3Sopenharmony_ci		max_weight /= 1000.0f;
421cc1dc7a3Sopenharmony_ci		config.cw_r_weight = astc::max(config.cw_r_weight, max_weight);
422cc1dc7a3Sopenharmony_ci		config.cw_g_weight = astc::max(config.cw_g_weight, max_weight);
423cc1dc7a3Sopenharmony_ci		config.cw_b_weight = astc::max(config.cw_b_weight, max_weight);
424cc1dc7a3Sopenharmony_ci		config.cw_a_weight = astc::max(config.cw_a_weight, max_weight);
425cc1dc7a3Sopenharmony_ci	}
426cc1dc7a3Sopenharmony_ci	// If all color components error weights are zero then return an error
427cc1dc7a3Sopenharmony_ci	else
428cc1dc7a3Sopenharmony_ci	{
429cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PARAM;
430cc1dc7a3Sopenharmony_ci	}
431cc1dc7a3Sopenharmony_ci
432cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
433cc1dc7a3Sopenharmony_ci}
434cc1dc7a3Sopenharmony_ci
435cc1dc7a3Sopenharmony_ci/* See header for documentation. */
436cc1dc7a3Sopenharmony_ciastcenc_error astcenc_config_init(
437cc1dc7a3Sopenharmony_ci	astcenc_profile profile,
438cc1dc7a3Sopenharmony_ci	unsigned int block_x,
439cc1dc7a3Sopenharmony_ci	unsigned int block_y,
440cc1dc7a3Sopenharmony_ci	unsigned int block_z,
441cc1dc7a3Sopenharmony_ci	float quality,
442cc1dc7a3Sopenharmony_ci	unsigned int flags,
443cc1dc7a3Sopenharmony_ci	astcenc_config* configp
444cc1dc7a3Sopenharmony_ci) {
445cc1dc7a3Sopenharmony_ci	astcenc_error status;
446cc1dc7a3Sopenharmony_ci
447cc1dc7a3Sopenharmony_ci	status = validate_cpu_float();
448cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
449cc1dc7a3Sopenharmony_ci	{
450cc1dc7a3Sopenharmony_ci		return status;
451cc1dc7a3Sopenharmony_ci	}
452cc1dc7a3Sopenharmony_ci
453cc1dc7a3Sopenharmony_ci	// Zero init all config fields; although most of will be over written
454cc1dc7a3Sopenharmony_ci	astcenc_config& config = *configp;
455cc1dc7a3Sopenharmony_ci	std::memset(&config, 0, sizeof(config));
456cc1dc7a3Sopenharmony_ci
457cc1dc7a3Sopenharmony_ci	// Process the block size
458cc1dc7a3Sopenharmony_ci	block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1
459cc1dc7a3Sopenharmony_ci	status = validate_block_size(block_x, block_y, block_z);
460cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
461cc1dc7a3Sopenharmony_ci	{
462cc1dc7a3Sopenharmony_ci		return status;
463cc1dc7a3Sopenharmony_ci	}
464cc1dc7a3Sopenharmony_ci
465cc1dc7a3Sopenharmony_ci	config.block_x = block_x;
466cc1dc7a3Sopenharmony_ci	config.block_y = block_y;
467cc1dc7a3Sopenharmony_ci	config.block_z = block_z;
468cc1dc7a3Sopenharmony_ci
469cc1dc7a3Sopenharmony_ci	float texels = static_cast<float>(block_x * block_y * block_z);
470cc1dc7a3Sopenharmony_ci	float ltexels = logf(texels) / logf(10.0f);
471cc1dc7a3Sopenharmony_ci
472cc1dc7a3Sopenharmony_ci	// Process the performance quality level or preset; note that this must be done before we
473cc1dc7a3Sopenharmony_ci	// process any additional settings, such as color profile and flags, which may replace some of
474cc1dc7a3Sopenharmony_ci	// these settings with more use case tuned values
475cc1dc7a3Sopenharmony_ci	if (quality < ASTCENC_PRE_FASTEST ||
476cc1dc7a3Sopenharmony_ci	    quality > ASTCENC_PRE_EXHAUSTIVE)
477cc1dc7a3Sopenharmony_ci	{
478cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_QUALITY;
479cc1dc7a3Sopenharmony_ci	}
480cc1dc7a3Sopenharmony_ci
481cc1dc7a3Sopenharmony_ci	static const std::array<astcenc_preset_config, 6>* preset_configs;
482cc1dc7a3Sopenharmony_ci	int texels_int = block_x * block_y * block_z;
483cc1dc7a3Sopenharmony_ci	if (texels_int < 25)
484cc1dc7a3Sopenharmony_ci	{
485cc1dc7a3Sopenharmony_ci		preset_configs = &preset_configs_high;
486cc1dc7a3Sopenharmony_ci	}
487cc1dc7a3Sopenharmony_ci	else if (texels_int < 64)
488cc1dc7a3Sopenharmony_ci	{
489cc1dc7a3Sopenharmony_ci		preset_configs = &preset_configs_mid;
490cc1dc7a3Sopenharmony_ci	}
491cc1dc7a3Sopenharmony_ci	else
492cc1dc7a3Sopenharmony_ci	{
493cc1dc7a3Sopenharmony_ci		preset_configs = &preset_configs_low;
494cc1dc7a3Sopenharmony_ci	}
495cc1dc7a3Sopenharmony_ci
496cc1dc7a3Sopenharmony_ci	// Determine which preset to use, or which pair to interpolate
497cc1dc7a3Sopenharmony_ci	size_t start;
498cc1dc7a3Sopenharmony_ci	size_t end;
499cc1dc7a3Sopenharmony_ci	for (end = 0; end < preset_configs->size(); end++)
500cc1dc7a3Sopenharmony_ci	{
501cc1dc7a3Sopenharmony_ci		if ((*preset_configs)[end].quality >= quality)
502cc1dc7a3Sopenharmony_ci		{
503cc1dc7a3Sopenharmony_ci			break;
504cc1dc7a3Sopenharmony_ci		}
505cc1dc7a3Sopenharmony_ci	}
506cc1dc7a3Sopenharmony_ci
507cc1dc7a3Sopenharmony_ci	start = end == 0 ? 0 : end - 1;
508cc1dc7a3Sopenharmony_ci
509cc1dc7a3Sopenharmony_ci	// Start and end node are the same - so just transfer the values.
510cc1dc7a3Sopenharmony_ci	if (start == end)
511cc1dc7a3Sopenharmony_ci	{
512cc1dc7a3Sopenharmony_ci		config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit;
513cc1dc7a3Sopenharmony_ci		config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit;
514cc1dc7a3Sopenharmony_ci		config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit;
515cc1dc7a3Sopenharmony_ci		config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit;
516cc1dc7a3Sopenharmony_ci		config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
517cc1dc7a3Sopenharmony_ci		config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
518cc1dc7a3Sopenharmony_ci		config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit;
519cc1dc7a3Sopenharmony_ci		config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit;
520cc1dc7a3Sopenharmony_ci		config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit;
521cc1dc7a3Sopenharmony_ci		config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit;
522cc1dc7a3Sopenharmony_ci		config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
523cc1dc7a3Sopenharmony_ci		                                 (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
524cc1dc7a3Sopenharmony_ci
525cc1dc7a3Sopenharmony_ci		config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
526cc1dc7a3Sopenharmony_ci
527cc1dc7a3Sopenharmony_ci		config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
528cc1dc7a3Sopenharmony_ci		config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
529cc1dc7a3Sopenharmony_ci		config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
530cc1dc7a3Sopenharmony_ci		config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable;
531cc1dc7a3Sopenharmony_ci	}
532cc1dc7a3Sopenharmony_ci	// Start and end node are not the same - so interpolate between them
533cc1dc7a3Sopenharmony_ci	else
534cc1dc7a3Sopenharmony_ci	{
535cc1dc7a3Sopenharmony_ci		auto& node_a = (*preset_configs)[start];
536cc1dc7a3Sopenharmony_ci		auto& node_b = (*preset_configs)[end];
537cc1dc7a3Sopenharmony_ci
538cc1dc7a3Sopenharmony_ci		float wt_range = node_b.quality - node_a.quality;
539cc1dc7a3Sopenharmony_ci		assert(wt_range > 0);
540cc1dc7a3Sopenharmony_ci
541cc1dc7a3Sopenharmony_ci		// Compute interpolation factors
542cc1dc7a3Sopenharmony_ci		float wt_node_a = (node_b.quality - quality) / wt_range;
543cc1dc7a3Sopenharmony_ci		float wt_node_b = (quality - node_a.quality) / wt_range;
544cc1dc7a3Sopenharmony_ci
545cc1dc7a3Sopenharmony_ci		#define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
546cc1dc7a3Sopenharmony_ci		#define LERPI(param) astc::flt2int_rtn(\
547cc1dc7a3Sopenharmony_ci		                         (static_cast<float>(node_a.param) * wt_node_a) + \
548cc1dc7a3Sopenharmony_ci		                         (static_cast<float>(node_b.param) * wt_node_b))
549cc1dc7a3Sopenharmony_ci		#define LERPUI(param) static_cast<unsigned int>(LERPI(param))
550cc1dc7a3Sopenharmony_ci
551cc1dc7a3Sopenharmony_ci		config.tune_partition_count_limit = LERPI(tune_partition_count_limit);
552cc1dc7a3Sopenharmony_ci		config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit);
553cc1dc7a3Sopenharmony_ci		config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit);
554cc1dc7a3Sopenharmony_ci		config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit);
555cc1dc7a3Sopenharmony_ci		config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
556cc1dc7a3Sopenharmony_ci		config.tune_refinement_limit = LERPI(tune_refinement_limit);
557cc1dc7a3Sopenharmony_ci		config.tune_candidate_limit = LERPUI(tune_candidate_limit);
558cc1dc7a3Sopenharmony_ci		config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit);
559cc1dc7a3Sopenharmony_ci		config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit);
560cc1dc7a3Sopenharmony_ci		config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit);
561cc1dc7a3Sopenharmony_ci		config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
562cc1dc7a3Sopenharmony_ci		                                 LERP(tune_db_limit_b_base) - 19 * ltexels);
563cc1dc7a3Sopenharmony_ci
564cc1dc7a3Sopenharmony_ci		config.tune_mse_overshoot = LERP(tune_mse_overshoot);
565cc1dc7a3Sopenharmony_ci
566cc1dc7a3Sopenharmony_ci		config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
567cc1dc7a3Sopenharmony_ci		config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
568cc1dc7a3Sopenharmony_ci		config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
569cc1dc7a3Sopenharmony_ci		config.tune_search_mode0_enable = LERP(tune_search_mode0_enable);
570cc1dc7a3Sopenharmony_ci		#undef LERP
571cc1dc7a3Sopenharmony_ci		#undef LERPI
572cc1dc7a3Sopenharmony_ci		#undef LERPUI
573cc1dc7a3Sopenharmony_ci	}
574cc1dc7a3Sopenharmony_ci
575cc1dc7a3Sopenharmony_ci	// Set heuristics to the defaults for each color profile
576cc1dc7a3Sopenharmony_ci	config.cw_r_weight = 1.0f;
577cc1dc7a3Sopenharmony_ci	config.cw_g_weight = 1.0f;
578cc1dc7a3Sopenharmony_ci	config.cw_b_weight = 1.0f;
579cc1dc7a3Sopenharmony_ci	config.cw_a_weight = 1.0f;
580cc1dc7a3Sopenharmony_ci
581cc1dc7a3Sopenharmony_ci	config.a_scale_radius = 0;
582cc1dc7a3Sopenharmony_ci
583cc1dc7a3Sopenharmony_ci	config.rgbm_m_scale = 0.0f;
584cc1dc7a3Sopenharmony_ci
585cc1dc7a3Sopenharmony_ci	config.profile = profile;
586cc1dc7a3Sopenharmony_ci
587cc1dc7a3Sopenharmony_ci	// Values in this enum are from an external user, so not guaranteed to be
588cc1dc7a3Sopenharmony_ci	// bounded to the enum values
589cc1dc7a3Sopenharmony_ci	switch (static_cast<int>(profile))
590cc1dc7a3Sopenharmony_ci	{
591cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_LDR:
592cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_LDR_SRGB:
593cc1dc7a3Sopenharmony_ci		break;
594cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_HDR_RGB_LDR_A:
595cc1dc7a3Sopenharmony_ci	case ASTCENC_PRF_HDR:
596cc1dc7a3Sopenharmony_ci		config.tune_db_limit = 999.0f;
597cc1dc7a3Sopenharmony_ci		config.tune_search_mode0_enable = 0.0f;
598cc1dc7a3Sopenharmony_ci		break;
599cc1dc7a3Sopenharmony_ci	default:
600cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PROFILE;
601cc1dc7a3Sopenharmony_ci	}
602cc1dc7a3Sopenharmony_ci
603cc1dc7a3Sopenharmony_ci	// Flags field must not contain any unknown flag bits
604cc1dc7a3Sopenharmony_ci	status = validate_flags(profile, flags);
605cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
606cc1dc7a3Sopenharmony_ci	{
607cc1dc7a3Sopenharmony_ci		return status;
608cc1dc7a3Sopenharmony_ci	}
609cc1dc7a3Sopenharmony_ci
610cc1dc7a3Sopenharmony_ci	if (flags & ASTCENC_FLG_MAP_NORMAL)
611cc1dc7a3Sopenharmony_ci	{
612cc1dc7a3Sopenharmony_ci		// Normal map encoding uses L+A blocks, so allow one more partitioning
613cc1dc7a3Sopenharmony_ci		// than normal. We need need fewer bits for endpoints, so more likely
614cc1dc7a3Sopenharmony_ci		// to be able to use more partitions than an RGB/RGBA block
615cc1dc7a3Sopenharmony_ci		config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u);
616cc1dc7a3Sopenharmony_ci
617cc1dc7a3Sopenharmony_ci		config.cw_g_weight = 0.0f;
618cc1dc7a3Sopenharmony_ci		config.cw_b_weight = 0.0f;
619cc1dc7a3Sopenharmony_ci		config.tune_2partition_early_out_limit_factor *= 1.5f;
620cc1dc7a3Sopenharmony_ci		config.tune_3partition_early_out_limit_factor *= 1.5f;
621cc1dc7a3Sopenharmony_ci		config.tune_2plane_early_out_limit_correlation = 0.99f;
622cc1dc7a3Sopenharmony_ci
623cc1dc7a3Sopenharmony_ci		// Normals are prone to blocking artifacts on smooth curves
624cc1dc7a3Sopenharmony_ci		// so force compressor to try harder here ...
625cc1dc7a3Sopenharmony_ci		config.tune_db_limit *= 1.03f;
626cc1dc7a3Sopenharmony_ci	}
627cc1dc7a3Sopenharmony_ci	else if (flags & ASTCENC_FLG_MAP_RGBM)
628cc1dc7a3Sopenharmony_ci	{
629cc1dc7a3Sopenharmony_ci		config.rgbm_m_scale = 5.0f;
630cc1dc7a3Sopenharmony_ci		config.cw_a_weight = 2.0f * config.rgbm_m_scale;
631cc1dc7a3Sopenharmony_ci	}
632cc1dc7a3Sopenharmony_ci	else // (This is color data)
633cc1dc7a3Sopenharmony_ci	{
634cc1dc7a3Sopenharmony_ci		// This is a very basic perceptual metric for RGB color data, which weights error
635cc1dc7a3Sopenharmony_ci		// significance by the perceptual luminance contribution of each color channel. For
636cc1dc7a3Sopenharmony_ci		// luminance the usual weights to compute luminance from a linear RGB value are as
637cc1dc7a3Sopenharmony_ci		// follows:
638cc1dc7a3Sopenharmony_ci		//
639cc1dc7a3Sopenharmony_ci		//     l = r * 0.3 + g * 0.59 + b * 0.11
640cc1dc7a3Sopenharmony_ci		//
641cc1dc7a3Sopenharmony_ci		// ... but we scale these up to keep a better balance between color and alpha. Note
642cc1dc7a3Sopenharmony_ci		// that if the content is using alpha we'd recommend using the -a option to weight
643cc1dc7a3Sopenharmony_ci		// the color contribution by the alpha transparency.
644cc1dc7a3Sopenharmony_ci		if (flags & ASTCENC_FLG_USE_PERCEPTUAL)
645cc1dc7a3Sopenharmony_ci		{
646cc1dc7a3Sopenharmony_ci			config.cw_r_weight = 0.30f * 2.25f;
647cc1dc7a3Sopenharmony_ci			config.cw_g_weight = 0.59f * 2.25f;
648cc1dc7a3Sopenharmony_ci			config.cw_b_weight = 0.11f * 2.25f;
649cc1dc7a3Sopenharmony_ci		}
650cc1dc7a3Sopenharmony_ci	}
651cc1dc7a3Sopenharmony_ci	config.flags = flags;
652cc1dc7a3Sopenharmony_ci
653cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
654cc1dc7a3Sopenharmony_ci}
655cc1dc7a3Sopenharmony_ci
656cc1dc7a3Sopenharmony_ci/* See header for documentation. */
657cc1dc7a3Sopenharmony_ciastcenc_error astcenc_context_alloc(
658cc1dc7a3Sopenharmony_ci	const astcenc_config* configp,
659cc1dc7a3Sopenharmony_ci	unsigned int thread_count,
660cc1dc7a3Sopenharmony_ci	astcenc_context** context
661cc1dc7a3Sopenharmony_ci) {
662cc1dc7a3Sopenharmony_ci	astcenc_error status;
663cc1dc7a3Sopenharmony_ci	const astcenc_config& config = *configp;
664cc1dc7a3Sopenharmony_ci
665cc1dc7a3Sopenharmony_ci	status = validate_cpu_float();
666cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
667cc1dc7a3Sopenharmony_ci	{
668cc1dc7a3Sopenharmony_ci		return status;
669cc1dc7a3Sopenharmony_ci	}
670cc1dc7a3Sopenharmony_ci
671cc1dc7a3Sopenharmony_ci	if (thread_count == 0)
672cc1dc7a3Sopenharmony_ci	{
673cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PARAM;
674cc1dc7a3Sopenharmony_ci	}
675cc1dc7a3Sopenharmony_ci
676cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS)
677cc1dc7a3Sopenharmony_ci	// Force single threaded compressor use in diagnostic mode.
678cc1dc7a3Sopenharmony_ci	if (thread_count != 1)
679cc1dc7a3Sopenharmony_ci	{
680cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PARAM;
681cc1dc7a3Sopenharmony_ci	}
682cc1dc7a3Sopenharmony_ci#endif
683cc1dc7a3Sopenharmony_ci
684cc1dc7a3Sopenharmony_ci#ifndef ASTC_CUSTOMIZED_ENABLE
685cc1dc7a3Sopenharmony_ci	if (config.privateProfile == CUSTOMIZED_PROFILE)
686cc1dc7a3Sopenharmony_ci	{
687cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PARAM;
688cc1dc7a3Sopenharmony_ci	}
689cc1dc7a3Sopenharmony_ci#endif
690cc1dc7a3Sopenharmony_ci
691cc1dc7a3Sopenharmony_ci	astcenc_context* ctxo = new astcenc_context;
692cc1dc7a3Sopenharmony_ci	astcenc_contexti* ctx = &ctxo->context;
693cc1dc7a3Sopenharmony_ci	ctx->thread_count = thread_count;
694cc1dc7a3Sopenharmony_ci	ctx->config = config;
695cc1dc7a3Sopenharmony_ci	ctx->working_buffers = nullptr;
696cc1dc7a3Sopenharmony_ci
697cc1dc7a3Sopenharmony_ci	// These are allocated per-compress, as they depend on image size
698cc1dc7a3Sopenharmony_ci	ctx->input_alpha_averages = nullptr;
699cc1dc7a3Sopenharmony_ci
700cc1dc7a3Sopenharmony_ci	// Copy the config first and validate the copy (we may modify it)
701cc1dc7a3Sopenharmony_ci	status = validate_config(ctx->config);
702cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
703cc1dc7a3Sopenharmony_ci	{
704cc1dc7a3Sopenharmony_ci		delete ctxo;
705cc1dc7a3Sopenharmony_ci		return status;
706cc1dc7a3Sopenharmony_ci	}
707cc1dc7a3Sopenharmony_ci
708cc1dc7a3Sopenharmony_ci	ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
709cc1dc7a3Sopenharmony_ci	if (!ctx->bsd)
710cc1dc7a3Sopenharmony_ci	{
711cc1dc7a3Sopenharmony_ci		delete ctxo;
712cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_OUT_OF_MEM;
713cc1dc7a3Sopenharmony_ci	}
714cc1dc7a3Sopenharmony_ci
715cc1dc7a3Sopenharmony_ci	bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
716cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE
717cc1dc7a3Sopenharmony_ci	if (!init_block_size_descriptor(ctx->config.privateProfile, config.block_x, config.block_y, config.block_z,
718cc1dc7a3Sopenharmony_ci	                           can_omit_modes,
719cc1dc7a3Sopenharmony_ci	                           config.tune_partition_count_limit,
720cc1dc7a3Sopenharmony_ci	                           static_cast<float>(config.tune_block_mode_limit) / 100.0f,
721cc1dc7a3Sopenharmony_ci	                           *ctx->bsd))
722cc1dc7a3Sopenharmony_ci	{
723cc1dc7a3Sopenharmony_ci		aligned_free<block_size_descriptor>(ctx->bsd);
724cc1dc7a3Sopenharmony_ci		delete ctxo;
725cc1dc7a3Sopenharmony_ci		*context = nullptr;
726cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_DLOPEN_FAILED;
727cc1dc7a3Sopenharmony_ci	}
728cc1dc7a3Sopenharmony_ci#else
729cc1dc7a3Sopenharmony_ci	init_block_size_descriptor(ctx->config.privateProfile, config.block_x, config.block_y, config.block_z,
730cc1dc7a3Sopenharmony_ci	                           can_omit_modes,
731cc1dc7a3Sopenharmony_ci	                           config.tune_partition_count_limit,
732cc1dc7a3Sopenharmony_ci	                           static_cast<float>(config.tune_block_mode_limit) / 100.0f,
733cc1dc7a3Sopenharmony_ci	                           *ctx->bsd);
734cc1dc7a3Sopenharmony_ci#endif
735cc1dc7a3Sopenharmony_ci
736cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
737cc1dc7a3Sopenharmony_ci	// Do setup only needed by compression
738cc1dc7a3Sopenharmony_ci	if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
739cc1dc7a3Sopenharmony_ci	{
740cc1dc7a3Sopenharmony_ci		// Turn a dB limit into a per-texel error for faster use later
741cc1dc7a3Sopenharmony_ci		if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
742cc1dc7a3Sopenharmony_ci		{
743cc1dc7a3Sopenharmony_ci			ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f;
744cc1dc7a3Sopenharmony_ci		}
745cc1dc7a3Sopenharmony_ci		else
746cc1dc7a3Sopenharmony_ci		{
747cc1dc7a3Sopenharmony_ci			ctx->config.tune_db_limit = 0.0f;
748cc1dc7a3Sopenharmony_ci		}
749cc1dc7a3Sopenharmony_ci
750cc1dc7a3Sopenharmony_ci		size_t worksize = sizeof(compression_working_buffers) * thread_count;
751cc1dc7a3Sopenharmony_ci		ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
752cc1dc7a3Sopenharmony_ci		static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0),
753cc1dc7a3Sopenharmony_ci		              "compression_working_buffers size must be multiple of vector alignment");
754cc1dc7a3Sopenharmony_ci		if (!ctx->working_buffers)
755cc1dc7a3Sopenharmony_ci		{
756cc1dc7a3Sopenharmony_ci			aligned_free<block_size_descriptor>(ctx->bsd);
757cc1dc7a3Sopenharmony_ci			delete ctxo;
758cc1dc7a3Sopenharmony_ci			*context = nullptr;
759cc1dc7a3Sopenharmony_ci			return ASTCENC_ERR_OUT_OF_MEM;
760cc1dc7a3Sopenharmony_ci		}
761cc1dc7a3Sopenharmony_ci	}
762cc1dc7a3Sopenharmony_ci#endif
763cc1dc7a3Sopenharmony_ci
764cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS)
765cc1dc7a3Sopenharmony_ci	ctx->trace_log = new TraceLog(ctx->config.trace_file_path);
766cc1dc7a3Sopenharmony_ci	if (!ctx->trace_log->m_file)
767cc1dc7a3Sopenharmony_ci	{
768cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_DTRACE_FAILURE;
769cc1dc7a3Sopenharmony_ci	}
770cc1dc7a3Sopenharmony_ci
771cc1dc7a3Sopenharmony_ci	trace_add_data("block_x", config.block_x);
772cc1dc7a3Sopenharmony_ci	trace_add_data("block_y", config.block_y);
773cc1dc7a3Sopenharmony_ci	trace_add_data("block_z", config.block_z);
774cc1dc7a3Sopenharmony_ci#endif
775cc1dc7a3Sopenharmony_ci
776cc1dc7a3Sopenharmony_ci	*context = ctxo;
777cc1dc7a3Sopenharmony_ci
778cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
779cc1dc7a3Sopenharmony_ci	prepare_angular_tables();
780cc1dc7a3Sopenharmony_ci#endif
781cc1dc7a3Sopenharmony_ci
782cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
783cc1dc7a3Sopenharmony_ci}
784cc1dc7a3Sopenharmony_ci
785cc1dc7a3Sopenharmony_ci/* See header dor documentation. */
786cc1dc7a3Sopenharmony_civoid astcenc_context_free(
787cc1dc7a3Sopenharmony_ci	astcenc_context* ctxo
788cc1dc7a3Sopenharmony_ci) {
789cc1dc7a3Sopenharmony_ci	if (ctxo)
790cc1dc7a3Sopenharmony_ci	{
791cc1dc7a3Sopenharmony_ci		astcenc_contexti* ctx = &ctxo->context;
792cc1dc7a3Sopenharmony_ci		if (ctx->working_buffers)
793cc1dc7a3Sopenharmony_ci		{
794cc1dc7a3Sopenharmony_ci			aligned_free<compression_working_buffers>(ctx->working_buffers);
795cc1dc7a3Sopenharmony_ci		}
796cc1dc7a3Sopenharmony_ci		else
797cc1dc7a3Sopenharmony_ci		{
798cc1dc7a3Sopenharmony_ci			printf("ctx->working_buffers is nullptr !!\n");
799cc1dc7a3Sopenharmony_ci		}
800cc1dc7a3Sopenharmony_ci		if (ctx->bsd)
801cc1dc7a3Sopenharmony_ci		{
802cc1dc7a3Sopenharmony_ci			aligned_free<block_size_descriptor>(ctx->bsd);
803cc1dc7a3Sopenharmony_ci		}
804cc1dc7a3Sopenharmony_ci		else
805cc1dc7a3Sopenharmony_ci		{
806cc1dc7a3Sopenharmony_ci			printf("ctx->bsd is nullptr !!\n");
807cc1dc7a3Sopenharmony_ci		}
808cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS)
809cc1dc7a3Sopenharmony_ci		delete ctx->trace_log;
810cc1dc7a3Sopenharmony_ci#endif
811cc1dc7a3Sopenharmony_ci		delete ctxo;
812cc1dc7a3Sopenharmony_ci	}
813cc1dc7a3Sopenharmony_ci}
814cc1dc7a3Sopenharmony_ci
815cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
816cc1dc7a3Sopenharmony_ci
817cc1dc7a3Sopenharmony_ci/**
818cc1dc7a3Sopenharmony_ci * @brief Compress an image, after any preflight has completed.
819cc1dc7a3Sopenharmony_ci *
820cc1dc7a3Sopenharmony_ci * @param[out] ctxo           The compressor context.
821cc1dc7a3Sopenharmony_ci * @param      thread_index   The thread index.
822cc1dc7a3Sopenharmony_ci * @param      image          The intput image.
823cc1dc7a3Sopenharmony_ci * @param      swizzle        The input swizzle.
824cc1dc7a3Sopenharmony_ci * @param[out] buffer         The output array for the compressed data.
825cc1dc7a3Sopenharmony_ci */
826cc1dc7a3Sopenharmony_cistatic void compress_image(
827cc1dc7a3Sopenharmony_ci	astcenc_context& ctxo,
828cc1dc7a3Sopenharmony_ci	unsigned int thread_index,
829cc1dc7a3Sopenharmony_ci	const astcenc_image& image,
830cc1dc7a3Sopenharmony_ci	const astcenc_swizzle& swizzle,
831cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL
832cc1dc7a3Sopenharmony_ci	uint8_t* buffer,
833cc1dc7a3Sopenharmony_ci	bool calQualityEnable,
834cc1dc7a3Sopenharmony_ci	int32_t *mse[RGBA_COM]
835cc1dc7a3Sopenharmony_ci#else
836cc1dc7a3Sopenharmony_ci	uint8_t* buffer
837cc1dc7a3Sopenharmony_ci#endif
838cc1dc7a3Sopenharmony_ci) {
839cc1dc7a3Sopenharmony_ci	astcenc_contexti& ctx = ctxo.context;
840cc1dc7a3Sopenharmony_ci	const block_size_descriptor& bsd = *ctx.bsd;
841cc1dc7a3Sopenharmony_ci	astcenc_profile decode_mode = ctx.config.profile;
842cc1dc7a3Sopenharmony_ci
843cc1dc7a3Sopenharmony_ci	image_block blk;
844cc1dc7a3Sopenharmony_ci
845cc1dc7a3Sopenharmony_ci	int block_x = bsd.xdim;
846cc1dc7a3Sopenharmony_ci	int block_y = bsd.ydim;
847cc1dc7a3Sopenharmony_ci	int block_z = bsd.zdim;
848cc1dc7a3Sopenharmony_ci	blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
849cc1dc7a3Sopenharmony_ci
850cc1dc7a3Sopenharmony_ci	int dim_x = image.dim_x;
851cc1dc7a3Sopenharmony_ci	int dim_y = image.dim_y;
852cc1dc7a3Sopenharmony_ci	int dim_z = image.dim_z;
853cc1dc7a3Sopenharmony_ci
854cc1dc7a3Sopenharmony_ci	int xblocks = (dim_x + block_x - 1) / block_x;
855cc1dc7a3Sopenharmony_ci	int yblocks = (dim_y + block_y - 1) / block_y;
856cc1dc7a3Sopenharmony_ci	int zblocks = (dim_z + block_z - 1) / block_z;
857cc1dc7a3Sopenharmony_ci	int block_count = zblocks * yblocks * xblocks;
858cc1dc7a3Sopenharmony_ci
859cc1dc7a3Sopenharmony_ci	int row_blocks = xblocks;
860cc1dc7a3Sopenharmony_ci	int plane_blocks = xblocks * yblocks;
861cc1dc7a3Sopenharmony_ci
862cc1dc7a3Sopenharmony_ci	blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8;
863cc1dc7a3Sopenharmony_ci
864cc1dc7a3Sopenharmony_ci	// Populate the block channel weights
865cc1dc7a3Sopenharmony_ci	blk.channel_weight = vfloat4(ctx.config.cw_r_weight,
866cc1dc7a3Sopenharmony_ci	                             ctx.config.cw_g_weight,
867cc1dc7a3Sopenharmony_ci	                             ctx.config.cw_b_weight,
868cc1dc7a3Sopenharmony_ci	                             ctx.config.cw_a_weight);
869cc1dc7a3Sopenharmony_ci
870cc1dc7a3Sopenharmony_ci	// Use preallocated scratch buffer
871cc1dc7a3Sopenharmony_ci	auto& temp_buffers = ctx.working_buffers[thread_index];
872cc1dc7a3Sopenharmony_ci
873cc1dc7a3Sopenharmony_ci	// Only the first thread actually runs the initializer
874cc1dc7a3Sopenharmony_ci	ctxo.manage_compress.init(block_count, ctx.config.progress_callback);
875cc1dc7a3Sopenharmony_ci
876cc1dc7a3Sopenharmony_ci	// Determine if we can use an optimized load function
877cc1dc7a3Sopenharmony_ci	bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) ||
878cc1dc7a3Sopenharmony_ci	                 (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A);
879cc1dc7a3Sopenharmony_ci
880cc1dc7a3Sopenharmony_ci	bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) ||
881cc1dc7a3Sopenharmony_ci	                 (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A);
882cc1dc7a3Sopenharmony_ci
883cc1dc7a3Sopenharmony_ci	bool use_fast_load = !needs_swz && !needs_hdr &&
884cc1dc7a3Sopenharmony_ci	                     block_z == 1 && image.data_type == ASTCENC_TYPE_U8;
885cc1dc7a3Sopenharmony_ci
886cc1dc7a3Sopenharmony_ci	auto load_func = load_image_block;
887cc1dc7a3Sopenharmony_ci	if (use_fast_load)
888cc1dc7a3Sopenharmony_ci	{
889cc1dc7a3Sopenharmony_ci		load_func = load_image_block_fast_ldr;
890cc1dc7a3Sopenharmony_ci	}
891cc1dc7a3Sopenharmony_ci
892cc1dc7a3Sopenharmony_ci	// All threads run this processing loop until there is no work remaining
893cc1dc7a3Sopenharmony_ci	while (true)
894cc1dc7a3Sopenharmony_ci	{
895cc1dc7a3Sopenharmony_ci		unsigned int count;
896cc1dc7a3Sopenharmony_ci		unsigned int base = ctxo.manage_compress.get_task_assignment(16, count);
897cc1dc7a3Sopenharmony_ci		if (!count)
898cc1dc7a3Sopenharmony_ci		{
899cc1dc7a3Sopenharmony_ci			break;
900cc1dc7a3Sopenharmony_ci		}
901cc1dc7a3Sopenharmony_ci
902cc1dc7a3Sopenharmony_ci		for (unsigned int i = base; i < base + count; i++)
903cc1dc7a3Sopenharmony_ci		{
904cc1dc7a3Sopenharmony_ci			// Decode i into x, y, z block indices
905cc1dc7a3Sopenharmony_ci			int z = i / plane_blocks;
906cc1dc7a3Sopenharmony_ci			unsigned int rem = i - (z * plane_blocks);
907cc1dc7a3Sopenharmony_ci			int y = rem / row_blocks;
908cc1dc7a3Sopenharmony_ci			int x = rem - (y * row_blocks);
909cc1dc7a3Sopenharmony_ci
910cc1dc7a3Sopenharmony_ci			// Test if we can apply some basic alpha-scale RDO
911cc1dc7a3Sopenharmony_ci			bool use_full_block = true;
912cc1dc7a3Sopenharmony_ci			if (ctx.config.a_scale_radius != 0 && block_z == 1)
913cc1dc7a3Sopenharmony_ci			{
914cc1dc7a3Sopenharmony_ci				int start_x = x * block_x;
915cc1dc7a3Sopenharmony_ci				int end_x = astc::min(dim_x, start_x + block_x);
916cc1dc7a3Sopenharmony_ci
917cc1dc7a3Sopenharmony_ci				int start_y = y * block_y;
918cc1dc7a3Sopenharmony_ci				int end_y = astc::min(dim_y, start_y + block_y);
919cc1dc7a3Sopenharmony_ci
920cc1dc7a3Sopenharmony_ci				// SATs accumulate error, so don't test exactly zero. Test for
921cc1dc7a3Sopenharmony_ci				// less than 1 alpha in the expanded block footprint that
922cc1dc7a3Sopenharmony_ci				// includes the alpha radius.
923cc1dc7a3Sopenharmony_ci				int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1);
924cc1dc7a3Sopenharmony_ci
925cc1dc7a3Sopenharmony_ci				int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1);
926cc1dc7a3Sopenharmony_ci
927cc1dc7a3Sopenharmony_ci				float footprint = static_cast<float>(x_footprint * y_footprint);
928cc1dc7a3Sopenharmony_ci				float threshold = 0.9f / (255.0f * footprint);
929cc1dc7a3Sopenharmony_ci
930cc1dc7a3Sopenharmony_ci				// Do we have any alpha values?
931cc1dc7a3Sopenharmony_ci				use_full_block = false;
932cc1dc7a3Sopenharmony_ci				for (int ay = start_y; ay < end_y; ay++)
933cc1dc7a3Sopenharmony_ci				{
934cc1dc7a3Sopenharmony_ci					for (int ax = start_x; ax < end_x; ax++)
935cc1dc7a3Sopenharmony_ci					{
936cc1dc7a3Sopenharmony_ci						float a_avg = ctx.input_alpha_averages[ay * dim_x + ax];
937cc1dc7a3Sopenharmony_ci						if (a_avg > threshold)
938cc1dc7a3Sopenharmony_ci						{
939cc1dc7a3Sopenharmony_ci							use_full_block = true;
940cc1dc7a3Sopenharmony_ci							ax = end_x;
941cc1dc7a3Sopenharmony_ci							ay = end_y;
942cc1dc7a3Sopenharmony_ci						}
943cc1dc7a3Sopenharmony_ci					}
944cc1dc7a3Sopenharmony_ci				}
945cc1dc7a3Sopenharmony_ci			}
946cc1dc7a3Sopenharmony_ci
947cc1dc7a3Sopenharmony_ci			// Fetch the full block for compression
948cc1dc7a3Sopenharmony_ci			if (use_full_block)
949cc1dc7a3Sopenharmony_ci			{
950cc1dc7a3Sopenharmony_ci				load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle);
951cc1dc7a3Sopenharmony_ci
952cc1dc7a3Sopenharmony_ci				// Scale RGB error contribution by the maximum alpha in the block
953cc1dc7a3Sopenharmony_ci				// This encourages preserving alpha accuracy in regions with high
954cc1dc7a3Sopenharmony_ci				// transparency, and can buy up to 0.5 dB PSNR.
955cc1dc7a3Sopenharmony_ci				if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)
956cc1dc7a3Sopenharmony_ci				{
957cc1dc7a3Sopenharmony_ci					float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f);
958cc1dc7a3Sopenharmony_ci					blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale,
959cc1dc7a3Sopenharmony_ci					                             ctx.config.cw_g_weight * alpha_scale,
960cc1dc7a3Sopenharmony_ci					                             ctx.config.cw_b_weight * alpha_scale,
961cc1dc7a3Sopenharmony_ci					                             ctx.config.cw_a_weight);
962cc1dc7a3Sopenharmony_ci				}
963cc1dc7a3Sopenharmony_ci			}
964cc1dc7a3Sopenharmony_ci			// Apply alpha scale RDO - substitute constant color block
965cc1dc7a3Sopenharmony_ci			else
966cc1dc7a3Sopenharmony_ci			{
967cc1dc7a3Sopenharmony_ci				blk.origin_texel = vfloat4::zero();
968cc1dc7a3Sopenharmony_ci				blk.data_min = vfloat4::zero();
969cc1dc7a3Sopenharmony_ci				blk.data_mean = vfloat4::zero();
970cc1dc7a3Sopenharmony_ci				blk.data_max = vfloat4::zero();
971cc1dc7a3Sopenharmony_ci				blk.grayscale = true;
972cc1dc7a3Sopenharmony_ci			}
973cc1dc7a3Sopenharmony_ci
974cc1dc7a3Sopenharmony_ci			int offset = ((z * yblocks + y) * xblocks + x) * 16;
975cc1dc7a3Sopenharmony_ci			uint8_t *bp = buffer + offset;
976cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL
977cc1dc7a3Sopenharmony_ci			int32_t *mseBlock[RGBA_COM] = {nullptr, nullptr, nullptr, nullptr};
978cc1dc7a3Sopenharmony_ci			if (calQualityEnable) {
979cc1dc7a3Sopenharmony_ci				offset = (z * yblocks + y) * xblocks + x;
980cc1dc7a3Sopenharmony_ci				mseBlock[R_COM] = mse[R_COM] + offset;
981cc1dc7a3Sopenharmony_ci				mseBlock[G_COM] = mse[G_COM] + offset;
982cc1dc7a3Sopenharmony_ci				mseBlock[B_COM] = mse[B_COM] + offset;
983cc1dc7a3Sopenharmony_ci				mseBlock[A_COM] = mse[A_COM] + offset;
984cc1dc7a3Sopenharmony_ci			}
985cc1dc7a3Sopenharmony_ci			compress_block(ctx, blk, bp, temp_buffers, calQualityEnable, mseBlock);
986cc1dc7a3Sopenharmony_ci#else
987cc1dc7a3Sopenharmony_ci			compress_block(ctx, blk, bp, temp_buffers);
988cc1dc7a3Sopenharmony_ci#endif
989cc1dc7a3Sopenharmony_ci		}
990cc1dc7a3Sopenharmony_ci
991cc1dc7a3Sopenharmony_ci		ctxo.manage_compress.complete_task_assignment(count);
992cc1dc7a3Sopenharmony_ci	}
993cc1dc7a3Sopenharmony_ci}
994cc1dc7a3Sopenharmony_ci
995cc1dc7a3Sopenharmony_ci/**
996cc1dc7a3Sopenharmony_ci * @brief Compute regional averages in an image.
997cc1dc7a3Sopenharmony_ci *
998cc1dc7a3Sopenharmony_ci * This function can be called by multiple threads, but only after a single
999cc1dc7a3Sopenharmony_ci * thread calls the setup function @c init_compute_averages().
1000cc1dc7a3Sopenharmony_ci *
1001cc1dc7a3Sopenharmony_ci * Results are written back into @c img->input_alpha_averages.
1002cc1dc7a3Sopenharmony_ci *
1003cc1dc7a3Sopenharmony_ci * @param[out] ctx   The context.
1004cc1dc7a3Sopenharmony_ci * @param      ag    The average and variance arguments created during setup.
1005cc1dc7a3Sopenharmony_ci */
1006cc1dc7a3Sopenharmony_cistatic void compute_averages(
1007cc1dc7a3Sopenharmony_ci	astcenc_context& ctx,
1008cc1dc7a3Sopenharmony_ci	const avg_args &ag
1009cc1dc7a3Sopenharmony_ci) {
1010cc1dc7a3Sopenharmony_ci	pixel_region_args arg = ag.arg;
1011cc1dc7a3Sopenharmony_ci	arg.work_memory = new vfloat4[ag.work_memory_size];
1012cc1dc7a3Sopenharmony_ci
1013cc1dc7a3Sopenharmony_ci	int size_x = ag.img_size_x;
1014cc1dc7a3Sopenharmony_ci	int size_y = ag.img_size_y;
1015cc1dc7a3Sopenharmony_ci	int size_z = ag.img_size_z;
1016cc1dc7a3Sopenharmony_ci
1017cc1dc7a3Sopenharmony_ci	int step_xy = ag.blk_size_xy;
1018cc1dc7a3Sopenharmony_ci	int step_z = ag.blk_size_z;
1019cc1dc7a3Sopenharmony_ci
1020cc1dc7a3Sopenharmony_ci	int y_tasks = (size_y + step_xy - 1) / step_xy;
1021cc1dc7a3Sopenharmony_ci
1022cc1dc7a3Sopenharmony_ci	// All threads run this processing loop until there is no work remaining
1023cc1dc7a3Sopenharmony_ci	while (true)
1024cc1dc7a3Sopenharmony_ci	{
1025cc1dc7a3Sopenharmony_ci		unsigned int count;
1026cc1dc7a3Sopenharmony_ci		unsigned int base = ctx.manage_avg.get_task_assignment(16, count);
1027cc1dc7a3Sopenharmony_ci		if (!count)
1028cc1dc7a3Sopenharmony_ci		{
1029cc1dc7a3Sopenharmony_ci			break;
1030cc1dc7a3Sopenharmony_ci		}
1031cc1dc7a3Sopenharmony_ci
1032cc1dc7a3Sopenharmony_ci		for (unsigned int i = base; i < base + count; i++)
1033cc1dc7a3Sopenharmony_ci		{
1034cc1dc7a3Sopenharmony_ci			int z = (i / (y_tasks)) * step_z;
1035cc1dc7a3Sopenharmony_ci			int y = (i - (z * y_tasks)) * step_xy;
1036cc1dc7a3Sopenharmony_ci
1037cc1dc7a3Sopenharmony_ci			arg.size_z = astc::min(step_z, size_z - z);
1038cc1dc7a3Sopenharmony_ci			arg.offset_z = z;
1039cc1dc7a3Sopenharmony_ci
1040cc1dc7a3Sopenharmony_ci			arg.size_y = astc::min(step_xy, size_y - y);
1041cc1dc7a3Sopenharmony_ci			arg.offset_y = y;
1042cc1dc7a3Sopenharmony_ci
1043cc1dc7a3Sopenharmony_ci			for (int x = 0; x < size_x; x += step_xy)
1044cc1dc7a3Sopenharmony_ci			{
1045cc1dc7a3Sopenharmony_ci				arg.size_x = astc::min(step_xy, size_x - x);
1046cc1dc7a3Sopenharmony_ci				arg.offset_x = x;
1047cc1dc7a3Sopenharmony_ci				compute_pixel_region_variance(ctx.context, arg);
1048cc1dc7a3Sopenharmony_ci			}
1049cc1dc7a3Sopenharmony_ci		}
1050cc1dc7a3Sopenharmony_ci
1051cc1dc7a3Sopenharmony_ci		ctx.manage_avg.complete_task_assignment(count);
1052cc1dc7a3Sopenharmony_ci	}
1053cc1dc7a3Sopenharmony_ci
1054cc1dc7a3Sopenharmony_ci	delete[] arg.work_memory;
1055cc1dc7a3Sopenharmony_ci}
1056cc1dc7a3Sopenharmony_ci
1057cc1dc7a3Sopenharmony_ci#endif
1058cc1dc7a3Sopenharmony_ci
1059cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1060cc1dc7a3Sopenharmony_ciastcenc_error astcenc_compress_image(
1061cc1dc7a3Sopenharmony_ci	astcenc_context* ctxo,
1062cc1dc7a3Sopenharmony_ci	astcenc_image* imagep,
1063cc1dc7a3Sopenharmony_ci	const astcenc_swizzle* swizzle,
1064cc1dc7a3Sopenharmony_ci	uint8_t* data_out,
1065cc1dc7a3Sopenharmony_ci	size_t data_len,
1066cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL
1067cc1dc7a3Sopenharmony_ci	bool calQualityEnable,
1068cc1dc7a3Sopenharmony_ci	int32_t *mse[RGBA_COM],
1069cc1dc7a3Sopenharmony_ci#endif
1070cc1dc7a3Sopenharmony_ci	unsigned int thread_index
1071cc1dc7a3Sopenharmony_ci) {
1072cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY)
1073cc1dc7a3Sopenharmony_ci	(void)ctxo;
1074cc1dc7a3Sopenharmony_ci	(void)imagep;
1075cc1dc7a3Sopenharmony_ci	(void)swizzle;
1076cc1dc7a3Sopenharmony_ci	(void)data_out;
1077cc1dc7a3Sopenharmony_ci	(void)data_len;
1078cc1dc7a3Sopenharmony_ci	(void)thread_index;
1079cc1dc7a3Sopenharmony_ci	return ASTCENC_ERR_BAD_CONTEXT;
1080cc1dc7a3Sopenharmony_ci#else
1081cc1dc7a3Sopenharmony_ci	astcenc_contexti* ctx = &ctxo->context;
1082cc1dc7a3Sopenharmony_ci	astcenc_error status;
1083cc1dc7a3Sopenharmony_ci	astcenc_image& image = *imagep;
1084cc1dc7a3Sopenharmony_ci
1085cc1dc7a3Sopenharmony_ci	if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1086cc1dc7a3Sopenharmony_ci	{
1087cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_CONTEXT;
1088cc1dc7a3Sopenharmony_ci	}
1089cc1dc7a3Sopenharmony_ci
1090cc1dc7a3Sopenharmony_ci	status = validate_compression_swizzle(*swizzle);
1091cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
1092cc1dc7a3Sopenharmony_ci	{
1093cc1dc7a3Sopenharmony_ci		return status;
1094cc1dc7a3Sopenharmony_ci	}
1095cc1dc7a3Sopenharmony_ci
1096cc1dc7a3Sopenharmony_ci	if (thread_index >= ctx->thread_count)
1097cc1dc7a3Sopenharmony_ci	{
1098cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PARAM;
1099cc1dc7a3Sopenharmony_ci	}
1100cc1dc7a3Sopenharmony_ci
1101cc1dc7a3Sopenharmony_ci	unsigned int block_x = ctx->config.block_x;
1102cc1dc7a3Sopenharmony_ci	unsigned int block_y = ctx->config.block_y;
1103cc1dc7a3Sopenharmony_ci	unsigned int block_z = ctx->config.block_z;
1104cc1dc7a3Sopenharmony_ci
1105cc1dc7a3Sopenharmony_ci	unsigned int xblocks = (image.dim_x + block_x - 1) / block_x;
1106cc1dc7a3Sopenharmony_ci	unsigned int yblocks = (image.dim_y + block_y - 1) / block_y;
1107cc1dc7a3Sopenharmony_ci	unsigned int zblocks = (image.dim_z + block_z - 1) / block_z;
1108cc1dc7a3Sopenharmony_ci
1109cc1dc7a3Sopenharmony_ci	// Check we have enough output space (16 bytes per block)
1110cc1dc7a3Sopenharmony_ci	size_t size_needed = xblocks * yblocks * zblocks * 16;
1111cc1dc7a3Sopenharmony_ci	if (data_len < size_needed)
1112cc1dc7a3Sopenharmony_ci	{
1113cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_OUT_OF_MEM;
1114cc1dc7a3Sopenharmony_ci	}
1115cc1dc7a3Sopenharmony_ci
1116cc1dc7a3Sopenharmony_ci	// If context thread count is one then implicitly reset
1117cc1dc7a3Sopenharmony_ci	if (ctx->thread_count == 1)
1118cc1dc7a3Sopenharmony_ci	{
1119cc1dc7a3Sopenharmony_ci		astcenc_compress_reset(ctxo);
1120cc1dc7a3Sopenharmony_ci	}
1121cc1dc7a3Sopenharmony_ci
1122cc1dc7a3Sopenharmony_ci	if (ctx->config.a_scale_radius != 0)
1123cc1dc7a3Sopenharmony_ci	{
1124cc1dc7a3Sopenharmony_ci		// First thread to enter will do setup, other threads will subsequently
1125cc1dc7a3Sopenharmony_ci		// enter the critical section but simply skip over the initialization
1126cc1dc7a3Sopenharmony_ci		auto init_avg = [ctx, &image, swizzle]() {
1127cc1dc7a3Sopenharmony_ci			// Perform memory allocations for the destination buffers
1128cc1dc7a3Sopenharmony_ci			size_t texel_count = image.dim_x * image.dim_y * image.dim_z;
1129cc1dc7a3Sopenharmony_ci			ctx->input_alpha_averages = new float[texel_count];
1130cc1dc7a3Sopenharmony_ci
1131cc1dc7a3Sopenharmony_ci			return init_compute_averages(
1132cc1dc7a3Sopenharmony_ci				image, ctx->config.a_scale_radius, *swizzle,
1133cc1dc7a3Sopenharmony_ci				ctx->avg_preprocess_args);
1134cc1dc7a3Sopenharmony_ci		};
1135cc1dc7a3Sopenharmony_ci
1136cc1dc7a3Sopenharmony_ci		// Only the first thread actually runs the initializer
1137cc1dc7a3Sopenharmony_ci		ctxo->manage_avg.init(init_avg);
1138cc1dc7a3Sopenharmony_ci
1139cc1dc7a3Sopenharmony_ci		// All threads will enter this function and dynamically grab work
1140cc1dc7a3Sopenharmony_ci		compute_averages(*ctxo, ctx->avg_preprocess_args);
1141cc1dc7a3Sopenharmony_ci	}
1142cc1dc7a3Sopenharmony_ci
1143cc1dc7a3Sopenharmony_ci	// Wait for compute_averages to complete before compressing
1144cc1dc7a3Sopenharmony_ci	ctxo->manage_avg.wait();
1145cc1dc7a3Sopenharmony_ci#if QUALITY_CONTROL
1146cc1dc7a3Sopenharmony_ci	compress_image(*ctxo, thread_index, image, *swizzle, data_out, calQualityEnable, mse);
1147cc1dc7a3Sopenharmony_ci#else
1148cc1dc7a3Sopenharmony_ci	compress_image(*ctxo, thread_index, image, *swizzle, data_out);
1149cc1dc7a3Sopenharmony_ci#endif
1150cc1dc7a3Sopenharmony_ci	// Wait for compress to complete before freeing memory
1151cc1dc7a3Sopenharmony_ci	ctxo->manage_compress.wait();
1152cc1dc7a3Sopenharmony_ci
1153cc1dc7a3Sopenharmony_ci	auto term_compress = [ctx]() {
1154cc1dc7a3Sopenharmony_ci		delete[] ctx->input_alpha_averages;
1155cc1dc7a3Sopenharmony_ci		ctx->input_alpha_averages = nullptr;
1156cc1dc7a3Sopenharmony_ci	};
1157cc1dc7a3Sopenharmony_ci
1158cc1dc7a3Sopenharmony_ci	// Only the first thread to arrive actually runs the term
1159cc1dc7a3Sopenharmony_ci	ctxo->manage_compress.term(term_compress);
1160cc1dc7a3Sopenharmony_ci
1161cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
1162cc1dc7a3Sopenharmony_ci#endif
1163cc1dc7a3Sopenharmony_ci}
1164cc1dc7a3Sopenharmony_ci
1165cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1166cc1dc7a3Sopenharmony_ciastcenc_error astcenc_compress_reset(
1167cc1dc7a3Sopenharmony_ci	astcenc_context* ctxo
1168cc1dc7a3Sopenharmony_ci) {
1169cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY)
1170cc1dc7a3Sopenharmony_ci	(void)ctxo;
1171cc1dc7a3Sopenharmony_ci	return ASTCENC_ERR_BAD_CONTEXT;
1172cc1dc7a3Sopenharmony_ci#else
1173cc1dc7a3Sopenharmony_ci	astcenc_contexti* ctx = &ctxo->context;
1174cc1dc7a3Sopenharmony_ci	if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1175cc1dc7a3Sopenharmony_ci	{
1176cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_CONTEXT;
1177cc1dc7a3Sopenharmony_ci	}
1178cc1dc7a3Sopenharmony_ci
1179cc1dc7a3Sopenharmony_ci	ctxo->manage_avg.reset();
1180cc1dc7a3Sopenharmony_ci	ctxo->manage_compress.reset();
1181cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
1182cc1dc7a3Sopenharmony_ci#endif
1183cc1dc7a3Sopenharmony_ci}
1184cc1dc7a3Sopenharmony_ci
1185cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1186cc1dc7a3Sopenharmony_ciastcenc_error astcenc_decompress_image(
1187cc1dc7a3Sopenharmony_ci	astcenc_context* ctxo,
1188cc1dc7a3Sopenharmony_ci	const uint8_t* data,
1189cc1dc7a3Sopenharmony_ci	size_t data_len,
1190cc1dc7a3Sopenharmony_ci	astcenc_image* image_outp,
1191cc1dc7a3Sopenharmony_ci	const astcenc_swizzle* swizzle,
1192cc1dc7a3Sopenharmony_ci	unsigned int thread_index
1193cc1dc7a3Sopenharmony_ci) {
1194cc1dc7a3Sopenharmony_ci	astcenc_error status;
1195cc1dc7a3Sopenharmony_ci	astcenc_image& image_out = *image_outp;
1196cc1dc7a3Sopenharmony_ci	astcenc_contexti* ctx = &ctxo->context;
1197cc1dc7a3Sopenharmony_ci
1198cc1dc7a3Sopenharmony_ci	// Today this doesn't matter (working set on stack) but might in future ...
1199cc1dc7a3Sopenharmony_ci	if (thread_index >= ctx->thread_count)
1200cc1dc7a3Sopenharmony_ci	{
1201cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_BAD_PARAM;
1202cc1dc7a3Sopenharmony_ci	}
1203cc1dc7a3Sopenharmony_ci
1204cc1dc7a3Sopenharmony_ci	status = validate_decompression_swizzle(*swizzle);
1205cc1dc7a3Sopenharmony_ci	if (status != ASTCENC_SUCCESS)
1206cc1dc7a3Sopenharmony_ci	{
1207cc1dc7a3Sopenharmony_ci		return status;
1208cc1dc7a3Sopenharmony_ci	}
1209cc1dc7a3Sopenharmony_ci
1210cc1dc7a3Sopenharmony_ci	unsigned int block_x = ctx->config.block_x;
1211cc1dc7a3Sopenharmony_ci	unsigned int block_y = ctx->config.block_y;
1212cc1dc7a3Sopenharmony_ci	unsigned int block_z = ctx->config.block_z;
1213cc1dc7a3Sopenharmony_ci
1214cc1dc7a3Sopenharmony_ci	unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x;
1215cc1dc7a3Sopenharmony_ci	unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y;
1216cc1dc7a3Sopenharmony_ci	unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z;
1217cc1dc7a3Sopenharmony_ci	unsigned int block_count = zblocks * yblocks * xblocks;
1218cc1dc7a3Sopenharmony_ci
1219cc1dc7a3Sopenharmony_ci	int row_blocks = xblocks;
1220cc1dc7a3Sopenharmony_ci	int plane_blocks = xblocks * yblocks;
1221cc1dc7a3Sopenharmony_ci
1222cc1dc7a3Sopenharmony_ci	// Check we have enough output space (16 bytes per block)
1223cc1dc7a3Sopenharmony_ci	size_t size_needed = xblocks * yblocks * zblocks * 16;
1224cc1dc7a3Sopenharmony_ci	if (data_len < size_needed)
1225cc1dc7a3Sopenharmony_ci	{
1226cc1dc7a3Sopenharmony_ci		return ASTCENC_ERR_OUT_OF_MEM;
1227cc1dc7a3Sopenharmony_ci	}
1228cc1dc7a3Sopenharmony_ci
1229cc1dc7a3Sopenharmony_ci	image_block blk;
1230cc1dc7a3Sopenharmony_ci	blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
1231cc1dc7a3Sopenharmony_ci
1232cc1dc7a3Sopenharmony_ci	// Decode mode inferred from the output data type
1233cc1dc7a3Sopenharmony_ci	blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8;
1234cc1dc7a3Sopenharmony_ci
1235cc1dc7a3Sopenharmony_ci	// If context thread count is one then implicitly reset
1236cc1dc7a3Sopenharmony_ci	if (ctx->thread_count == 1)
1237cc1dc7a3Sopenharmony_ci	{
1238cc1dc7a3Sopenharmony_ci		astcenc_decompress_reset(ctxo);
1239cc1dc7a3Sopenharmony_ci	}
1240cc1dc7a3Sopenharmony_ci
1241cc1dc7a3Sopenharmony_ci	// Only the first thread actually runs the initializer
1242cc1dc7a3Sopenharmony_ci	ctxo->manage_decompress.init(block_count, nullptr);
1243cc1dc7a3Sopenharmony_ci
1244cc1dc7a3Sopenharmony_ci	// All threads run this processing loop until there is no work remaining
1245cc1dc7a3Sopenharmony_ci	while (true)
1246cc1dc7a3Sopenharmony_ci	{
1247cc1dc7a3Sopenharmony_ci		unsigned int count;
1248cc1dc7a3Sopenharmony_ci		unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count);
1249cc1dc7a3Sopenharmony_ci		if (!count)
1250cc1dc7a3Sopenharmony_ci		{
1251cc1dc7a3Sopenharmony_ci			break;
1252cc1dc7a3Sopenharmony_ci		}
1253cc1dc7a3Sopenharmony_ci
1254cc1dc7a3Sopenharmony_ci		for (unsigned int i = base; i < base + count; i++)
1255cc1dc7a3Sopenharmony_ci		{
1256cc1dc7a3Sopenharmony_ci			// Decode i into x, y, z block indices
1257cc1dc7a3Sopenharmony_ci			int z = i / plane_blocks;
1258cc1dc7a3Sopenharmony_ci			unsigned int rem = i - (z * plane_blocks);
1259cc1dc7a3Sopenharmony_ci			int y = rem / row_blocks;
1260cc1dc7a3Sopenharmony_ci			int x = rem - (y * row_blocks);
1261cc1dc7a3Sopenharmony_ci
1262cc1dc7a3Sopenharmony_ci			unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16;
1263cc1dc7a3Sopenharmony_ci			const uint8_t* bp = data + offset;
1264cc1dc7a3Sopenharmony_ci
1265cc1dc7a3Sopenharmony_ci			symbolic_compressed_block scb;
1266cc1dc7a3Sopenharmony_ci
1267cc1dc7a3Sopenharmony_ci			physical_to_symbolic(*ctx->bsd, bp, scb);
1268cc1dc7a3Sopenharmony_ci
1269cc1dc7a3Sopenharmony_ci			decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
1270cc1dc7a3Sopenharmony_ci			                          x * block_x, y * block_y, z * block_z,
1271cc1dc7a3Sopenharmony_ci			                          scb, blk);
1272cc1dc7a3Sopenharmony_ci
1273cc1dc7a3Sopenharmony_ci			store_image_block(image_out, blk, *ctx->bsd,
1274cc1dc7a3Sopenharmony_ci			                  x * block_x, y * block_y, z * block_z, *swizzle);
1275cc1dc7a3Sopenharmony_ci		}
1276cc1dc7a3Sopenharmony_ci
1277cc1dc7a3Sopenharmony_ci		ctxo->manage_decompress.complete_task_assignment(count);
1278cc1dc7a3Sopenharmony_ci	}
1279cc1dc7a3Sopenharmony_ci
1280cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
1281cc1dc7a3Sopenharmony_ci}
1282cc1dc7a3Sopenharmony_ci
1283cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1284cc1dc7a3Sopenharmony_ciastcenc_error astcenc_decompress_reset(
1285cc1dc7a3Sopenharmony_ci	astcenc_context* ctxo
1286cc1dc7a3Sopenharmony_ci) {
1287cc1dc7a3Sopenharmony_ci	ctxo->manage_decompress.reset();
1288cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
1289cc1dc7a3Sopenharmony_ci}
1290cc1dc7a3Sopenharmony_ci
1291cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1292cc1dc7a3Sopenharmony_ciastcenc_error astcenc_get_block_info(
1293cc1dc7a3Sopenharmony_ci	astcenc_context* ctxo,
1294cc1dc7a3Sopenharmony_ci	const uint8_t data[16],
1295cc1dc7a3Sopenharmony_ci	astcenc_block_info* info
1296cc1dc7a3Sopenharmony_ci) {
1297cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DECOMPRESS_ONLY)
1298cc1dc7a3Sopenharmony_ci	(void)ctxo;
1299cc1dc7a3Sopenharmony_ci	(void)data;
1300cc1dc7a3Sopenharmony_ci	(void)info;
1301cc1dc7a3Sopenharmony_ci	return ASTCENC_ERR_BAD_CONTEXT;
1302cc1dc7a3Sopenharmony_ci#else
1303cc1dc7a3Sopenharmony_ci	astcenc_contexti* ctx = &ctxo->context;
1304cc1dc7a3Sopenharmony_ci
1305cc1dc7a3Sopenharmony_ci	// Decode the compressed data into a symbolic form
1306cc1dc7a3Sopenharmony_ci	symbolic_compressed_block scb;
1307cc1dc7a3Sopenharmony_ci	physical_to_symbolic(*ctx->bsd, data, scb);
1308cc1dc7a3Sopenharmony_ci
1309cc1dc7a3Sopenharmony_ci	// Fetch the appropriate partition and decimation tables
1310cc1dc7a3Sopenharmony_ci	block_size_descriptor& bsd = *ctx->bsd;
1311cc1dc7a3Sopenharmony_ci
1312cc1dc7a3Sopenharmony_ci	// Start from a clean slate
1313cc1dc7a3Sopenharmony_ci	memset(info, 0, sizeof(*info));
1314cc1dc7a3Sopenharmony_ci
1315cc1dc7a3Sopenharmony_ci	// Basic info we can always populate
1316cc1dc7a3Sopenharmony_ci	info->profile = ctx->config.profile;
1317cc1dc7a3Sopenharmony_ci
1318cc1dc7a3Sopenharmony_ci	info->block_x = ctx->config.block_x;
1319cc1dc7a3Sopenharmony_ci	info->block_y = ctx->config.block_y;
1320cc1dc7a3Sopenharmony_ci	info->block_z = ctx->config.block_z;
1321cc1dc7a3Sopenharmony_ci	info->texel_count = bsd.texel_count;
1322cc1dc7a3Sopenharmony_ci
1323cc1dc7a3Sopenharmony_ci	// Check for error blocks first
1324cc1dc7a3Sopenharmony_ci	info->is_error_block = scb.block_type == SYM_BTYPE_ERROR;
1325cc1dc7a3Sopenharmony_ci	if (info->is_error_block)
1326cc1dc7a3Sopenharmony_ci	{
1327cc1dc7a3Sopenharmony_ci		return ASTCENC_SUCCESS;
1328cc1dc7a3Sopenharmony_ci	}
1329cc1dc7a3Sopenharmony_ci
1330cc1dc7a3Sopenharmony_ci	// Check for constant color blocks second
1331cc1dc7a3Sopenharmony_ci	info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 ||
1332cc1dc7a3Sopenharmony_ci	                          scb.block_type == SYM_BTYPE_CONST_U16;
1333cc1dc7a3Sopenharmony_ci	if (info->is_constant_block)
1334cc1dc7a3Sopenharmony_ci	{
1335cc1dc7a3Sopenharmony_ci		return ASTCENC_SUCCESS;
1336cc1dc7a3Sopenharmony_ci	}
1337cc1dc7a3Sopenharmony_ci
1338cc1dc7a3Sopenharmony_ci	// Otherwise handle a full block ; known to be valid after conditions above have been checked
1339cc1dc7a3Sopenharmony_ci	int partition_count = scb.partition_count;
1340cc1dc7a3Sopenharmony_ci	const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
1341cc1dc7a3Sopenharmony_ci
1342cc1dc7a3Sopenharmony_ci	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
1343cc1dc7a3Sopenharmony_ci	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
1344cc1dc7a3Sopenharmony_ci
1345cc1dc7a3Sopenharmony_ci	info->weight_x = di.weight_x;
1346cc1dc7a3Sopenharmony_ci	info->weight_y = di.weight_y;
1347cc1dc7a3Sopenharmony_ci	info->weight_z = di.weight_z;
1348cc1dc7a3Sopenharmony_ci
1349cc1dc7a3Sopenharmony_ci	info->is_dual_plane_block = bm.is_dual_plane != 0;
1350cc1dc7a3Sopenharmony_ci
1351cc1dc7a3Sopenharmony_ci	info->partition_count = scb.partition_count;
1352cc1dc7a3Sopenharmony_ci	info->partition_index = scb.partition_index;
1353cc1dc7a3Sopenharmony_ci	info->dual_plane_component = scb.plane2_component;
1354cc1dc7a3Sopenharmony_ci
1355cc1dc7a3Sopenharmony_ci	info->color_level_count = get_quant_level(scb.get_color_quant_mode());
1356cc1dc7a3Sopenharmony_ci	info->weight_level_count = get_quant_level(bm.get_weight_quant_mode());
1357cc1dc7a3Sopenharmony_ci
1358cc1dc7a3Sopenharmony_ci	// Unpack color endpoints for each active partition
1359cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < scb.partition_count; i++)
1360cc1dc7a3Sopenharmony_ci	{
1361cc1dc7a3Sopenharmony_ci		bool rgb_hdr;
1362cc1dc7a3Sopenharmony_ci		bool a_hdr;
1363cc1dc7a3Sopenharmony_ci		vint4 endpnt[2];
1364cc1dc7a3Sopenharmony_ci
1365cc1dc7a3Sopenharmony_ci		unpack_color_endpoints(ctx->config.profile,
1366cc1dc7a3Sopenharmony_ci		                       scb.color_formats[i],
1367cc1dc7a3Sopenharmony_ci		                       scb.color_values[i],
1368cc1dc7a3Sopenharmony_ci		                       rgb_hdr, a_hdr,
1369cc1dc7a3Sopenharmony_ci		                       endpnt[0], endpnt[1]);
1370cc1dc7a3Sopenharmony_ci
1371cc1dc7a3Sopenharmony_ci		// Store the color endpoint mode info
1372cc1dc7a3Sopenharmony_ci		info->color_endpoint_modes[i] = scb.color_formats[i];
1373cc1dc7a3Sopenharmony_ci		info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr;
1374cc1dc7a3Sopenharmony_ci
1375cc1dc7a3Sopenharmony_ci		// Store the unpacked and decoded color endpoint
1376cc1dc7a3Sopenharmony_ci		vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr);
1377cc1dc7a3Sopenharmony_ci		for (int j = 0; j < 2; j++)
1378cc1dc7a3Sopenharmony_ci		{
1379cc1dc7a3Sopenharmony_ci			vint4 color_lns = lns_to_sf16(endpnt[j]);
1380cc1dc7a3Sopenharmony_ci			vint4 color_unorm = unorm16_to_sf16(endpnt[j]);
1381cc1dc7a3Sopenharmony_ci			vint4 datai = select(color_unorm, color_lns, hdr_mask);
1382cc1dc7a3Sopenharmony_ci			store(float16_to_float(datai), info->color_endpoints[i][j]);
1383cc1dc7a3Sopenharmony_ci		}
1384cc1dc7a3Sopenharmony_ci	}
1385cc1dc7a3Sopenharmony_ci
1386cc1dc7a3Sopenharmony_ci	// Unpack weights for each texel
1387cc1dc7a3Sopenharmony_ci	int weight_plane1[BLOCK_MAX_TEXELS];
1388cc1dc7a3Sopenharmony_ci	int weight_plane2[BLOCK_MAX_TEXELS];
1389cc1dc7a3Sopenharmony_ci
1390cc1dc7a3Sopenharmony_ci	unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2);
1391cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < bsd.texel_count; i++)
1392cc1dc7a3Sopenharmony_ci	{
1393cc1dc7a3Sopenharmony_ci		info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
1394cc1dc7a3Sopenharmony_ci		if (info->is_dual_plane_block)
1395cc1dc7a3Sopenharmony_ci		{
1396cc1dc7a3Sopenharmony_ci			info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
1397cc1dc7a3Sopenharmony_ci		}
1398cc1dc7a3Sopenharmony_ci	}
1399cc1dc7a3Sopenharmony_ci
1400cc1dc7a3Sopenharmony_ci	// Unpack partition assignments for each texel
1401cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < bsd.texel_count; i++)
1402cc1dc7a3Sopenharmony_ci	{
1403cc1dc7a3Sopenharmony_ci		info->partition_assignment[i] = pi.partition_of_texel[i];
1404cc1dc7a3Sopenharmony_ci	}
1405cc1dc7a3Sopenharmony_ci
1406cc1dc7a3Sopenharmony_ci	return ASTCENC_SUCCESS;
1407cc1dc7a3Sopenharmony_ci#endif
1408cc1dc7a3Sopenharmony_ci}
1409cc1dc7a3Sopenharmony_ci
1410cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1411cc1dc7a3Sopenharmony_ciconst char* astcenc_get_error_string(
1412cc1dc7a3Sopenharmony_ci	astcenc_error status
1413cc1dc7a3Sopenharmony_ci) {
1414cc1dc7a3Sopenharmony_ci	// Values in this enum are from an external user, so not guaranteed to be
1415cc1dc7a3Sopenharmony_ci	// bounded to the enum values
1416cc1dc7a3Sopenharmony_ci	switch (static_cast<int>(status))
1417cc1dc7a3Sopenharmony_ci	{
1418cc1dc7a3Sopenharmony_ci	case ASTCENC_SUCCESS:
1419cc1dc7a3Sopenharmony_ci		return "ASTCENC_SUCCESS";
1420cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_OUT_OF_MEM:
1421cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_OUT_OF_MEM";
1422cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_CPU_FLOAT:
1423cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_CPU_FLOAT";
1424cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_PARAM:
1425cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_PARAM";
1426cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_BLOCK_SIZE:
1427cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_BLOCK_SIZE";
1428cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_PROFILE:
1429cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_PROFILE";
1430cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_QUALITY:
1431cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_QUALITY";
1432cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_FLAGS:
1433cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_FLAGS";
1434cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_SWIZZLE:
1435cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_SWIZZLE";
1436cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_CONTEXT:
1437cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_CONTEXT";
1438cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_NOT_IMPLEMENTED:
1439cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_NOT_IMPLEMENTED";
1440cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_BAD_DECODE_MODE:
1441cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_BAD_DECODE_MODE";
1442cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS)
1443cc1dc7a3Sopenharmony_ci	case ASTCENC_ERR_DTRACE_FAILURE:
1444cc1dc7a3Sopenharmony_ci		return "ASTCENC_ERR_DTRACE_FAILURE";
1445cc1dc7a3Sopenharmony_ci#endif
1446cc1dc7a3Sopenharmony_ci	default:
1447cc1dc7a3Sopenharmony_ci		return nullptr;
1448cc1dc7a3Sopenharmony_ci	}
1449cc1dc7a3Sopenharmony_ci}
1450