1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0
2cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
3cc1dc7a3Sopenharmony_ci// Copyright 2011-2022 Arm Limited
4cc1dc7a3Sopenharmony_ci//
5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy
7cc1dc7a3Sopenharmony_ci// of the License at:
8cc1dc7a3Sopenharmony_ci//
9cc1dc7a3Sopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
10cc1dc7a3Sopenharmony_ci//
11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software
12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations
15cc1dc7a3Sopenharmony_ci// under the License.
16cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
17cc1dc7a3Sopenharmony_ci
18cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
19cc1dc7a3Sopenharmony_ci
20cc1dc7a3Sopenharmony_ci/**
21cc1dc7a3Sopenharmony_ci * @brief Functions for finding best endpoint format.
22cc1dc7a3Sopenharmony_ci *
23cc1dc7a3Sopenharmony_ci * We assume there are two independent sources of error in any given partition:
24cc1dc7a3Sopenharmony_ci *
25cc1dc7a3Sopenharmony_ci *   - Encoding choice errors
26cc1dc7a3Sopenharmony_ci *   - Quantization errors
27cc1dc7a3Sopenharmony_ci *
28cc1dc7a3Sopenharmony_ci * Encoding choice errors are caused by encoder decisions. For example:
29cc1dc7a3Sopenharmony_ci *
30cc1dc7a3Sopenharmony_ci *   - Using luminance instead of separate RGB components.
31cc1dc7a3Sopenharmony_ci *   - Using a constant 1.0 alpha instead of storing an alpha component.
32cc1dc7a3Sopenharmony_ci *   - Using RGB+scale instead of storing two full RGB endpoints.
33cc1dc7a3Sopenharmony_ci *
34cc1dc7a3Sopenharmony_ci * Quantization errors occur due to the limited precision we use for storage. These errors generally
35cc1dc7a3Sopenharmony_ci * scale with quantization level, but are not actually independent of color encoding. In particular:
36cc1dc7a3Sopenharmony_ci *
37cc1dc7a3Sopenharmony_ci *   - If we can use offset encoding then quantization error is halved.
38cc1dc7a3Sopenharmony_ci *   - If we can use blue-contraction then quantization error for RG is halved.
39cc1dc7a3Sopenharmony_ci *   - If we use HDR endpoints the quantization error is higher.
40cc1dc7a3Sopenharmony_ci *
41cc1dc7a3Sopenharmony_ci * Apart from these effects, we assume the error is proportional to the quantization step size.
42cc1dc7a3Sopenharmony_ci */
43cc1dc7a3Sopenharmony_ci
44cc1dc7a3Sopenharmony_ci
45cc1dc7a3Sopenharmony_ci#include "astcenc_internal.h"
46cc1dc7a3Sopenharmony_ci#include "astcenc_vecmathlib.h"
47cc1dc7a3Sopenharmony_ci
48cc1dc7a3Sopenharmony_ci#include <assert.h>
49cc1dc7a3Sopenharmony_ci
50cc1dc7a3Sopenharmony_ci/**
51cc1dc7a3Sopenharmony_ci * @brief Compute the errors of the endpoint line options for one partition.
52cc1dc7a3Sopenharmony_ci *
53cc1dc7a3Sopenharmony_ci * Uncorrelated data assumes storing completely independent RGBA channels for each endpoint. Same
54cc1dc7a3Sopenharmony_ci * chroma data assumes storing RGBA endpoints which pass though the origin (LDR only). RGBL data
55cc1dc7a3Sopenharmony_ci * assumes storing RGB + lumashift (HDR only). Luminance error assumes storing RGB channels as a
56cc1dc7a3Sopenharmony_ci * single value.
57cc1dc7a3Sopenharmony_ci *
58cc1dc7a3Sopenharmony_ci *
59cc1dc7a3Sopenharmony_ci * @param      pi                The partition info data.
60cc1dc7a3Sopenharmony_ci * @param      partition_index   The partition index to compule the error for.
61cc1dc7a3Sopenharmony_ci * @param      blk               The image block.
62cc1dc7a3Sopenharmony_ci * @param      uncor_pline       The endpoint line assuming uncorrelated endpoints.
63cc1dc7a3Sopenharmony_ci * @param[out] uncor_err         The computed error for the uncorrelated endpoint line.
64cc1dc7a3Sopenharmony_ci * @param      samec_pline       The endpoint line assuming the same chroma for both endpoints.
65cc1dc7a3Sopenharmony_ci * @param[out] samec_err         The computed error for the uncorrelated endpoint line.
66cc1dc7a3Sopenharmony_ci * @param      rgbl_pline        The endpoint line assuming RGB + lumashift data.
67cc1dc7a3Sopenharmony_ci * @param[out] rgbl_err          The computed error for the RGB + lumashift endpoint line.
68cc1dc7a3Sopenharmony_ci * @param      l_pline           The endpoint line assuming luminance data.
69cc1dc7a3Sopenharmony_ci * @param[out] l_err             The computed error for the luminance endpoint line.
70cc1dc7a3Sopenharmony_ci * @param[out] a_drop_err        The computed error for dropping the alpha component.
71cc1dc7a3Sopenharmony_ci */
72cc1dc7a3Sopenharmony_cistatic void compute_error_squared_rgb_single_partition(
73cc1dc7a3Sopenharmony_ci	const partition_info& pi,
74cc1dc7a3Sopenharmony_ci	int partition_index,
75cc1dc7a3Sopenharmony_ci	const image_block& blk,
76cc1dc7a3Sopenharmony_ci	const processed_line3& uncor_pline,
77cc1dc7a3Sopenharmony_ci	float& uncor_err,
78cc1dc7a3Sopenharmony_ci	const processed_line3& samec_pline,
79cc1dc7a3Sopenharmony_ci	float& samec_err,
80cc1dc7a3Sopenharmony_ci	const processed_line3& rgbl_pline,
81cc1dc7a3Sopenharmony_ci	float& rgbl_err,
82cc1dc7a3Sopenharmony_ci	const processed_line3& l_pline,
83cc1dc7a3Sopenharmony_ci	float& l_err,
84cc1dc7a3Sopenharmony_ci	float& a_drop_err
85cc1dc7a3Sopenharmony_ci) {
86cc1dc7a3Sopenharmony_ci	vfloat4 ews = blk.channel_weight;
87cc1dc7a3Sopenharmony_ci
88cc1dc7a3Sopenharmony_ci	unsigned int texel_count = pi.partition_texel_count[partition_index];
89cc1dc7a3Sopenharmony_ci	const uint8_t* texel_indexes = pi.texels_of_partition[partition_index];
90cc1dc7a3Sopenharmony_ci	promise(texel_count > 0);
91cc1dc7a3Sopenharmony_ci
92cc1dc7a3Sopenharmony_ci	vfloatacc a_drop_errv = vfloatacc::zero();
93cc1dc7a3Sopenharmony_ci	vfloat default_a(blk.get_default_alpha());
94cc1dc7a3Sopenharmony_ci
95cc1dc7a3Sopenharmony_ci	vfloatacc uncor_errv = vfloatacc::zero();
96cc1dc7a3Sopenharmony_ci	vfloat uncor_bs0(uncor_pline.bs.lane<0>());
97cc1dc7a3Sopenharmony_ci	vfloat uncor_bs1(uncor_pline.bs.lane<1>());
98cc1dc7a3Sopenharmony_ci	vfloat uncor_bs2(uncor_pline.bs.lane<2>());
99cc1dc7a3Sopenharmony_ci
100cc1dc7a3Sopenharmony_ci	vfloat uncor_amod0(uncor_pline.amod.lane<0>());
101cc1dc7a3Sopenharmony_ci	vfloat uncor_amod1(uncor_pline.amod.lane<1>());
102cc1dc7a3Sopenharmony_ci	vfloat uncor_amod2(uncor_pline.amod.lane<2>());
103cc1dc7a3Sopenharmony_ci
104cc1dc7a3Sopenharmony_ci	vfloatacc samec_errv = vfloatacc::zero();
105cc1dc7a3Sopenharmony_ci	vfloat samec_bs0(samec_pline.bs.lane<0>());
106cc1dc7a3Sopenharmony_ci	vfloat samec_bs1(samec_pline.bs.lane<1>());
107cc1dc7a3Sopenharmony_ci	vfloat samec_bs2(samec_pline.bs.lane<2>());
108cc1dc7a3Sopenharmony_ci
109cc1dc7a3Sopenharmony_ci	vfloatacc rgbl_errv = vfloatacc::zero();
110cc1dc7a3Sopenharmony_ci	vfloat rgbl_bs0(rgbl_pline.bs.lane<0>());
111cc1dc7a3Sopenharmony_ci	vfloat rgbl_bs1(rgbl_pline.bs.lane<1>());
112cc1dc7a3Sopenharmony_ci	vfloat rgbl_bs2(rgbl_pline.bs.lane<2>());
113cc1dc7a3Sopenharmony_ci
114cc1dc7a3Sopenharmony_ci	vfloat rgbl_amod0(rgbl_pline.amod.lane<0>());
115cc1dc7a3Sopenharmony_ci	vfloat rgbl_amod1(rgbl_pline.amod.lane<1>());
116cc1dc7a3Sopenharmony_ci	vfloat rgbl_amod2(rgbl_pline.amod.lane<2>());
117cc1dc7a3Sopenharmony_ci
118cc1dc7a3Sopenharmony_ci	vfloatacc l_errv = vfloatacc::zero();
119cc1dc7a3Sopenharmony_ci	vfloat l_bs0(l_pline.bs.lane<0>());
120cc1dc7a3Sopenharmony_ci	vfloat l_bs1(l_pline.bs.lane<1>());
121cc1dc7a3Sopenharmony_ci	vfloat l_bs2(l_pline.bs.lane<2>());
122cc1dc7a3Sopenharmony_ci
123cc1dc7a3Sopenharmony_ci	vfloat one_third(1/3.0f, 1/3.0f, 1/3.0f, 1/3.0f);
124cc1dc7a3Sopenharmony_ci	vfloat uncor_errv0 = vfloat::zero();
125cc1dc7a3Sopenharmony_ci	vfloat uncor_errv1 = vfloat::zero();
126cc1dc7a3Sopenharmony_ci	vfloat uncor_errv2 = vfloat::zero();
127cc1dc7a3Sopenharmony_ci	vfloat samec_errv0 = vfloat::zero();
128cc1dc7a3Sopenharmony_ci	vfloat samec_errv1 = vfloat::zero();
129cc1dc7a3Sopenharmony_ci	vfloat samec_errv2 = vfloat::zero();
130cc1dc7a3Sopenharmony_ci	vfloat rgbl_errv0 = vfloat::zero();
131cc1dc7a3Sopenharmony_ci	vfloat rgbl_errv1 = vfloat::zero();
132cc1dc7a3Sopenharmony_ci	vfloat rgbl_errv2 = vfloat::zero();
133cc1dc7a3Sopenharmony_ci	vfloat l_errv0 = vfloat::zero();
134cc1dc7a3Sopenharmony_ci	vfloat l_errv1 = vfloat::zero();
135cc1dc7a3Sopenharmony_ci	vfloat l_errv2 = vfloat::zero();
136cc1dc7a3Sopenharmony_ci
137cc1dc7a3Sopenharmony_ci	unsigned int i = 0;
138cc1dc7a3Sopenharmony_ci	for (; i + ASTCENC_SIMD_WIDTH <= texel_count; i += ASTCENC_SIMD_WIDTH)
139cc1dc7a3Sopenharmony_ci	{
140cc1dc7a3Sopenharmony_ci#ifdef ASTCENC_USE_COMMON_GATHERF
141cc1dc7a3Sopenharmony_ci		const uint8_t* tix = texel_indexes + i;
142cc1dc7a3Sopenharmony_ci#else
143cc1dc7a3Sopenharmony_ci		vint tix(texel_indexes + i);
144cc1dc7a3Sopenharmony_ci#endif
145cc1dc7a3Sopenharmony_ci
146cc1dc7a3Sopenharmony_ci		// Compute the error that arises from just ditching alpha
147cc1dc7a3Sopenharmony_ci		vfloat data_a = gatherf(blk.data_a, tix);
148cc1dc7a3Sopenharmony_ci		vfloat alpha_diff = data_a - default_a;
149cc1dc7a3Sopenharmony_ci		alpha_diff = alpha_diff * alpha_diff;
150cc1dc7a3Sopenharmony_ci
151cc1dc7a3Sopenharmony_ci		haccumulate(a_drop_errv, alpha_diff);
152cc1dc7a3Sopenharmony_ci
153cc1dc7a3Sopenharmony_ci		vfloat data_r = gatherf(blk.data_r, tix);
154cc1dc7a3Sopenharmony_ci		vfloat data_g = gatherf(blk.data_g, tix);
155cc1dc7a3Sopenharmony_ci		vfloat data_b = gatherf(blk.data_b, tix);
156cc1dc7a3Sopenharmony_ci
157cc1dc7a3Sopenharmony_ci		vfloat data_rgb_avg = (data_r + data_g + data_b) * one_third;
158cc1dc7a3Sopenharmony_ci		vfloat data_rgb_0 = data_rgb_avg - data_r;
159cc1dc7a3Sopenharmony_ci		vfloat data_rgb_1 = data_rgb_avg - data_g;
160cc1dc7a3Sopenharmony_ci		vfloat data_rgb_2 = data_rgb_avg - data_b;
161cc1dc7a3Sopenharmony_ci
162cc1dc7a3Sopenharmony_ci		// Compute uncorrelated error
163cc1dc7a3Sopenharmony_ci		vfloat param = data_r * uncor_bs0
164cc1dc7a3Sopenharmony_ci		             + data_g * uncor_bs1
165cc1dc7a3Sopenharmony_ci		             + data_b * uncor_bs2;
166cc1dc7a3Sopenharmony_ci
167cc1dc7a3Sopenharmony_ci		vfloat dist0 = (uncor_amod0 + param * uncor_bs0) - data_r;
168cc1dc7a3Sopenharmony_ci		vfloat dist1 = (uncor_amod1 + param * uncor_bs1) - data_g;
169cc1dc7a3Sopenharmony_ci		vfloat dist2 = (uncor_amod2 + param * uncor_bs2) - data_b;
170cc1dc7a3Sopenharmony_ci
171cc1dc7a3Sopenharmony_ci		haccumulate(uncor_errv0, dist0 * dist0);
172cc1dc7a3Sopenharmony_ci		haccumulate(uncor_errv1, dist1 * dist1);
173cc1dc7a3Sopenharmony_ci		haccumulate(uncor_errv2, dist2 * dist2);
174cc1dc7a3Sopenharmony_ci
175cc1dc7a3Sopenharmony_ci		// Compute same chroma error - no "amod", its always zero
176cc1dc7a3Sopenharmony_ci		param = data_r * samec_bs0
177cc1dc7a3Sopenharmony_ci		      + data_g * samec_bs1
178cc1dc7a3Sopenharmony_ci		      + data_b * samec_bs2;
179cc1dc7a3Sopenharmony_ci
180cc1dc7a3Sopenharmony_ci		dist0 = (param * samec_bs0) - data_r;
181cc1dc7a3Sopenharmony_ci		dist1 = (param * samec_bs1) - data_g;
182cc1dc7a3Sopenharmony_ci		dist2 = (param * samec_bs2) - data_b;
183cc1dc7a3Sopenharmony_ci
184cc1dc7a3Sopenharmony_ci		haccumulate(uncor_errv0, dist0 * dist0);
185cc1dc7a3Sopenharmony_ci		haccumulate(uncor_errv1, dist1 * dist1);
186cc1dc7a3Sopenharmony_ci		haccumulate(uncor_errv2, dist2 * dist2);
187cc1dc7a3Sopenharmony_ci
188cc1dc7a3Sopenharmony_ci		// Compute rgbl error
189cc1dc7a3Sopenharmony_ci		dist0 = rgbl_amod0 + data_rgb_0;
190cc1dc7a3Sopenharmony_ci		dist1 = rgbl_amod1 + data_rgb_1;
191cc1dc7a3Sopenharmony_ci		dist2 = rgbl_amod2 + data_rgb_2;
192cc1dc7a3Sopenharmony_ci
193cc1dc7a3Sopenharmony_ci		haccumulate(rgbl_errv0, dist0 * dist0);
194cc1dc7a3Sopenharmony_ci		haccumulate(rgbl_errv1, dist1 * dist1);
195cc1dc7a3Sopenharmony_ci		haccumulate(rgbl_errv2, dist2 * dist2);
196cc1dc7a3Sopenharmony_ci
197cc1dc7a3Sopenharmony_ci		// Compute luma error - no "amod", its always zero
198cc1dc7a3Sopenharmony_ci		dist0 = data_rgb_0;
199cc1dc7a3Sopenharmony_ci		dist1 = data_rgb_1;
200cc1dc7a3Sopenharmony_ci		dist2 = data_rgb_2;
201cc1dc7a3Sopenharmony_ci
202cc1dc7a3Sopenharmony_ci		haccumulate(l_errv0, dist0 * dist0);
203cc1dc7a3Sopenharmony_ci		haccumulate(l_errv1, dist1 * dist1);
204cc1dc7a3Sopenharmony_ci		haccumulate(l_errv2, dist2 * dist2);
205cc1dc7a3Sopenharmony_ci	}
206cc1dc7a3Sopenharmony_ci
207cc1dc7a3Sopenharmony_ci	uncor_errv = uncor_errv0 * ews.lane<0>() + uncor_errv1 * ews.lane<1>() + uncor_errv2 * ews.lane<2>(); // channel 0,1,2
208cc1dc7a3Sopenharmony_ci	samec_errv = samec_errv0 * ews.lane<0>() + samec_errv1 * ews.lane<1>() + samec_errv2 * ews.lane<2>(); // channel 0,1,2
209cc1dc7a3Sopenharmony_ci	rgbl_errv = rgbl_errv0 * ews.lane<0>() + rgbl_errv1 * ews.lane<1>() + rgbl_errv2 * ews.lane<2>(); // channel 0,1,2
210cc1dc7a3Sopenharmony_ci	l_errv = l_errv0 * ews.lane<0>() + l_errv1 * ews.lane<1>() + l_errv2 * ews.lane<2>(); // channel 0,1,2
211cc1dc7a3Sopenharmony_ci
212cc1dc7a3Sopenharmony_ci	if (i < texel_count)
213cc1dc7a3Sopenharmony_ci	{
214cc1dc7a3Sopenharmony_ci		vint lane_ids = vint::lane_id() + i;
215cc1dc7a3Sopenharmony_ci		vint tix(texel_indexes + i);
216cc1dc7a3Sopenharmony_ci
217cc1dc7a3Sopenharmony_ci		vmask mask = lane_ids < vint(texel_count);
218cc1dc7a3Sopenharmony_ci		lane_ids += vint(ASTCENC_SIMD_WIDTH);
219cc1dc7a3Sopenharmony_ci
220cc1dc7a3Sopenharmony_ci		// Compute the error that arises from just ditching alpha
221cc1dc7a3Sopenharmony_ci		vfloat data_a = gatherf(blk.data_a, tix);
222cc1dc7a3Sopenharmony_ci		vfloat alpha_diff = data_a - default_a;
223cc1dc7a3Sopenharmony_ci		alpha_diff = alpha_diff * alpha_diff;
224cc1dc7a3Sopenharmony_ci
225cc1dc7a3Sopenharmony_ci		haccumulate(a_drop_errv, alpha_diff, mask);
226cc1dc7a3Sopenharmony_ci
227cc1dc7a3Sopenharmony_ci		vfloat data_r = gatherf(blk.data_r, tix);
228cc1dc7a3Sopenharmony_ci		vfloat data_g = gatherf(blk.data_g, tix);
229cc1dc7a3Sopenharmony_ci		vfloat data_b = gatherf(blk.data_b, tix);
230cc1dc7a3Sopenharmony_ci
231cc1dc7a3Sopenharmony_ci		vfloat data_rgb_avg = (data_r + data_g + data_b) * one_third;
232cc1dc7a3Sopenharmony_ci		vfloat data_rgb_0 = data_rgb_avg - data_r;
233cc1dc7a3Sopenharmony_ci		vfloat data_rgb_1 = data_rgb_avg - data_g;
234cc1dc7a3Sopenharmony_ci		vfloat data_rgb_2 = data_rgb_avg - data_b;
235cc1dc7a3Sopenharmony_ci
236cc1dc7a3Sopenharmony_ci		// Compute uncorrelated error
237cc1dc7a3Sopenharmony_ci		vfloat param = data_r * uncor_bs0
238cc1dc7a3Sopenharmony_ci		             + data_g * uncor_bs1
239cc1dc7a3Sopenharmony_ci		             + data_b * uncor_bs2;
240cc1dc7a3Sopenharmony_ci
241cc1dc7a3Sopenharmony_ci		vfloat dist0 = (uncor_amod0 + param * uncor_bs0) - data_r;
242cc1dc7a3Sopenharmony_ci		vfloat dist1 = (uncor_amod1 + param * uncor_bs1) - data_g;
243cc1dc7a3Sopenharmony_ci		vfloat dist2 = (uncor_amod2 + param * uncor_bs2) - data_b;
244cc1dc7a3Sopenharmony_ci
245cc1dc7a3Sopenharmony_ci		vfloat error = dist0 * dist0 * ews.lane<0>()
246cc1dc7a3Sopenharmony_ci		             + dist1 * dist1 * ews.lane<1>()
247cc1dc7a3Sopenharmony_ci		             + dist2 * dist2 * ews.lane<2>();
248cc1dc7a3Sopenharmony_ci
249cc1dc7a3Sopenharmony_ci		haccumulate(uncor_errv, error, mask);
250cc1dc7a3Sopenharmony_ci
251cc1dc7a3Sopenharmony_ci		// Compute same chroma error - no "amod", its always zero
252cc1dc7a3Sopenharmony_ci		param = data_r * samec_bs0
253cc1dc7a3Sopenharmony_ci		      + data_g * samec_bs1
254cc1dc7a3Sopenharmony_ci		      + data_b * samec_bs2;
255cc1dc7a3Sopenharmony_ci
256cc1dc7a3Sopenharmony_ci		dist0 = (param * samec_bs0) - data_r;
257cc1dc7a3Sopenharmony_ci		dist1 = (param * samec_bs1) - data_g;
258cc1dc7a3Sopenharmony_ci		dist2 = (param * samec_bs2) - data_b;
259cc1dc7a3Sopenharmony_ci
260cc1dc7a3Sopenharmony_ci		error = dist0 * dist0 * ews.lane<0>()
261cc1dc7a3Sopenharmony_ci		      + dist1 * dist1 * ews.lane<1>()
262cc1dc7a3Sopenharmony_ci		      + dist2 * dist2 * ews.lane<2>();
263cc1dc7a3Sopenharmony_ci
264cc1dc7a3Sopenharmony_ci		haccumulate(samec_errv, error, mask);
265cc1dc7a3Sopenharmony_ci
266cc1dc7a3Sopenharmony_ci		// Compute rgbl error
267cc1dc7a3Sopenharmony_ci		dist0 = rgbl_amod0 + data_rgb_0;
268cc1dc7a3Sopenharmony_ci		dist1 = rgbl_amod1 + data_rgb_1;
269cc1dc7a3Sopenharmony_ci		dist2 = rgbl_amod2 + data_rgb_2;
270cc1dc7a3Sopenharmony_ci
271cc1dc7a3Sopenharmony_ci		error = dist0 * dist0 * ews.lane<0>()
272cc1dc7a3Sopenharmony_ci		      + dist1 * dist1 * ews.lane<1>()
273cc1dc7a3Sopenharmony_ci		      + dist2 * dist2 * ews.lane<2>();
274cc1dc7a3Sopenharmony_ci
275cc1dc7a3Sopenharmony_ci		haccumulate(rgbl_errv, error, mask);
276cc1dc7a3Sopenharmony_ci
277cc1dc7a3Sopenharmony_ci		// Compute luma error - no "amod", its always zero
278cc1dc7a3Sopenharmony_ci		dist0 = data_rgb_0;
279cc1dc7a3Sopenharmony_ci		dist1 = data_rgb_1;
280cc1dc7a3Sopenharmony_ci		dist2 = data_rgb_2;
281cc1dc7a3Sopenharmony_ci
282cc1dc7a3Sopenharmony_ci		error = dist0 * dist0 * ews.lane<0>()
283cc1dc7a3Sopenharmony_ci		      + dist1 * dist1 * ews.lane<1>()
284cc1dc7a3Sopenharmony_ci		      + dist2 * dist2 * ews.lane<2>();
285cc1dc7a3Sopenharmony_ci
286cc1dc7a3Sopenharmony_ci		haccumulate(l_errv, error, mask);
287cc1dc7a3Sopenharmony_ci	}
288cc1dc7a3Sopenharmony_ci
289cc1dc7a3Sopenharmony_ci	a_drop_err = hadd_s(a_drop_errv) * ews.lane<3>();
290cc1dc7a3Sopenharmony_ci	uncor_err = hadd_s(uncor_errv);
291cc1dc7a3Sopenharmony_ci	samec_err = hadd_s(samec_errv);
292cc1dc7a3Sopenharmony_ci	rgbl_err = hadd_s(rgbl_errv);
293cc1dc7a3Sopenharmony_ci	l_err = hadd_s(l_errv);
294cc1dc7a3Sopenharmony_ci}
295cc1dc7a3Sopenharmony_ci
296cc1dc7a3Sopenharmony_ci/**
297cc1dc7a3Sopenharmony_ci * @brief For a given set of input colors and partitioning determine endpoint encode errors.
298cc1dc7a3Sopenharmony_ci *
299cc1dc7a3Sopenharmony_ci * This function determines the color error that results from RGB-scale encoding (LDR only),
300cc1dc7a3Sopenharmony_ci * RGB-lumashift encoding (HDR only), luminance-encoding, and alpha drop. Also determines whether
301cc1dc7a3Sopenharmony_ci * the endpoints are eligible for offset encoding or blue-contraction
302cc1dc7a3Sopenharmony_ci *
303cc1dc7a3Sopenharmony_ci * @param      blk   The image block.
304cc1dc7a3Sopenharmony_ci * @param      pi    The partition info data.
305cc1dc7a3Sopenharmony_ci * @param      ep    The idealized endpoints.
306cc1dc7a3Sopenharmony_ci * @param[out] eci   The resulting encoding choice error metrics.
307cc1dc7a3Sopenharmony_ci  */
308cc1dc7a3Sopenharmony_cistatic void compute_encoding_choice_errors(
309cc1dc7a3Sopenharmony_ci	QualityProfile privateProfile,
310cc1dc7a3Sopenharmony_ci	const image_block& blk,
311cc1dc7a3Sopenharmony_ci	const partition_info& pi,
312cc1dc7a3Sopenharmony_ci	const endpoints& ep,
313cc1dc7a3Sopenharmony_ci	encoding_choice_errors eci[BLOCK_MAX_PARTITIONS])
314cc1dc7a3Sopenharmony_ci{
315cc1dc7a3Sopenharmony_ci	int partition_count = pi.partition_count;
316cc1dc7a3Sopenharmony_ci	promise(partition_count > 0);
317cc1dc7a3Sopenharmony_ci
318cc1dc7a3Sopenharmony_ci	partition_metrics *pms = reinterpret_cast<partition_metrics *>(&blk.pms[0]);
319cc1dc7a3Sopenharmony_ci
320cc1dc7a3Sopenharmony_ci	if (!blk.is_constant_channel(3) || (partition_count != 1 && privateProfile == HIGH_QUALITY_PROFILE))
321cc1dc7a3Sopenharmony_ci	{
322cc1dc7a3Sopenharmony_ci		compute_avgs_and_dirs_3_comp_rgb(pi, blk, pms);
323cc1dc7a3Sopenharmony_ci	}
324cc1dc7a3Sopenharmony_ci
325cc1dc7a3Sopenharmony_ci	for (int i = 0; i < partition_count; i++)
326cc1dc7a3Sopenharmony_ci	{
327cc1dc7a3Sopenharmony_ci		partition_metrics& pm = pms[i];
328cc1dc7a3Sopenharmony_ci
329cc1dc7a3Sopenharmony_ci		line3 uncor_rgb_lines;
330cc1dc7a3Sopenharmony_ci		line3 samec_rgb_lines;  // for LDR-RGB-scale
331cc1dc7a3Sopenharmony_ci		line3 rgb_luma_lines;   // for HDR-RGB-scale
332cc1dc7a3Sopenharmony_ci
333cc1dc7a3Sopenharmony_ci		processed_line3 uncor_rgb_plines;
334cc1dc7a3Sopenharmony_ci		processed_line3 samec_rgb_plines;
335cc1dc7a3Sopenharmony_ci		processed_line3 rgb_luma_plines;
336cc1dc7a3Sopenharmony_ci		processed_line3 luminance_plines;
337cc1dc7a3Sopenharmony_ci
338cc1dc7a3Sopenharmony_ci		float uncorr_rgb_error;
339cc1dc7a3Sopenharmony_ci		float samechroma_rgb_error;
340cc1dc7a3Sopenharmony_ci		float rgb_luma_error;
341cc1dc7a3Sopenharmony_ci		float luminance_rgb_error;
342cc1dc7a3Sopenharmony_ci		float alpha_drop_error;
343cc1dc7a3Sopenharmony_ci
344cc1dc7a3Sopenharmony_ci		uncor_rgb_lines.a = pm.avg;
345cc1dc7a3Sopenharmony_ci		uncor_rgb_lines.b = normalize_safe(pm.dir, unit3());
346cc1dc7a3Sopenharmony_ci
347cc1dc7a3Sopenharmony_ci		samec_rgb_lines.a = vfloat4::zero();
348cc1dc7a3Sopenharmony_ci		samec_rgb_lines.b = normalize_safe(pm.avg, unit3());
349cc1dc7a3Sopenharmony_ci
350cc1dc7a3Sopenharmony_ci		rgb_luma_lines.a = pm.avg;
351cc1dc7a3Sopenharmony_ci		rgb_luma_lines.b = unit3();
352cc1dc7a3Sopenharmony_ci
353cc1dc7a3Sopenharmony_ci		uncor_rgb_plines.amod = uncor_rgb_lines.a - uncor_rgb_lines.b * dot3(uncor_rgb_lines.a, uncor_rgb_lines.b);
354cc1dc7a3Sopenharmony_ci		uncor_rgb_plines.bs   = uncor_rgb_lines.b;
355cc1dc7a3Sopenharmony_ci
356cc1dc7a3Sopenharmony_ci		// Same chroma always goes though zero, so this is simpler than the others
357cc1dc7a3Sopenharmony_ci		samec_rgb_plines.amod = vfloat4::zero();
358cc1dc7a3Sopenharmony_ci		samec_rgb_plines.bs   = samec_rgb_lines.b;
359cc1dc7a3Sopenharmony_ci
360cc1dc7a3Sopenharmony_ci		rgb_luma_plines.amod = rgb_luma_lines.a - rgb_luma_lines.b * dot3(rgb_luma_lines.a, rgb_luma_lines.b);
361cc1dc7a3Sopenharmony_ci		rgb_luma_plines.bs   = rgb_luma_lines.b;
362cc1dc7a3Sopenharmony_ci
363cc1dc7a3Sopenharmony_ci		// Luminance always goes though zero, so this is simpler than the others
364cc1dc7a3Sopenharmony_ci		luminance_plines.amod = vfloat4::zero();
365cc1dc7a3Sopenharmony_ci		luminance_plines.bs   = unit3();
366cc1dc7a3Sopenharmony_ci
367cc1dc7a3Sopenharmony_ci		compute_error_squared_rgb_single_partition(
368cc1dc7a3Sopenharmony_ci		    pi, i, blk,
369cc1dc7a3Sopenharmony_ci		    uncor_rgb_plines, uncorr_rgb_error,
370cc1dc7a3Sopenharmony_ci		    samec_rgb_plines, samechroma_rgb_error,
371cc1dc7a3Sopenharmony_ci		    rgb_luma_plines,  rgb_luma_error,
372cc1dc7a3Sopenharmony_ci		    luminance_plines, luminance_rgb_error,
373cc1dc7a3Sopenharmony_ci		                      alpha_drop_error);
374cc1dc7a3Sopenharmony_ci
375cc1dc7a3Sopenharmony_ci		// Determine if we can offset encode RGB lanes
376cc1dc7a3Sopenharmony_ci		vfloat4 endpt0 = ep.endpt0[i];
377cc1dc7a3Sopenharmony_ci		vfloat4 endpt1 = ep.endpt1[i];
378cc1dc7a3Sopenharmony_ci		vfloat4 endpt_diff = abs(endpt1 - endpt0);
379cc1dc7a3Sopenharmony_ci		vmask4 endpt_can_offset = endpt_diff < vfloat4(0.12f * 65535.0f);
380cc1dc7a3Sopenharmony_ci		bool can_offset_encode = (mask(endpt_can_offset) & 0x7) == 0x7;
381cc1dc7a3Sopenharmony_ci
382cc1dc7a3Sopenharmony_ci		// Store out the settings
383cc1dc7a3Sopenharmony_ci		eci[i].rgb_scale_error = (samechroma_rgb_error - uncorr_rgb_error) * 0.7f;  // empirical
384cc1dc7a3Sopenharmony_ci		eci[i].rgb_luma_error  = (rgb_luma_error - uncorr_rgb_error) * 1.5f;        // wild guess
385cc1dc7a3Sopenharmony_ci		eci[i].luminance_error = (luminance_rgb_error - uncorr_rgb_error) * 3.0f;   // empirical
386cc1dc7a3Sopenharmony_ci		eci[i].alpha_drop_error = alpha_drop_error * 3.0f;
387cc1dc7a3Sopenharmony_ci		eci[i].can_offset_encode = can_offset_encode;
388cc1dc7a3Sopenharmony_ci		eci[i].can_blue_contract = !blk.is_luminance();
389cc1dc7a3Sopenharmony_ci	}
390cc1dc7a3Sopenharmony_ci}
391cc1dc7a3Sopenharmony_ci
392cc1dc7a3Sopenharmony_ci/**
393cc1dc7a3Sopenharmony_ci * @brief For a given partition compute the error for every endpoint integer count and quant level.
394cc1dc7a3Sopenharmony_ci *
395cc1dc7a3Sopenharmony_ci * @param      encode_hdr_rgb     @c true if using HDR for RGB, @c false for LDR.
396cc1dc7a3Sopenharmony_ci * @param      encode_hdr_alpha   @c true if using HDR for alpha, @c false for LDR.
397cc1dc7a3Sopenharmony_ci * @param      partition_index    The partition index.
398cc1dc7a3Sopenharmony_ci * @param      pi                 The partition info.
399cc1dc7a3Sopenharmony_ci * @param      eci                The encoding choice error metrics.
400cc1dc7a3Sopenharmony_ci * @param      ep                 The idealized endpoints.
 * @param      error_weight       The per-channel error weights.
402cc1dc7a3Sopenharmony_ci * @param[out] best_error         The best error for each integer count and quant level.
403cc1dc7a3Sopenharmony_ci * @param[out] format_of_choice   The preferred endpoint format for each integer count and quant level.
404cc1dc7a3Sopenharmony_ci */
405cc1dc7a3Sopenharmony_cistatic void compute_color_error_for_every_integer_count_and_quant_level(
406cc1dc7a3Sopenharmony_ci	bool encode_hdr_rgb,
407cc1dc7a3Sopenharmony_ci	bool encode_hdr_alpha,
408cc1dc7a3Sopenharmony_ci	int partition_index,
409cc1dc7a3Sopenharmony_ci	const partition_info& pi,
410cc1dc7a3Sopenharmony_ci	const encoding_choice_errors& eci,
411cc1dc7a3Sopenharmony_ci	const endpoints& ep,
412cc1dc7a3Sopenharmony_ci	vfloat4 error_weight,
413cc1dc7a3Sopenharmony_ci	float best_error[21][4],
414cc1dc7a3Sopenharmony_ci	uint8_t format_of_choice[21][4]
415cc1dc7a3Sopenharmony_ci) {
416cc1dc7a3Sopenharmony_ci	int partition_size = pi.partition_texel_count[partition_index];
417cc1dc7a3Sopenharmony_ci
418cc1dc7a3Sopenharmony_ci	static const float baseline_quant_error[21 - QUANT_6] {
419cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (5 * 5),
420cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (7 * 7),
421cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (9 * 9),
422cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (11 * 11),
423cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (15 * 15),
424cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (19 * 19),
425cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (23 * 23),
426cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (31 * 31),
427cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (39 * 39),
428cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (47 * 47),
429cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (63 * 63),
430cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (79 * 79),
431cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (95 * 95),
432cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (127 * 127),
433cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (159 * 159),
434cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (191 * 191),
435cc1dc7a3Sopenharmony_ci		(65536.0f * 65536.0f / 18.0f) / (255 * 255)
436cc1dc7a3Sopenharmony_ci	};
437cc1dc7a3Sopenharmony_ci
438cc1dc7a3Sopenharmony_ci	vfloat4 ep0 = ep.endpt0[partition_index];
439cc1dc7a3Sopenharmony_ci	vfloat4 ep1 = ep.endpt1[partition_index];
440cc1dc7a3Sopenharmony_ci
441cc1dc7a3Sopenharmony_ci	float ep1_min = hmin_rgb_s(ep1);
442cc1dc7a3Sopenharmony_ci	ep1_min = astc::max(ep1_min, 0.0f);
443cc1dc7a3Sopenharmony_ci
444cc1dc7a3Sopenharmony_ci	float error_weight_rgbsum = hadd_rgb_s(error_weight);
445cc1dc7a3Sopenharmony_ci
446cc1dc7a3Sopenharmony_ci	float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f;
447cc1dc7a3Sopenharmony_ci	float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f;
448cc1dc7a3Sopenharmony_ci
449cc1dc7a3Sopenharmony_ci	// It is possible to get endpoint colors significantly outside [0,upper-limit] even if the
450cc1dc7a3Sopenharmony_ci	// input data are safely contained in [0,upper-limit]; we need to add an error term for this
451cc1dc7a3Sopenharmony_ci	vfloat4 offset(range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_alpha);
452cc1dc7a3Sopenharmony_ci	vfloat4 ep0_range_error_high = max(ep0 - offset, 0.0f);
453cc1dc7a3Sopenharmony_ci	vfloat4 ep1_range_error_high = max(ep1 - offset, 0.0f);
454cc1dc7a3Sopenharmony_ci
455cc1dc7a3Sopenharmony_ci	vfloat4 ep0_range_error_low = min(ep0, 0.0f);
456cc1dc7a3Sopenharmony_ci	vfloat4 ep1_range_error_low = min(ep1, 0.0f);
457cc1dc7a3Sopenharmony_ci
458cc1dc7a3Sopenharmony_ci	vfloat4 sum_range_error =
459cc1dc7a3Sopenharmony_ci		(ep0_range_error_low * ep0_range_error_low) +
460cc1dc7a3Sopenharmony_ci		(ep1_range_error_low * ep1_range_error_low) +
461cc1dc7a3Sopenharmony_ci		(ep0_range_error_high * ep0_range_error_high) +
462cc1dc7a3Sopenharmony_ci		(ep1_range_error_high * ep1_range_error_high);
463cc1dc7a3Sopenharmony_ci
464cc1dc7a3Sopenharmony_ci	float rgb_range_error = dot3_s(sum_range_error, error_weight)
465cc1dc7a3Sopenharmony_ci	                      * 0.5f * static_cast<float>(partition_size);
466cc1dc7a3Sopenharmony_ci	float alpha_range_error = sum_range_error.lane<3>() * error_weight.lane<3>()
467cc1dc7a3Sopenharmony_ci	                        * 0.5f * static_cast<float>(partition_size);
468cc1dc7a3Sopenharmony_ci
469cc1dc7a3Sopenharmony_ci	if (encode_hdr_rgb)
470cc1dc7a3Sopenharmony_ci	{
471cc1dc7a3Sopenharmony_ci
472cc1dc7a3Sopenharmony_ci		// Collect some statistics
473cc1dc7a3Sopenharmony_ci		float af, cf;
474cc1dc7a3Sopenharmony_ci		if (ep1.lane<0>() > ep1.lane<1>() && ep1.lane<0>() > ep1.lane<2>())
475cc1dc7a3Sopenharmony_ci		{
476cc1dc7a3Sopenharmony_ci			af = ep1.lane<0>();
477cc1dc7a3Sopenharmony_ci			cf = ep1.lane<0>() - ep0.lane<0>();
478cc1dc7a3Sopenharmony_ci		}
479cc1dc7a3Sopenharmony_ci		else if (ep1.lane<1>() > ep1.lane<2>())
480cc1dc7a3Sopenharmony_ci		{
481cc1dc7a3Sopenharmony_ci			af = ep1.lane<1>();
482cc1dc7a3Sopenharmony_ci			cf = ep1.lane<1>() - ep0.lane<1>();
483cc1dc7a3Sopenharmony_ci		}
484cc1dc7a3Sopenharmony_ci		else
485cc1dc7a3Sopenharmony_ci		{
486cc1dc7a3Sopenharmony_ci			af = ep1.lane<2>();
487cc1dc7a3Sopenharmony_ci			cf = ep1.lane<2>() - ep0.lane<2>();
488cc1dc7a3Sopenharmony_ci		}
489cc1dc7a3Sopenharmony_ci
490cc1dc7a3Sopenharmony_ci		// Estimate of color-component spread in high endpoint color
491cc1dc7a3Sopenharmony_ci		float bf = af - ep1_min;
492cc1dc7a3Sopenharmony_ci		vfloat4 prd = (ep1 - vfloat4(cf)).swz<0, 1, 2>();
493cc1dc7a3Sopenharmony_ci		vfloat4 pdif = prd - ep0.swz<0, 1, 2>();
494cc1dc7a3Sopenharmony_ci		// Estimate of color-component spread in low endpoint color
495cc1dc7a3Sopenharmony_ci		float df = hmax_s(abs(pdif));
496cc1dc7a3Sopenharmony_ci
497cc1dc7a3Sopenharmony_ci		int b = static_cast<int>(bf);
498cc1dc7a3Sopenharmony_ci		int c = static_cast<int>(cf);
499cc1dc7a3Sopenharmony_ci		int d = static_cast<int>(df);
500cc1dc7a3Sopenharmony_ci
501cc1dc7a3Sopenharmony_ci		// Determine which one of the 6 submodes is likely to be used in case of an RGBO-mode
502cc1dc7a3Sopenharmony_ci		int rgbo_mode = 5;		// 7 bits per component
503cc1dc7a3Sopenharmony_ci		// mode 4: 8 7 6
504cc1dc7a3Sopenharmony_ci		if (b < 32768 && c < 16384)
505cc1dc7a3Sopenharmony_ci		{
506cc1dc7a3Sopenharmony_ci			rgbo_mode = 4;
507cc1dc7a3Sopenharmony_ci		}
508cc1dc7a3Sopenharmony_ci
509cc1dc7a3Sopenharmony_ci		// mode 3: 9 6 7
510cc1dc7a3Sopenharmony_ci		if (b < 8192 && c < 16384)
511cc1dc7a3Sopenharmony_ci		{
512cc1dc7a3Sopenharmony_ci			rgbo_mode = 3;
513cc1dc7a3Sopenharmony_ci		}
514cc1dc7a3Sopenharmony_ci
515cc1dc7a3Sopenharmony_ci		// mode 2: 10 5 8
516cc1dc7a3Sopenharmony_ci		if (b < 2048 && c < 16384)
517cc1dc7a3Sopenharmony_ci		{
518cc1dc7a3Sopenharmony_ci			rgbo_mode = 2;
519cc1dc7a3Sopenharmony_ci		}
520cc1dc7a3Sopenharmony_ci
521cc1dc7a3Sopenharmony_ci		// mode 1: 11 6 5
522cc1dc7a3Sopenharmony_ci		if (b < 2048 && c < 1024)
523cc1dc7a3Sopenharmony_ci		{
524cc1dc7a3Sopenharmony_ci			rgbo_mode = 1;
525cc1dc7a3Sopenharmony_ci		}
526cc1dc7a3Sopenharmony_ci
527cc1dc7a3Sopenharmony_ci		// mode 0: 11 5 7
528cc1dc7a3Sopenharmony_ci		if (b < 1024 && c < 4096)
529cc1dc7a3Sopenharmony_ci		{
530cc1dc7a3Sopenharmony_ci			rgbo_mode = 0;
531cc1dc7a3Sopenharmony_ci		}
532cc1dc7a3Sopenharmony_ci
533cc1dc7a3Sopenharmony_ci		// Determine which one of the 9 submodes is likely to be used in case of an RGB-mode.
534cc1dc7a3Sopenharmony_ci		int rgb_mode = 8;		// 8 bits per component, except 7 bits for blue
535cc1dc7a3Sopenharmony_ci
536cc1dc7a3Sopenharmony_ci		// mode 0: 9 7 6 7
537cc1dc7a3Sopenharmony_ci		if (b < 16384 && c < 8192 && d < 8192)
538cc1dc7a3Sopenharmony_ci		{
539cc1dc7a3Sopenharmony_ci			rgb_mode = 0;
540cc1dc7a3Sopenharmony_ci		}
541cc1dc7a3Sopenharmony_ci
542cc1dc7a3Sopenharmony_ci		// mode 1: 9 8 6 6
543cc1dc7a3Sopenharmony_ci		if (b < 32768 && c < 8192 && d < 4096)
544cc1dc7a3Sopenharmony_ci		{
545cc1dc7a3Sopenharmony_ci			rgb_mode = 1;
546cc1dc7a3Sopenharmony_ci		}
547cc1dc7a3Sopenharmony_ci
548cc1dc7a3Sopenharmony_ci		// mode 2: 10 6 7 7
549cc1dc7a3Sopenharmony_ci		if (b < 4096 && c < 8192 && d < 4096)
550cc1dc7a3Sopenharmony_ci		{
551cc1dc7a3Sopenharmony_ci			rgb_mode = 2;
552cc1dc7a3Sopenharmony_ci		}
553cc1dc7a3Sopenharmony_ci
554cc1dc7a3Sopenharmony_ci		// mode 3: 10 7 7 6
555cc1dc7a3Sopenharmony_ci		if (b < 8192 && c < 8192 && d < 2048)
556cc1dc7a3Sopenharmony_ci		{
557cc1dc7a3Sopenharmony_ci			rgb_mode = 3;
558cc1dc7a3Sopenharmony_ci		}
559cc1dc7a3Sopenharmony_ci
560cc1dc7a3Sopenharmony_ci		// mode 4: 11 8 6 5
561cc1dc7a3Sopenharmony_ci		if (b < 8192 && c < 2048 && d < 512)
562cc1dc7a3Sopenharmony_ci		{
563cc1dc7a3Sopenharmony_ci			rgb_mode = 4;
564cc1dc7a3Sopenharmony_ci		}
565cc1dc7a3Sopenharmony_ci
566cc1dc7a3Sopenharmony_ci		// mode 5: 11 6 8 6
567cc1dc7a3Sopenharmony_ci		if (b < 2048 && c < 8192 && d < 1024)
568cc1dc7a3Sopenharmony_ci		{
569cc1dc7a3Sopenharmony_ci			rgb_mode = 5;
570cc1dc7a3Sopenharmony_ci		}
571cc1dc7a3Sopenharmony_ci
572cc1dc7a3Sopenharmony_ci		// mode 6: 12 7 7 5
573cc1dc7a3Sopenharmony_ci		if (b < 2048 && c < 2048 && d < 256)
574cc1dc7a3Sopenharmony_ci		{
575cc1dc7a3Sopenharmony_ci			rgb_mode = 6;
576cc1dc7a3Sopenharmony_ci		}
577cc1dc7a3Sopenharmony_ci
578cc1dc7a3Sopenharmony_ci		// mode 7: 12 6 7 6
579cc1dc7a3Sopenharmony_ci		if (b < 1024 && c < 2048 && d < 512)
580cc1dc7a3Sopenharmony_ci		{
581cc1dc7a3Sopenharmony_ci			rgb_mode = 7;
582cc1dc7a3Sopenharmony_ci		}
583cc1dc7a3Sopenharmony_ci
584cc1dc7a3Sopenharmony_ci		static const float rgbo_error_scales[6] { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f };
585cc1dc7a3Sopenharmony_ci		static const float rgb_error_scales[9] { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f };
586cc1dc7a3Sopenharmony_ci
587cc1dc7a3Sopenharmony_ci		float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f;  // Empirically determined ....
588cc1dc7a3Sopenharmony_ci		float mode11mult = rgb_error_scales[rgb_mode] * 0.010f;    // Empirically determined ....
589cc1dc7a3Sopenharmony_ci
590cc1dc7a3Sopenharmony_ci
591cc1dc7a3Sopenharmony_ci		float lum_high = hadd_rgb_s(ep1) * (1.0f / 3.0f);
592cc1dc7a3Sopenharmony_ci		float lum_low = hadd_rgb_s(ep0) * (1.0f / 3.0f);
593cc1dc7a3Sopenharmony_ci		float lumdif = lum_high - lum_low;
594cc1dc7a3Sopenharmony_ci		float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f;
595cc1dc7a3Sopenharmony_ci
596cc1dc7a3Sopenharmony_ci		mode23mult *= 0.0005f;  // Empirically determined ....
597cc1dc7a3Sopenharmony_ci
598cc1dc7a3Sopenharmony_ci		// Pick among the available HDR endpoint modes
599cc1dc7a3Sopenharmony_ci		for (int i = QUANT_2; i < QUANT_16; i++)
600cc1dc7a3Sopenharmony_ci		{
601cc1dc7a3Sopenharmony_ci			best_error[i][3] = ERROR_CALC_DEFAULT;
602cc1dc7a3Sopenharmony_ci			best_error[i][2] = ERROR_CALC_DEFAULT;
603cc1dc7a3Sopenharmony_ci			best_error[i][1] = ERROR_CALC_DEFAULT;
604cc1dc7a3Sopenharmony_ci			best_error[i][0] = ERROR_CALC_DEFAULT;
605cc1dc7a3Sopenharmony_ci
606cc1dc7a3Sopenharmony_ci			format_of_choice[i][3] = static_cast<uint8_t>(encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA);
607cc1dc7a3Sopenharmony_ci			format_of_choice[i][2] = FMT_HDR_RGB;
608cc1dc7a3Sopenharmony_ci			format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
609cc1dc7a3Sopenharmony_ci			format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
610cc1dc7a3Sopenharmony_ci		}
611cc1dc7a3Sopenharmony_ci
612cc1dc7a3Sopenharmony_ci		for (int i = QUANT_16; i <= QUANT_256; i++)
613cc1dc7a3Sopenharmony_ci		{
614cc1dc7a3Sopenharmony_ci			// The base_quant_error should depend on the scale-factor that would be used during
615cc1dc7a3Sopenharmony_ci			// actual encode of the color value
616cc1dc7a3Sopenharmony_ci
617cc1dc7a3Sopenharmony_ci			float base_quant_error = baseline_quant_error[i - QUANT_6] * static_cast<float>(partition_size);
618cc1dc7a3Sopenharmony_ci			float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f;
619cc1dc7a3Sopenharmony_ci			float alpha_quantization_error = error_weight.lane<3>() * base_quant_error * 2.0f;
620cc1dc7a3Sopenharmony_ci			float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
621cc1dc7a3Sopenharmony_ci
622cc1dc7a3Sopenharmony_ci			// For 8 integers, we have two encodings: one with HDR A and another one with LDR A
623cc1dc7a3Sopenharmony_ci
624cc1dc7a3Sopenharmony_ci			float full_hdr_rgba_error = rgba_quantization_error + rgb_range_error + alpha_range_error;
625cc1dc7a3Sopenharmony_ci			best_error[i][3] = full_hdr_rgba_error;
626cc1dc7a3Sopenharmony_ci			format_of_choice[i][3] = static_cast<uint8_t>(encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA);
627cc1dc7a3Sopenharmony_ci
628cc1dc7a3Sopenharmony_ci			// For 6 integers, we have one HDR-RGB encoding
629cc1dc7a3Sopenharmony_ci			float full_hdr_rgb_error = (rgb_quantization_error * mode11mult) + rgb_range_error + eci.alpha_drop_error;
630cc1dc7a3Sopenharmony_ci			best_error[i][2] = full_hdr_rgb_error;
631cc1dc7a3Sopenharmony_ci			format_of_choice[i][2] = FMT_HDR_RGB;
632cc1dc7a3Sopenharmony_ci
633cc1dc7a3Sopenharmony_ci			// For 4 integers, we have one HDR-RGB-Scale encoding
634cc1dc7a3Sopenharmony_ci			float hdr_rgb_scale_error = (rgb_quantization_error * mode7mult) + rgb_range_error + eci.alpha_drop_error + eci.rgb_luma_error;
635cc1dc7a3Sopenharmony_ci
636cc1dc7a3Sopenharmony_ci			best_error[i][1] = hdr_rgb_scale_error;
637cc1dc7a3Sopenharmony_ci			format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
638cc1dc7a3Sopenharmony_ci
639cc1dc7a3Sopenharmony_ci			// For 2 integers, we assume luminance-with-large-range
640cc1dc7a3Sopenharmony_ci			float hdr_luminance_error = (rgb_quantization_error * mode23mult) + rgb_range_error + eci.alpha_drop_error + eci.luminance_error;
641cc1dc7a3Sopenharmony_ci			best_error[i][0] = hdr_luminance_error;
642cc1dc7a3Sopenharmony_ci			format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
643cc1dc7a3Sopenharmony_ci		}
644cc1dc7a3Sopenharmony_ci	}
645cc1dc7a3Sopenharmony_ci	else
646cc1dc7a3Sopenharmony_ci	{
647cc1dc7a3Sopenharmony_ci		for (int i = QUANT_2; i < QUANT_6; i++)
648cc1dc7a3Sopenharmony_ci		{
649cc1dc7a3Sopenharmony_ci			best_error[i][3] = ERROR_CALC_DEFAULT;
650cc1dc7a3Sopenharmony_ci			best_error[i][2] = ERROR_CALC_DEFAULT;
651cc1dc7a3Sopenharmony_ci			best_error[i][1] = ERROR_CALC_DEFAULT;
652cc1dc7a3Sopenharmony_ci			best_error[i][0] = ERROR_CALC_DEFAULT;
653cc1dc7a3Sopenharmony_ci
654cc1dc7a3Sopenharmony_ci			format_of_choice[i][3] = FMT_RGBA;
655cc1dc7a3Sopenharmony_ci			format_of_choice[i][2] = FMT_RGB;
656cc1dc7a3Sopenharmony_ci			format_of_choice[i][1] = FMT_RGB_SCALE;
657cc1dc7a3Sopenharmony_ci			format_of_choice[i][0] = FMT_LUMINANCE;
658cc1dc7a3Sopenharmony_ci		}
659cc1dc7a3Sopenharmony_ci
660cc1dc7a3Sopenharmony_ci		float base_quant_error_rgb = error_weight_rgbsum * static_cast<float>(partition_size);
661cc1dc7a3Sopenharmony_ci		float base_quant_error_a = error_weight.lane<3>() * static_cast<float>(partition_size);
662cc1dc7a3Sopenharmony_ci		float base_quant_error_rgba = base_quant_error_rgb + base_quant_error_a;
663cc1dc7a3Sopenharmony_ci
664cc1dc7a3Sopenharmony_ci		float error_scale_bc_rgba = eci.can_blue_contract ? 0.625f : 1.0f;
665cc1dc7a3Sopenharmony_ci		float error_scale_oe_rgba = eci.can_offset_encode ? 0.5f : 1.0f;
666cc1dc7a3Sopenharmony_ci
667cc1dc7a3Sopenharmony_ci		float error_scale_bc_rgb = eci.can_blue_contract ? 0.5f : 1.0f;
668cc1dc7a3Sopenharmony_ci		float error_scale_oe_rgb = eci.can_offset_encode ? 0.25f : 1.0f;
669cc1dc7a3Sopenharmony_ci
670cc1dc7a3Sopenharmony_ci		// Pick among the available LDR endpoint modes
671cc1dc7a3Sopenharmony_ci		for (int i = QUANT_6; i <= QUANT_256; i++)
672cc1dc7a3Sopenharmony_ci		{
673cc1dc7a3Sopenharmony_ci			// Offset encoding not possible at higher quant levels
674cc1dc7a3Sopenharmony_ci			if (i >= QUANT_192)
675cc1dc7a3Sopenharmony_ci			{
676cc1dc7a3Sopenharmony_ci				error_scale_oe_rgba = 1.0f;
677cc1dc7a3Sopenharmony_ci				error_scale_oe_rgb = 1.0f;
678cc1dc7a3Sopenharmony_ci			}
679cc1dc7a3Sopenharmony_ci
680cc1dc7a3Sopenharmony_ci			float base_quant_error = baseline_quant_error[i - QUANT_6];
681cc1dc7a3Sopenharmony_ci			float quant_error_rgb  = base_quant_error_rgb * base_quant_error;
682cc1dc7a3Sopenharmony_ci			float quant_error_rgba = base_quant_error_rgba * base_quant_error;
683cc1dc7a3Sopenharmony_ci
684cc1dc7a3Sopenharmony_ci			// 8 integers can encode as RGBA+RGBA
685cc1dc7a3Sopenharmony_ci			float full_ldr_rgba_error = quant_error_rgba
686cc1dc7a3Sopenharmony_ci			                          * error_scale_bc_rgba
687cc1dc7a3Sopenharmony_ci			                          * error_scale_oe_rgba
688cc1dc7a3Sopenharmony_ci			                          + rgb_range_error
689cc1dc7a3Sopenharmony_ci			                          + alpha_range_error;
690cc1dc7a3Sopenharmony_ci
691cc1dc7a3Sopenharmony_ci			best_error[i][3] = full_ldr_rgba_error;
692cc1dc7a3Sopenharmony_ci			format_of_choice[i][3] = FMT_RGBA;
693cc1dc7a3Sopenharmony_ci
694cc1dc7a3Sopenharmony_ci			// 6 integers can encode as RGB+RGB or RGBS+AA
695cc1dc7a3Sopenharmony_ci			float full_ldr_rgb_error = quant_error_rgb
696cc1dc7a3Sopenharmony_ci			                         * error_scale_bc_rgb
697cc1dc7a3Sopenharmony_ci			                         * error_scale_oe_rgb
698cc1dc7a3Sopenharmony_ci			                         + rgb_range_error
699cc1dc7a3Sopenharmony_ci			                         + eci.alpha_drop_error;
700cc1dc7a3Sopenharmony_ci
701cc1dc7a3Sopenharmony_ci			float rgbs_alpha_error = quant_error_rgba
702cc1dc7a3Sopenharmony_ci			                       + eci.rgb_scale_error
703cc1dc7a3Sopenharmony_ci			                       + rgb_range_error
704cc1dc7a3Sopenharmony_ci			                       + alpha_range_error;
705cc1dc7a3Sopenharmony_ci
706cc1dc7a3Sopenharmony_ci			if (rgbs_alpha_error < full_ldr_rgb_error)
707cc1dc7a3Sopenharmony_ci			{
708cc1dc7a3Sopenharmony_ci				best_error[i][2] = rgbs_alpha_error;
709cc1dc7a3Sopenharmony_ci				format_of_choice[i][2] = FMT_RGB_SCALE_ALPHA;
710cc1dc7a3Sopenharmony_ci			}
711cc1dc7a3Sopenharmony_ci			else
712cc1dc7a3Sopenharmony_ci			{
713cc1dc7a3Sopenharmony_ci				best_error[i][2] = full_ldr_rgb_error;
714cc1dc7a3Sopenharmony_ci				format_of_choice[i][2] = FMT_RGB;
715cc1dc7a3Sopenharmony_ci			}
716cc1dc7a3Sopenharmony_ci
717cc1dc7a3Sopenharmony_ci			// 4 integers can encode as RGBS or LA+LA
718cc1dc7a3Sopenharmony_ci			float ldr_rgbs_error = quant_error_rgb
719cc1dc7a3Sopenharmony_ci			                     + rgb_range_error
720cc1dc7a3Sopenharmony_ci			                     + eci.alpha_drop_error
721cc1dc7a3Sopenharmony_ci			                     + eci.rgb_scale_error;
722cc1dc7a3Sopenharmony_ci
723cc1dc7a3Sopenharmony_ci			float lum_alpha_error = quant_error_rgba
724cc1dc7a3Sopenharmony_ci			                      + rgb_range_error
725cc1dc7a3Sopenharmony_ci			                      + alpha_range_error
726cc1dc7a3Sopenharmony_ci			                      + eci.luminance_error;
727cc1dc7a3Sopenharmony_ci
728cc1dc7a3Sopenharmony_ci			if (ldr_rgbs_error < lum_alpha_error)
729cc1dc7a3Sopenharmony_ci			{
730cc1dc7a3Sopenharmony_ci				best_error[i][1] = ldr_rgbs_error;
731cc1dc7a3Sopenharmony_ci				format_of_choice[i][1] = FMT_RGB_SCALE;
732cc1dc7a3Sopenharmony_ci			}
733cc1dc7a3Sopenharmony_ci			else
734cc1dc7a3Sopenharmony_ci			{
735cc1dc7a3Sopenharmony_ci				best_error[i][1] = lum_alpha_error;
736cc1dc7a3Sopenharmony_ci				format_of_choice[i][1] = FMT_LUMINANCE_ALPHA;
737cc1dc7a3Sopenharmony_ci			}
738cc1dc7a3Sopenharmony_ci
739cc1dc7a3Sopenharmony_ci			// 2 integers can encode as L+L
740cc1dc7a3Sopenharmony_ci			float luminance_error = quant_error_rgb
741cc1dc7a3Sopenharmony_ci			                      + rgb_range_error
742cc1dc7a3Sopenharmony_ci			                      + eci.alpha_drop_error
743cc1dc7a3Sopenharmony_ci			                      + eci.luminance_error;
744cc1dc7a3Sopenharmony_ci
745cc1dc7a3Sopenharmony_ci			best_error[i][0] = luminance_error;
746cc1dc7a3Sopenharmony_ci			format_of_choice[i][0] = FMT_LUMINANCE;
747cc1dc7a3Sopenharmony_ci		}
748cc1dc7a3Sopenharmony_ci	}
749cc1dc7a3Sopenharmony_ci}
750cc1dc7a3Sopenharmony_ci
751cc1dc7a3Sopenharmony_ci/**
752cc1dc7a3Sopenharmony_ci * @brief For one partition compute the best format and quantization for a given bit count.
753cc1dc7a3Sopenharmony_ci *
754cc1dc7a3Sopenharmony_ci * @param      best_combined_error    The best error for each quant level and integer count.
755cc1dc7a3Sopenharmony_ci * @param      best_combined_format   The best format for each quant level and integer count.
756cc1dc7a3Sopenharmony_ci * @param      bits_available         The number of bits available for encoding.
757cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level       The output best color quant level.
758cc1dc7a3Sopenharmony_ci * @param[out] best_format            The output best color format.
759cc1dc7a3Sopenharmony_ci *
760cc1dc7a3Sopenharmony_ci * @return The output error for the best pairing.
761cc1dc7a3Sopenharmony_ci */
762cc1dc7a3Sopenharmony_cistatic float one_partition_find_best_combination_for_bitcount(
763cc1dc7a3Sopenharmony_ci	QualityProfile privateProfile,
764cc1dc7a3Sopenharmony_ci	const float best_combined_error[21][4],
765cc1dc7a3Sopenharmony_ci	const uint8_t best_combined_format[21][4],
766cc1dc7a3Sopenharmony_ci	int bits_available,
767cc1dc7a3Sopenharmony_ci	uint8_t& best_quant_level,
768cc1dc7a3Sopenharmony_ci	uint8_t& best_format
769cc1dc7a3Sopenharmony_ci) {
770cc1dc7a3Sopenharmony_ci	int best_integer_count = 0;
771cc1dc7a3Sopenharmony_ci	float best_integer_count_error = ERROR_CALC_DEFAULT;
772cc1dc7a3Sopenharmony_ci
773cc1dc7a3Sopenharmony_ci	for (int integer_count = 1; integer_count <= 4;  integer_count++)
774cc1dc7a3Sopenharmony_ci	{
775cc1dc7a3Sopenharmony_ci		if (privateProfile != HIGH_QUALITY_PROFILE)
776cc1dc7a3Sopenharmony_ci		{
777cc1dc7a3Sopenharmony_ci			integer_count = 4; // constant 4 bit count for HIGH_SPEED_PROFILE mode
778cc1dc7a3Sopenharmony_ci		}
779cc1dc7a3Sopenharmony_ci		// Compute the quantization level for a given number of integers and a given number of bits
780cc1dc7a3Sopenharmony_ci		int quant_level = quant_mode_table[integer_count][bits_available];
781cc1dc7a3Sopenharmony_ci
782cc1dc7a3Sopenharmony_ci		// Don't have enough bits to represent a given endpoint format at all!
783cc1dc7a3Sopenharmony_ci		if (quant_level < QUANT_6)
784cc1dc7a3Sopenharmony_ci		{
785cc1dc7a3Sopenharmony_ci			continue;
786cc1dc7a3Sopenharmony_ci		}
787cc1dc7a3Sopenharmony_ci
788cc1dc7a3Sopenharmony_ci		float integer_count_error = best_combined_error[quant_level][integer_count - 1];
789cc1dc7a3Sopenharmony_ci		if (integer_count_error < best_integer_count_error)
790cc1dc7a3Sopenharmony_ci		{
791cc1dc7a3Sopenharmony_ci			best_integer_count_error = integer_count_error;
792cc1dc7a3Sopenharmony_ci			best_integer_count = integer_count - 1;
793cc1dc7a3Sopenharmony_ci		}
794cc1dc7a3Sopenharmony_ci	}
795cc1dc7a3Sopenharmony_ci
796cc1dc7a3Sopenharmony_ci	int ql = quant_mode_table[best_integer_count + 1][bits_available];
797cc1dc7a3Sopenharmony_ci
798cc1dc7a3Sopenharmony_ci	best_quant_level = static_cast<uint8_t>(ql);
799cc1dc7a3Sopenharmony_ci	if (privateProfile != HIGH_QUALITY_PROFILE) // keep openSource code style
800cc1dc7a3Sopenharmony_ci	{
801cc1dc7a3Sopenharmony_ci		best_format = FMT_RGBA;
802cc1dc7a3Sopenharmony_ci	}
803cc1dc7a3Sopenharmony_ci	else
804cc1dc7a3Sopenharmony_ci	{
805cc1dc7a3Sopenharmony_ci		best_format = FMT_LUMINANCE;
806cc1dc7a3Sopenharmony_ci
807cc1dc7a3Sopenharmony_ci		if (ql >= QUANT_6)
808cc1dc7a3Sopenharmony_ci		{
809cc1dc7a3Sopenharmony_ci			best_format = best_combined_format[ql][best_integer_count];
810cc1dc7a3Sopenharmony_ci		}
811cc1dc7a3Sopenharmony_ci	}
812cc1dc7a3Sopenharmony_ci
813cc1dc7a3Sopenharmony_ci	return best_integer_count_error;
814cc1dc7a3Sopenharmony_ci}
815cc1dc7a3Sopenharmony_ci
816cc1dc7a3Sopenharmony_ci/**
817cc1dc7a3Sopenharmony_ci * @brief For 2 partitions compute the best format combinations for every pair of quant mode and integer count.
818cc1dc7a3Sopenharmony_ci *
819cc1dc7a3Sopenharmony_ci * @param      best_error             The best error for a single endpoint quant level and integer count.
820cc1dc7a3Sopenharmony_ci * @param      best_format            The best format for a single endpoint quant level and integer count.
821cc1dc7a3Sopenharmony_ci * @param[out] best_combined_error    The best combined error pairings for the 2 partitions.
822cc1dc7a3Sopenharmony_ci * @param[out] best_combined_format   The best combined format pairings for the 2 partitions.
823cc1dc7a3Sopenharmony_ci */
824cc1dc7a3Sopenharmony_cistatic void two_partitions_find_best_combination_for_every_quantization_and_integer_count(
825cc1dc7a3Sopenharmony_ci	const float best_error[2][21][4],	// indexed by (partition, quant-level, integer-pair-count-minus-1)
826cc1dc7a3Sopenharmony_ci	const uint8_t best_format[2][21][4],
827cc1dc7a3Sopenharmony_ci	float best_combined_error[21][7],	// indexed by (quant-level, integer-pair-count-minus-2)
828cc1dc7a3Sopenharmony_ci	uint8_t best_combined_format[21][7][2]
829cc1dc7a3Sopenharmony_ci) {
830cc1dc7a3Sopenharmony_ci	for (int i = QUANT_2; i <= QUANT_256; i++)
831cc1dc7a3Sopenharmony_ci	{
832cc1dc7a3Sopenharmony_ci		for (int j = 0; j < 7; j++)
833cc1dc7a3Sopenharmony_ci		{
834cc1dc7a3Sopenharmony_ci			best_combined_error[i][j] = ERROR_CALC_DEFAULT;
835cc1dc7a3Sopenharmony_ci		}
836cc1dc7a3Sopenharmony_ci	}
837cc1dc7a3Sopenharmony_ci
838cc1dc7a3Sopenharmony_ci	for (int quant = QUANT_6; quant <= QUANT_256; quant++)
839cc1dc7a3Sopenharmony_ci	{
840cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 4; i++)	// integer-count for first endpoint-pair
841cc1dc7a3Sopenharmony_ci		{
842cc1dc7a3Sopenharmony_ci			for (int j = 0; j < 4; j++)	// integer-count for second endpoint-pair
843cc1dc7a3Sopenharmony_ci			{
844cc1dc7a3Sopenharmony_ci				int low2 = astc::min(i, j);
845cc1dc7a3Sopenharmony_ci				int high2 = astc::max(i, j);
846cc1dc7a3Sopenharmony_ci				if ((high2 - low2) > 1)
847cc1dc7a3Sopenharmony_ci				{
848cc1dc7a3Sopenharmony_ci					continue;
849cc1dc7a3Sopenharmony_ci				}
850cc1dc7a3Sopenharmony_ci
851cc1dc7a3Sopenharmony_ci				int intcnt = i + j;
852cc1dc7a3Sopenharmony_ci				float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j], 1e10f);
853cc1dc7a3Sopenharmony_ci				if (errorterm <= best_combined_error[quant][intcnt])
854cc1dc7a3Sopenharmony_ci				{
855cc1dc7a3Sopenharmony_ci					best_combined_error[quant][intcnt] = errorterm;
856cc1dc7a3Sopenharmony_ci					best_combined_format[quant][intcnt][0] = best_format[0][quant][i];
857cc1dc7a3Sopenharmony_ci					best_combined_format[quant][intcnt][1] = best_format[1][quant][j];
858cc1dc7a3Sopenharmony_ci				}
859cc1dc7a3Sopenharmony_ci			}
860cc1dc7a3Sopenharmony_ci		}
861cc1dc7a3Sopenharmony_ci	}
862cc1dc7a3Sopenharmony_ci}
863cc1dc7a3Sopenharmony_ci
864cc1dc7a3Sopenharmony_ci/**
865cc1dc7a3Sopenharmony_ci * @brief For 2 partitions compute the best format and quantization for a given bit count.
866cc1dc7a3Sopenharmony_ci *
867cc1dc7a3Sopenharmony_ci * @param      best_combined_error    The best error for each quant level and integer count.
868cc1dc7a3Sopenharmony_ci * @param      best_combined_format   The best format for each quant level and integer count.
869cc1dc7a3Sopenharmony_ci * @param      bits_available         The number of bits available for encoding.
870cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level       The output best color quant level.
871cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level_mod   The output best color quant level assuming two more bits are available.
872cc1dc7a3Sopenharmony_ci * @param[out] best_formats           The output best color formats.
873cc1dc7a3Sopenharmony_ci *
874cc1dc7a3Sopenharmony_ci * @return The output error for the best pairing.
875cc1dc7a3Sopenharmony_ci */
876cc1dc7a3Sopenharmony_cistatic float two_partitions_find_best_combination_for_bitcount(
877cc1dc7a3Sopenharmony_ci	unsigned int privateProfile,
878cc1dc7a3Sopenharmony_ci	float best_combined_error[21][7],
879cc1dc7a3Sopenharmony_ci	uint8_t best_combined_format[21][7][2],
880cc1dc7a3Sopenharmony_ci	int bits_available,
881cc1dc7a3Sopenharmony_ci	uint8_t& best_quant_level,
882cc1dc7a3Sopenharmony_ci	uint8_t& best_quant_level_mod,
883cc1dc7a3Sopenharmony_ci	uint8_t* best_formats
884cc1dc7a3Sopenharmony_ci) {
885cc1dc7a3Sopenharmony_ci	int best_integer_count = 0;
886cc1dc7a3Sopenharmony_ci	float best_integer_count_error = ERROR_CALC_DEFAULT;
887cc1dc7a3Sopenharmony_ci	int integer_count = 2;
888cc1dc7a3Sopenharmony_ci	if (privateProfile != HIGH_QUALITY_PROFILE)
889cc1dc7a3Sopenharmony_ci	{
890cc1dc7a3Sopenharmony_ci		integer_count = 8;  // constant 8 bit count
891cc1dc7a3Sopenharmony_ci	}
892cc1dc7a3Sopenharmony_ci
893cc1dc7a3Sopenharmony_ci	for (; integer_count <= 8; integer_count++)
894cc1dc7a3Sopenharmony_ci	{
895cc1dc7a3Sopenharmony_ci		// Compute the quantization level for a given number of integers and a given number of bits
896cc1dc7a3Sopenharmony_ci		int quant_level = quant_mode_table[integer_count][bits_available];
897cc1dc7a3Sopenharmony_ci
898cc1dc7a3Sopenharmony_ci		// Don't have enough bits to represent a given endpoint format at all!
899cc1dc7a3Sopenharmony_ci		if (quant_level < QUANT_6)
900cc1dc7a3Sopenharmony_ci		{
901cc1dc7a3Sopenharmony_ci			break;
902cc1dc7a3Sopenharmony_ci		}
903cc1dc7a3Sopenharmony_ci
904cc1dc7a3Sopenharmony_ci		float integer_count_error = best_combined_error[quant_level][integer_count - 2];
905cc1dc7a3Sopenharmony_ci		if (integer_count_error < best_integer_count_error)
906cc1dc7a3Sopenharmony_ci		{
907cc1dc7a3Sopenharmony_ci			best_integer_count_error = integer_count_error;
908cc1dc7a3Sopenharmony_ci			best_integer_count = integer_count;
909cc1dc7a3Sopenharmony_ci		}
910cc1dc7a3Sopenharmony_ci	}
911cc1dc7a3Sopenharmony_ci
912cc1dc7a3Sopenharmony_ci	int ql = quant_mode_table[best_integer_count][bits_available];
913cc1dc7a3Sopenharmony_ci	int ql_mod = quant_mode_table[best_integer_count][bits_available + 2];
914cc1dc7a3Sopenharmony_ci
915cc1dc7a3Sopenharmony_ci	best_quant_level = static_cast<uint8_t>(ql);
916cc1dc7a3Sopenharmony_ci	best_quant_level_mod = static_cast<uint8_t>(ql_mod);
917cc1dc7a3Sopenharmony_ci
918cc1dc7a3Sopenharmony_ci	if (ql >= QUANT_6)
919cc1dc7a3Sopenharmony_ci	{
920cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 2; i++)
921cc1dc7a3Sopenharmony_ci		{
922cc1dc7a3Sopenharmony_ci			best_formats[i] = best_combined_format[ql][best_integer_count - 2][i];
923cc1dc7a3Sopenharmony_ci		}
924cc1dc7a3Sopenharmony_ci	}
925cc1dc7a3Sopenharmony_ci	else
926cc1dc7a3Sopenharmony_ci	{
927cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 2; i++)
928cc1dc7a3Sopenharmony_ci		{
929cc1dc7a3Sopenharmony_ci			best_formats[i] = FMT_LUMINANCE;
930cc1dc7a3Sopenharmony_ci		}
931cc1dc7a3Sopenharmony_ci	}
932cc1dc7a3Sopenharmony_ci
933cc1dc7a3Sopenharmony_ci	return best_integer_count_error;
934cc1dc7a3Sopenharmony_ci}
935cc1dc7a3Sopenharmony_ci
936cc1dc7a3Sopenharmony_ci/**
937cc1dc7a3Sopenharmony_ci * @brief For 3 partitions compute the best format combinations for every pair of quant mode and integer count.
938cc1dc7a3Sopenharmony_ci *
939cc1dc7a3Sopenharmony_ci * @param      best_error             The best error for a single endpoint quant level and integer count.
940cc1dc7a3Sopenharmony_ci * @param      best_format            The best format for a single endpoint quant level and integer count.
941cc1dc7a3Sopenharmony_ci * @param[out] best_combined_error    The best combined error pairings for the 3 partitions.
942cc1dc7a3Sopenharmony_ci * @param[out] best_combined_format   The best combined format pairings for the 3 partitions.
943cc1dc7a3Sopenharmony_ci */
944cc1dc7a3Sopenharmony_cistatic void three_partitions_find_best_combination_for_every_quantization_and_integer_count(
945cc1dc7a3Sopenharmony_ci	const float best_error[3][21][4],	// indexed by (partition, quant-level, integer-count)
946cc1dc7a3Sopenharmony_ci	const uint8_t best_format[3][21][4],
947cc1dc7a3Sopenharmony_ci	float best_combined_error[21][10],
948cc1dc7a3Sopenharmony_ci	uint8_t best_combined_format[21][10][3]
949cc1dc7a3Sopenharmony_ci) {
950cc1dc7a3Sopenharmony_ci	for (int i = QUANT_2; i <= QUANT_256; i++)
951cc1dc7a3Sopenharmony_ci	{
952cc1dc7a3Sopenharmony_ci		for (int j = 0; j < 10; j++)
953cc1dc7a3Sopenharmony_ci		{
954cc1dc7a3Sopenharmony_ci			best_combined_error[i][j] = ERROR_CALC_DEFAULT;
955cc1dc7a3Sopenharmony_ci		}
956cc1dc7a3Sopenharmony_ci	}
957cc1dc7a3Sopenharmony_ci
958cc1dc7a3Sopenharmony_ci	for (int quant = QUANT_6; quant <= QUANT_256; quant++)
959cc1dc7a3Sopenharmony_ci	{
960cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 4; i++)	// integer-count for first endpoint-pair
961cc1dc7a3Sopenharmony_ci		{
962cc1dc7a3Sopenharmony_ci			for (int j = 0; j < 4; j++)	// integer-count for second endpoint-pair
963cc1dc7a3Sopenharmony_ci			{
964cc1dc7a3Sopenharmony_ci				int low2 = astc::min(i, j);
965cc1dc7a3Sopenharmony_ci				int high2 = astc::max(i, j);
966cc1dc7a3Sopenharmony_ci				if ((high2 - low2) > 1)
967cc1dc7a3Sopenharmony_ci				{
968cc1dc7a3Sopenharmony_ci					continue;
969cc1dc7a3Sopenharmony_ci				}
970cc1dc7a3Sopenharmony_ci
971cc1dc7a3Sopenharmony_ci				for (int k = 0; k < 4; k++)	// integer-count for third endpoint-pair
972cc1dc7a3Sopenharmony_ci				{
973cc1dc7a3Sopenharmony_ci					int low3 = astc::min(k, low2);
974cc1dc7a3Sopenharmony_ci					int high3 = astc::max(k, high2);
975cc1dc7a3Sopenharmony_ci					if ((high3 - low3) > 1)
976cc1dc7a3Sopenharmony_ci					{
977cc1dc7a3Sopenharmony_ci						continue;
978cc1dc7a3Sopenharmony_ci					}
979cc1dc7a3Sopenharmony_ci
980cc1dc7a3Sopenharmony_ci					int intcnt = i + j + k;
981cc1dc7a3Sopenharmony_ci					float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k], 1e10f);
982cc1dc7a3Sopenharmony_ci					if (errorterm <= best_combined_error[quant][intcnt])
983cc1dc7a3Sopenharmony_ci					{
984cc1dc7a3Sopenharmony_ci						best_combined_error[quant][intcnt] = errorterm;
985cc1dc7a3Sopenharmony_ci						best_combined_format[quant][intcnt][0] = best_format[0][quant][i];
986cc1dc7a3Sopenharmony_ci						best_combined_format[quant][intcnt][1] = best_format[1][quant][j];
987cc1dc7a3Sopenharmony_ci						best_combined_format[quant][intcnt][2] = best_format[2][quant][k];
988cc1dc7a3Sopenharmony_ci					}
989cc1dc7a3Sopenharmony_ci				}
990cc1dc7a3Sopenharmony_ci			}
991cc1dc7a3Sopenharmony_ci		}
992cc1dc7a3Sopenharmony_ci	}
993cc1dc7a3Sopenharmony_ci}
994cc1dc7a3Sopenharmony_ci
995cc1dc7a3Sopenharmony_ci/**
996cc1dc7a3Sopenharmony_ci * @brief For 3 partitions compute the best format and quantization for a given bit count.
997cc1dc7a3Sopenharmony_ci *
998cc1dc7a3Sopenharmony_ci * @param      best_combined_error    The best error for each quant level and integer count.
999cc1dc7a3Sopenharmony_ci * @param      best_combined_format   The best format for each quant level and integer count.
1000cc1dc7a3Sopenharmony_ci * @param      bits_available         The number of bits available for encoding.
1001cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level       The output best color quant level.
1002cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level_mod   The output best color quant level assuming two more bits are available.
1003cc1dc7a3Sopenharmony_ci * @param[out] best_formats           The output best color formats.
1004cc1dc7a3Sopenharmony_ci *
1005cc1dc7a3Sopenharmony_ci * @return The output error for the best pairing.
1006cc1dc7a3Sopenharmony_ci */
1007cc1dc7a3Sopenharmony_cistatic float three_partitions_find_best_combination_for_bitcount(
1008cc1dc7a3Sopenharmony_ci	const float best_combined_error[21][10],
1009cc1dc7a3Sopenharmony_ci	const uint8_t best_combined_format[21][10][3],
1010cc1dc7a3Sopenharmony_ci	int bits_available,
1011cc1dc7a3Sopenharmony_ci	uint8_t& best_quant_level,
1012cc1dc7a3Sopenharmony_ci	uint8_t& best_quant_level_mod,
1013cc1dc7a3Sopenharmony_ci	uint8_t* best_formats
1014cc1dc7a3Sopenharmony_ci) {
1015cc1dc7a3Sopenharmony_ci	int best_integer_count = 0;
1016cc1dc7a3Sopenharmony_ci	float best_integer_count_error = ERROR_CALC_DEFAULT;
1017cc1dc7a3Sopenharmony_ci
1018cc1dc7a3Sopenharmony_ci	for (int integer_count = 3; integer_count <= 9; integer_count++)
1019cc1dc7a3Sopenharmony_ci	{
1020cc1dc7a3Sopenharmony_ci		// Compute the quantization level for a given number of integers and a given number of bits
1021cc1dc7a3Sopenharmony_ci		int quant_level = quant_mode_table[integer_count][bits_available];
1022cc1dc7a3Sopenharmony_ci
1023cc1dc7a3Sopenharmony_ci		// Don't have enough bits to represent a given endpoint format at all!
1024cc1dc7a3Sopenharmony_ci		if (quant_level < QUANT_6)
1025cc1dc7a3Sopenharmony_ci		{
1026cc1dc7a3Sopenharmony_ci			break;
1027cc1dc7a3Sopenharmony_ci		}
1028cc1dc7a3Sopenharmony_ci
1029cc1dc7a3Sopenharmony_ci		float integer_count_error = best_combined_error[quant_level][integer_count - 3];
1030cc1dc7a3Sopenharmony_ci		if (integer_count_error < best_integer_count_error)
1031cc1dc7a3Sopenharmony_ci		{
1032cc1dc7a3Sopenharmony_ci			best_integer_count_error = integer_count_error;
1033cc1dc7a3Sopenharmony_ci			best_integer_count = integer_count;
1034cc1dc7a3Sopenharmony_ci		}
1035cc1dc7a3Sopenharmony_ci	}
1036cc1dc7a3Sopenharmony_ci
1037cc1dc7a3Sopenharmony_ci	int ql = quant_mode_table[best_integer_count][bits_available];
1038cc1dc7a3Sopenharmony_ci	int ql_mod = quant_mode_table[best_integer_count][bits_available + 5];
1039cc1dc7a3Sopenharmony_ci
1040cc1dc7a3Sopenharmony_ci	best_quant_level = static_cast<uint8_t>(ql);
1041cc1dc7a3Sopenharmony_ci	best_quant_level_mod = static_cast<uint8_t>(ql_mod);
1042cc1dc7a3Sopenharmony_ci
1043cc1dc7a3Sopenharmony_ci	if (ql >= QUANT_6)
1044cc1dc7a3Sopenharmony_ci	{
1045cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 3; i++)
1046cc1dc7a3Sopenharmony_ci		{
1047cc1dc7a3Sopenharmony_ci			best_formats[i] = best_combined_format[ql][best_integer_count - 3][i];
1048cc1dc7a3Sopenharmony_ci		}
1049cc1dc7a3Sopenharmony_ci	}
1050cc1dc7a3Sopenharmony_ci	else
1051cc1dc7a3Sopenharmony_ci	{
1052cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 3; i++)
1053cc1dc7a3Sopenharmony_ci		{
1054cc1dc7a3Sopenharmony_ci			best_formats[i] = FMT_LUMINANCE;
1055cc1dc7a3Sopenharmony_ci		}
1056cc1dc7a3Sopenharmony_ci	}
1057cc1dc7a3Sopenharmony_ci
1058cc1dc7a3Sopenharmony_ci	return best_integer_count_error;
1059cc1dc7a3Sopenharmony_ci}
1060cc1dc7a3Sopenharmony_ci
1061cc1dc7a3Sopenharmony_ci/**
1062cc1dc7a3Sopenharmony_ci * @brief For 4 partitions compute the best format combinations for every pair of quant mode and integer count.
1063cc1dc7a3Sopenharmony_ci *
1064cc1dc7a3Sopenharmony_ci * @param      best_error             The best error for a single endpoint quant level and integer count.
1065cc1dc7a3Sopenharmony_ci * @param      best_format            The best format for a single endpoint quant level and integer count.
1066cc1dc7a3Sopenharmony_ci * @param[out] best_combined_error    The best combined error pairings for the 4 partitions.
1067cc1dc7a3Sopenharmony_ci * @param[out] best_combined_format   The best combined format pairings for the 4 partitions.
1068cc1dc7a3Sopenharmony_ci */
1069cc1dc7a3Sopenharmony_cistatic void four_partitions_find_best_combination_for_every_quantization_and_integer_count(
1070cc1dc7a3Sopenharmony_ci	const float best_error[4][21][4],	// indexed by (partition, quant-level, integer-count)
1071cc1dc7a3Sopenharmony_ci	const uint8_t best_format[4][21][4],
1072cc1dc7a3Sopenharmony_ci	float best_combined_error[21][13],
1073cc1dc7a3Sopenharmony_ci	uint8_t best_combined_format[21][13][4]
1074cc1dc7a3Sopenharmony_ci) {
1075cc1dc7a3Sopenharmony_ci	for (int i = QUANT_2; i <= QUANT_256; i++)
1076cc1dc7a3Sopenharmony_ci	{
1077cc1dc7a3Sopenharmony_ci		for (int j = 0; j < 13; j++)
1078cc1dc7a3Sopenharmony_ci		{
1079cc1dc7a3Sopenharmony_ci			best_combined_error[i][j] = ERROR_CALC_DEFAULT;
1080cc1dc7a3Sopenharmony_ci		}
1081cc1dc7a3Sopenharmony_ci	}
1082cc1dc7a3Sopenharmony_ci
1083cc1dc7a3Sopenharmony_ci	for (int quant = QUANT_6; quant <= QUANT_256; quant++)
1084cc1dc7a3Sopenharmony_ci	{
1085cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 4; i++)	// integer-count for first endpoint-pair
1086cc1dc7a3Sopenharmony_ci		{
1087cc1dc7a3Sopenharmony_ci			for (int j = 0; j < 4; j++)	// integer-count for second endpoint-pair
1088cc1dc7a3Sopenharmony_ci			{
1089cc1dc7a3Sopenharmony_ci				int low2 = astc::min(i, j);
1090cc1dc7a3Sopenharmony_ci				int high2 = astc::max(i, j);
1091cc1dc7a3Sopenharmony_ci				if ((high2 - low2) > 1)
1092cc1dc7a3Sopenharmony_ci				{
1093cc1dc7a3Sopenharmony_ci					continue;
1094cc1dc7a3Sopenharmony_ci				}
1095cc1dc7a3Sopenharmony_ci
1096cc1dc7a3Sopenharmony_ci				for (int k = 0; k < 4; k++)	// integer-count for third endpoint-pair
1097cc1dc7a3Sopenharmony_ci				{
1098cc1dc7a3Sopenharmony_ci					int low3 = astc::min(k, low2);
1099cc1dc7a3Sopenharmony_ci					int high3 = astc::max(k, high2);
1100cc1dc7a3Sopenharmony_ci					if ((high3 - low3) > 1)
1101cc1dc7a3Sopenharmony_ci					{
1102cc1dc7a3Sopenharmony_ci						continue;
1103cc1dc7a3Sopenharmony_ci					}
1104cc1dc7a3Sopenharmony_ci
1105cc1dc7a3Sopenharmony_ci					for (int l = 0; l < 4; l++)	// integer-count for fourth endpoint-pair
1106cc1dc7a3Sopenharmony_ci					{
1107cc1dc7a3Sopenharmony_ci						int low4 = astc::min(l, low3);
1108cc1dc7a3Sopenharmony_ci						int high4 = astc::max(l, high3);
1109cc1dc7a3Sopenharmony_ci						if ((high4 - low4) > 1)
1110cc1dc7a3Sopenharmony_ci						{
1111cc1dc7a3Sopenharmony_ci							continue;
1112cc1dc7a3Sopenharmony_ci						}
1113cc1dc7a3Sopenharmony_ci
1114cc1dc7a3Sopenharmony_ci						int intcnt = i + j + k + l;
1115cc1dc7a3Sopenharmony_ci						float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k] + best_error[3][quant][l], 1e10f);
1116cc1dc7a3Sopenharmony_ci						if (errorterm <= best_combined_error[quant][intcnt])
1117cc1dc7a3Sopenharmony_ci						{
1118cc1dc7a3Sopenharmony_ci							best_combined_error[quant][intcnt] = errorterm;
1119cc1dc7a3Sopenharmony_ci							best_combined_format[quant][intcnt][0] = best_format[0][quant][i];
1120cc1dc7a3Sopenharmony_ci							best_combined_format[quant][intcnt][1] = best_format[1][quant][j];
1121cc1dc7a3Sopenharmony_ci							best_combined_format[quant][intcnt][2] = best_format[2][quant][k];
1122cc1dc7a3Sopenharmony_ci							best_combined_format[quant][intcnt][3] = best_format[3][quant][l];
1123cc1dc7a3Sopenharmony_ci						}
1124cc1dc7a3Sopenharmony_ci					}
1125cc1dc7a3Sopenharmony_ci				}
1126cc1dc7a3Sopenharmony_ci			}
1127cc1dc7a3Sopenharmony_ci		}
1128cc1dc7a3Sopenharmony_ci	}
1129cc1dc7a3Sopenharmony_ci}
1130cc1dc7a3Sopenharmony_ci
1131cc1dc7a3Sopenharmony_ci/**
1132cc1dc7a3Sopenharmony_ci * @brief For 4 partitions compute the best format and quantization for a given bit count.
1133cc1dc7a3Sopenharmony_ci *
1134cc1dc7a3Sopenharmony_ci * @param      best_combined_error    The best error for each quant level and integer count.
1135cc1dc7a3Sopenharmony_ci * @param      best_combined_format   The best format for each quant level and integer count.
1136cc1dc7a3Sopenharmony_ci * @param      bits_available         The number of bits available for encoding.
1137cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level       The output best color quant level.
1138cc1dc7a3Sopenharmony_ci * @param[out] best_quant_level_mod   The output best color quant level assuming two more bits are available.
1139cc1dc7a3Sopenharmony_ci * @param[out] best_formats           The output best color formats.
1140cc1dc7a3Sopenharmony_ci *
1141cc1dc7a3Sopenharmony_ci * @return best_error The output error for the best pairing.
1142cc1dc7a3Sopenharmony_ci */
1143cc1dc7a3Sopenharmony_cistatic float four_partitions_find_best_combination_for_bitcount(
1144cc1dc7a3Sopenharmony_ci	const float best_combined_error[21][13],
1145cc1dc7a3Sopenharmony_ci	const uint8_t best_combined_format[21][13][4],
1146cc1dc7a3Sopenharmony_ci	int bits_available,
1147cc1dc7a3Sopenharmony_ci	uint8_t& best_quant_level,
1148cc1dc7a3Sopenharmony_ci	uint8_t& best_quant_level_mod,
1149cc1dc7a3Sopenharmony_ci	uint8_t* best_formats
1150cc1dc7a3Sopenharmony_ci) {
1151cc1dc7a3Sopenharmony_ci	int best_integer_count = 0;
1152cc1dc7a3Sopenharmony_ci	float best_integer_count_error = ERROR_CALC_DEFAULT;
1153cc1dc7a3Sopenharmony_ci
1154cc1dc7a3Sopenharmony_ci	for (int integer_count = 4; integer_count <= 9; integer_count++)
1155cc1dc7a3Sopenharmony_ci	{
1156cc1dc7a3Sopenharmony_ci		// Compute the quantization level for a given number of integers and a given number of bits
1157cc1dc7a3Sopenharmony_ci		int quant_level = quant_mode_table[integer_count][bits_available];
1158cc1dc7a3Sopenharmony_ci
1159cc1dc7a3Sopenharmony_ci		// Don't have enough bits to represent a given endpoint format at all!
1160cc1dc7a3Sopenharmony_ci		if (quant_level < QUANT_6)
1161cc1dc7a3Sopenharmony_ci		{
1162cc1dc7a3Sopenharmony_ci			break;
1163cc1dc7a3Sopenharmony_ci		}
1164cc1dc7a3Sopenharmony_ci
1165cc1dc7a3Sopenharmony_ci		float integer_count_error = best_combined_error[quant_level][integer_count - 4];
1166cc1dc7a3Sopenharmony_ci		if (integer_count_error < best_integer_count_error)
1167cc1dc7a3Sopenharmony_ci		{
1168cc1dc7a3Sopenharmony_ci			best_integer_count_error = integer_count_error;
1169cc1dc7a3Sopenharmony_ci			best_integer_count = integer_count;
1170cc1dc7a3Sopenharmony_ci		}
1171cc1dc7a3Sopenharmony_ci	}
1172cc1dc7a3Sopenharmony_ci
1173cc1dc7a3Sopenharmony_ci	int ql = quant_mode_table[best_integer_count][bits_available];
1174cc1dc7a3Sopenharmony_ci	int ql_mod = quant_mode_table[best_integer_count][bits_available + 8];
1175cc1dc7a3Sopenharmony_ci
1176cc1dc7a3Sopenharmony_ci	best_quant_level = static_cast<uint8_t>(ql);
1177cc1dc7a3Sopenharmony_ci	best_quant_level_mod = static_cast<uint8_t>(ql_mod);
1178cc1dc7a3Sopenharmony_ci
1179cc1dc7a3Sopenharmony_ci	if (ql >= QUANT_6)
1180cc1dc7a3Sopenharmony_ci	{
1181cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 4; i++)
1182cc1dc7a3Sopenharmony_ci		{
1183cc1dc7a3Sopenharmony_ci			best_formats[i] = best_combined_format[ql][best_integer_count - 4][i];
1184cc1dc7a3Sopenharmony_ci		}
1185cc1dc7a3Sopenharmony_ci	}
1186cc1dc7a3Sopenharmony_ci	else
1187cc1dc7a3Sopenharmony_ci	{
1188cc1dc7a3Sopenharmony_ci		for (int i = 0; i < 4; i++)
1189cc1dc7a3Sopenharmony_ci		{
1190cc1dc7a3Sopenharmony_ci			best_formats[i] = FMT_LUMINANCE;
1191cc1dc7a3Sopenharmony_ci		}
1192cc1dc7a3Sopenharmony_ci	}
1193cc1dc7a3Sopenharmony_ci
1194cc1dc7a3Sopenharmony_ci	return best_integer_count_error;
1195cc1dc7a3Sopenharmony_ci}
1196cc1dc7a3Sopenharmony_ci
1197cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1198cc1dc7a3Sopenharmony_ciunsigned int compute_ideal_endpoint_formats(
1199cc1dc7a3Sopenharmony_ci	QualityProfile privateProfile,
1200cc1dc7a3Sopenharmony_ci	const partition_info& pi,
1201cc1dc7a3Sopenharmony_ci	const image_block& blk,
1202cc1dc7a3Sopenharmony_ci	const endpoints& ep,
1203cc1dc7a3Sopenharmony_ci	 // bitcounts and errors computed for the various quantization methods
1204cc1dc7a3Sopenharmony_ci	const int8_t* qwt_bitcounts,
1205cc1dc7a3Sopenharmony_ci	const float* qwt_errors,
1206cc1dc7a3Sopenharmony_ci	unsigned int tune_candidate_limit,
1207cc1dc7a3Sopenharmony_ci	unsigned int start_block_mode,
1208cc1dc7a3Sopenharmony_ci	unsigned int end_block_mode,
1209cc1dc7a3Sopenharmony_ci	// output data
1210cc1dc7a3Sopenharmony_ci	uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS],
1211cc1dc7a3Sopenharmony_ci	int block_mode[TUNE_MAX_TRIAL_CANDIDATES],
1212cc1dc7a3Sopenharmony_ci	quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES],
1213cc1dc7a3Sopenharmony_ci	quant_method quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES],
1214cc1dc7a3Sopenharmony_ci	compression_working_buffers& tmpbuf
1215cc1dc7a3Sopenharmony_ci) {
1216cc1dc7a3Sopenharmony_ci	int partition_count = pi.partition_count;
1217cc1dc7a3Sopenharmony_ci
1218cc1dc7a3Sopenharmony_ci	promise(partition_count > 0);
1219cc1dc7a3Sopenharmony_ci
1220cc1dc7a3Sopenharmony_ci	bool encode_hdr_rgb = static_cast<bool>(blk.rgb_lns[0]);
1221cc1dc7a3Sopenharmony_ci	bool encode_hdr_alpha = static_cast<bool>(blk.alpha_lns[0]);
1222cc1dc7a3Sopenharmony_ci
1223cc1dc7a3Sopenharmony_ci	// Compute the errors that result from various encoding choices (such as using luminance instead
1224cc1dc7a3Sopenharmony_ci	// of RGB, discarding Alpha, using RGB-scale in place of two separate RGB endpoints and so on)
1225cc1dc7a3Sopenharmony_ci	encoding_choice_errors eci[BLOCK_MAX_PARTITIONS];
1226cc1dc7a3Sopenharmony_ci	compute_encoding_choice_errors(privateProfile, blk, pi, ep, eci);
1227cc1dc7a3Sopenharmony_ci
1228cc1dc7a3Sopenharmony_ci	float best_error[BLOCK_MAX_PARTITIONS][21][4];
1229cc1dc7a3Sopenharmony_ci	uint8_t format_of_choice[BLOCK_MAX_PARTITIONS][21][4];
1230cc1dc7a3Sopenharmony_ci	for (int i = 0; i < partition_count; i++)
1231cc1dc7a3Sopenharmony_ci	{
1232cc1dc7a3Sopenharmony_ci		compute_color_error_for_every_integer_count_and_quant_level(
1233cc1dc7a3Sopenharmony_ci		    encode_hdr_rgb, encode_hdr_alpha, i,
1234cc1dc7a3Sopenharmony_ci		    pi, eci[i], ep, blk.channel_weight, best_error[i],
1235cc1dc7a3Sopenharmony_ci		    format_of_choice[i]);
1236cc1dc7a3Sopenharmony_ci	}
1237cc1dc7a3Sopenharmony_ci
1238cc1dc7a3Sopenharmony_ci	float* errors_of_best_combination = tmpbuf.errors_of_best_combination;
1239cc1dc7a3Sopenharmony_ci	uint8_t* best_quant_levels = tmpbuf.best_quant_levels;
1240cc1dc7a3Sopenharmony_ci	uint8_t* best_quant_levels_mod = tmpbuf.best_quant_levels_mod;
1241cc1dc7a3Sopenharmony_ci	uint8_t (&best_ep_formats)[WEIGHTS_MAX_BLOCK_MODES][BLOCK_MAX_PARTITIONS] = tmpbuf.best_ep_formats;
1242cc1dc7a3Sopenharmony_ci
1243cc1dc7a3Sopenharmony_ci	// Ensure that the first iteration understep contains data that will never be picked
1244cc1dc7a3Sopenharmony_ci	vfloat clear_error(ERROR_CALC_DEFAULT);
1245cc1dc7a3Sopenharmony_ci	vint clear_quant(0);
1246cc1dc7a3Sopenharmony_ci
1247cc1dc7a3Sopenharmony_ci	unsigned int packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode);
1248cc1dc7a3Sopenharmony_ci	storea(clear_error, errors_of_best_combination + packed_start_block_mode);
1249cc1dc7a3Sopenharmony_ci	store_nbytes(clear_quant, best_quant_levels + packed_start_block_mode);
1250cc1dc7a3Sopenharmony_ci	store_nbytes(clear_quant, best_quant_levels_mod + packed_start_block_mode);
1251cc1dc7a3Sopenharmony_ci
1252cc1dc7a3Sopenharmony_ci	// Ensure that last iteration overstep contains data that will never be picked
1253cc1dc7a3Sopenharmony_ci	unsigned int packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1);
1254cc1dc7a3Sopenharmony_ci	storea(clear_error, errors_of_best_combination + packed_end_block_mode);
1255cc1dc7a3Sopenharmony_ci	store_nbytes(clear_quant, best_quant_levels + packed_end_block_mode);
1256cc1dc7a3Sopenharmony_ci	store_nbytes(clear_quant, best_quant_levels_mod + packed_end_block_mode);
1257cc1dc7a3Sopenharmony_ci
1258cc1dc7a3Sopenharmony_ci	// Track a scalar best to avoid expensive search at least once ...
1259cc1dc7a3Sopenharmony_ci	float error_of_best_combination = ERROR_CALC_DEFAULT;
1260cc1dc7a3Sopenharmony_ci	int index_of_best_combination = -1;
1261cc1dc7a3Sopenharmony_ci
1262cc1dc7a3Sopenharmony_ci	// The block contains 1 partition
1263cc1dc7a3Sopenharmony_ci	if (partition_count == 1)
1264cc1dc7a3Sopenharmony_ci	{
1265cc1dc7a3Sopenharmony_ci		for (unsigned int i = start_block_mode; i < end_block_mode; i++)
1266cc1dc7a3Sopenharmony_ci		{
1267cc1dc7a3Sopenharmony_ci			if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
1268cc1dc7a3Sopenharmony_ci			{
1269cc1dc7a3Sopenharmony_ci				errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
1270cc1dc7a3Sopenharmony_ci				continue;
1271cc1dc7a3Sopenharmony_ci			}
1272cc1dc7a3Sopenharmony_ci
1273cc1dc7a3Sopenharmony_ci			float error_of_best = one_partition_find_best_combination_for_bitcount(
1274cc1dc7a3Sopenharmony_ci			    privateProfile,
1275cc1dc7a3Sopenharmony_ci			    best_error[0], format_of_choice[0], qwt_bitcounts[i],
1276cc1dc7a3Sopenharmony_ci			    best_quant_levels[i], best_ep_formats[i][0]);
1277cc1dc7a3Sopenharmony_ci
1278cc1dc7a3Sopenharmony_ci			float total_error = error_of_best + qwt_errors[i];
1279cc1dc7a3Sopenharmony_ci			errors_of_best_combination[i] = total_error;
1280cc1dc7a3Sopenharmony_ci			best_quant_levels_mod[i] = best_quant_levels[i];
1281cc1dc7a3Sopenharmony_ci
1282cc1dc7a3Sopenharmony_ci			if (total_error < error_of_best_combination)
1283cc1dc7a3Sopenharmony_ci			{
1284cc1dc7a3Sopenharmony_ci				error_of_best_combination = total_error;
1285cc1dc7a3Sopenharmony_ci				index_of_best_combination = i;
1286cc1dc7a3Sopenharmony_ci			}
1287cc1dc7a3Sopenharmony_ci		}
1288cc1dc7a3Sopenharmony_ci	}
1289cc1dc7a3Sopenharmony_ci	// The block contains 2 partitions
1290cc1dc7a3Sopenharmony_ci	else if (partition_count == 2)
1291cc1dc7a3Sopenharmony_ci	{
1292cc1dc7a3Sopenharmony_ci		float combined_best_error[21][7];
1293cc1dc7a3Sopenharmony_ci		uint8_t formats_of_choice[21][7][2];
1294cc1dc7a3Sopenharmony_ci
1295cc1dc7a3Sopenharmony_ci		two_partitions_find_best_combination_for_every_quantization_and_integer_count(
1296cc1dc7a3Sopenharmony_ci		    best_error, format_of_choice, combined_best_error, formats_of_choice);
1297cc1dc7a3Sopenharmony_ci
1298cc1dc7a3Sopenharmony_ci		assert(start_block_mode == 0);
1299cc1dc7a3Sopenharmony_ci		for (unsigned int i = 0; i < end_block_mode; i++)
1300cc1dc7a3Sopenharmony_ci		{
1301cc1dc7a3Sopenharmony_ci			if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
1302cc1dc7a3Sopenharmony_ci			{
1303cc1dc7a3Sopenharmony_ci				errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
1304cc1dc7a3Sopenharmony_ci				continue;
1305cc1dc7a3Sopenharmony_ci			}
1306cc1dc7a3Sopenharmony_ci
1307cc1dc7a3Sopenharmony_ci			float error_of_best = two_partitions_find_best_combination_for_bitcount(
1308cc1dc7a3Sopenharmony_ci				privateProfile,
1309cc1dc7a3Sopenharmony_ci				combined_best_error, formats_of_choice, qwt_bitcounts[i],
1310cc1dc7a3Sopenharmony_ci				best_quant_levels[i], best_quant_levels_mod[i],
1311cc1dc7a3Sopenharmony_ci				best_ep_formats[i]);
1312cc1dc7a3Sopenharmony_ci
1313cc1dc7a3Sopenharmony_ci			float total_error = error_of_best + qwt_errors[i];
1314cc1dc7a3Sopenharmony_ci			errors_of_best_combination[i] = total_error;
1315cc1dc7a3Sopenharmony_ci
1316cc1dc7a3Sopenharmony_ci			if (total_error < error_of_best_combination)
1317cc1dc7a3Sopenharmony_ci			{
1318cc1dc7a3Sopenharmony_ci				error_of_best_combination = total_error;
1319cc1dc7a3Sopenharmony_ci				index_of_best_combination = i;
1320cc1dc7a3Sopenharmony_ci			}
1321cc1dc7a3Sopenharmony_ci		}
1322cc1dc7a3Sopenharmony_ci	}
1323cc1dc7a3Sopenharmony_ci	// The block contains 3 partitions
1324cc1dc7a3Sopenharmony_ci	else if (partition_count == 3)
1325cc1dc7a3Sopenharmony_ci	{
1326cc1dc7a3Sopenharmony_ci		float combined_best_error[21][10];
1327cc1dc7a3Sopenharmony_ci		uint8_t formats_of_choice[21][10][3];
1328cc1dc7a3Sopenharmony_ci
1329cc1dc7a3Sopenharmony_ci		three_partitions_find_best_combination_for_every_quantization_and_integer_count(
1330cc1dc7a3Sopenharmony_ci		    best_error, format_of_choice, combined_best_error, formats_of_choice);
1331cc1dc7a3Sopenharmony_ci
1332cc1dc7a3Sopenharmony_ci		assert(start_block_mode == 0);
1333cc1dc7a3Sopenharmony_ci		for (unsigned int i = 0; i < end_block_mode; i++)
1334cc1dc7a3Sopenharmony_ci		{
1335cc1dc7a3Sopenharmony_ci			if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
1336cc1dc7a3Sopenharmony_ci			{
1337cc1dc7a3Sopenharmony_ci				errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
1338cc1dc7a3Sopenharmony_ci				continue;
1339cc1dc7a3Sopenharmony_ci			}
1340cc1dc7a3Sopenharmony_ci
1341cc1dc7a3Sopenharmony_ci			float error_of_best = three_partitions_find_best_combination_for_bitcount(
1342cc1dc7a3Sopenharmony_ci			    combined_best_error, formats_of_choice, qwt_bitcounts[i],
1343cc1dc7a3Sopenharmony_ci			    best_quant_levels[i], best_quant_levels_mod[i],
1344cc1dc7a3Sopenharmony_ci			    best_ep_formats[i]);
1345cc1dc7a3Sopenharmony_ci
1346cc1dc7a3Sopenharmony_ci			float total_error = error_of_best + qwt_errors[i];
1347cc1dc7a3Sopenharmony_ci			errors_of_best_combination[i] = total_error;
1348cc1dc7a3Sopenharmony_ci
1349cc1dc7a3Sopenharmony_ci			if (total_error < error_of_best_combination)
1350cc1dc7a3Sopenharmony_ci			{
1351cc1dc7a3Sopenharmony_ci				error_of_best_combination = total_error;
1352cc1dc7a3Sopenharmony_ci				index_of_best_combination = i;
1353cc1dc7a3Sopenharmony_ci			}
1354cc1dc7a3Sopenharmony_ci		}
1355cc1dc7a3Sopenharmony_ci	}
1356cc1dc7a3Sopenharmony_ci	// The block contains 4 partitions
1357cc1dc7a3Sopenharmony_ci	else // if (partition_count == 4)
1358cc1dc7a3Sopenharmony_ci	{
1359cc1dc7a3Sopenharmony_ci		assert(partition_count == 4);
1360cc1dc7a3Sopenharmony_ci		float combined_best_error[21][13];
1361cc1dc7a3Sopenharmony_ci		uint8_t formats_of_choice[21][13][4];
1362cc1dc7a3Sopenharmony_ci
1363cc1dc7a3Sopenharmony_ci		four_partitions_find_best_combination_for_every_quantization_and_integer_count(
1364cc1dc7a3Sopenharmony_ci		    best_error, format_of_choice, combined_best_error, formats_of_choice);
1365cc1dc7a3Sopenharmony_ci
1366cc1dc7a3Sopenharmony_ci		assert(start_block_mode == 0);
1367cc1dc7a3Sopenharmony_ci		for (unsigned int i = 0; i < end_block_mode; i++)
1368cc1dc7a3Sopenharmony_ci		{
1369cc1dc7a3Sopenharmony_ci			if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
1370cc1dc7a3Sopenharmony_ci			{
1371cc1dc7a3Sopenharmony_ci				errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
1372cc1dc7a3Sopenharmony_ci				continue;
1373cc1dc7a3Sopenharmony_ci			}
1374cc1dc7a3Sopenharmony_ci
1375cc1dc7a3Sopenharmony_ci			float error_of_best = four_partitions_find_best_combination_for_bitcount(
1376cc1dc7a3Sopenharmony_ci			    combined_best_error, formats_of_choice, qwt_bitcounts[i],
1377cc1dc7a3Sopenharmony_ci			    best_quant_levels[i], best_quant_levels_mod[i],
1378cc1dc7a3Sopenharmony_ci			    best_ep_formats[i]);
1379cc1dc7a3Sopenharmony_ci
1380cc1dc7a3Sopenharmony_ci			float total_error = error_of_best + qwt_errors[i];
1381cc1dc7a3Sopenharmony_ci			errors_of_best_combination[i] = total_error;
1382cc1dc7a3Sopenharmony_ci
1383cc1dc7a3Sopenharmony_ci			if (total_error < error_of_best_combination)
1384cc1dc7a3Sopenharmony_ci			{
1385cc1dc7a3Sopenharmony_ci				error_of_best_combination = total_error;
1386cc1dc7a3Sopenharmony_ci				index_of_best_combination = i;
1387cc1dc7a3Sopenharmony_ci			}
1388cc1dc7a3Sopenharmony_ci		}
1389cc1dc7a3Sopenharmony_ci	}
1390cc1dc7a3Sopenharmony_ci
1391cc1dc7a3Sopenharmony_ci	int best_error_weights[TUNE_MAX_TRIAL_CANDIDATES];
1392cc1dc7a3Sopenharmony_ci
1393cc1dc7a3Sopenharmony_ci	// Fast path the first result and avoid the list search for trial 0
1394cc1dc7a3Sopenharmony_ci	best_error_weights[0] = index_of_best_combination;
1395cc1dc7a3Sopenharmony_ci	if (index_of_best_combination >= 0)
1396cc1dc7a3Sopenharmony_ci	{
1397cc1dc7a3Sopenharmony_ci		errors_of_best_combination[index_of_best_combination] = ERROR_CALC_DEFAULT;
1398cc1dc7a3Sopenharmony_ci	}
1399cc1dc7a3Sopenharmony_ci
1400cc1dc7a3Sopenharmony_ci	// Search the remaining results and pick the best candidate modes for trial 1+
1401cc1dc7a3Sopenharmony_ci	for (unsigned int i = 1; i < tune_candidate_limit; i++)
1402cc1dc7a3Sopenharmony_ci	{
1403cc1dc7a3Sopenharmony_ci		vint vbest_error_index(-1);
1404cc1dc7a3Sopenharmony_ci		vfloat vbest_ep_error(ERROR_CALC_DEFAULT);
1405cc1dc7a3Sopenharmony_ci
1406cc1dc7a3Sopenharmony_ci		start_block_mode = round_down_to_simd_multiple_vla(start_block_mode);
1407cc1dc7a3Sopenharmony_ci		vint lane_ids = vint::lane_id() + vint(start_block_mode);
1408cc1dc7a3Sopenharmony_ci		for (unsigned int j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH)
1409cc1dc7a3Sopenharmony_ci		{
1410cc1dc7a3Sopenharmony_ci			vfloat err = vfloat(errors_of_best_combination + j);
1411cc1dc7a3Sopenharmony_ci			vmask mask = err < vbest_ep_error;
1412cc1dc7a3Sopenharmony_ci			vbest_ep_error = select(vbest_ep_error, err, mask);
1413cc1dc7a3Sopenharmony_ci			vbest_error_index = select(vbest_error_index, lane_ids, mask);
1414cc1dc7a3Sopenharmony_ci			lane_ids += vint(ASTCENC_SIMD_WIDTH);
1415cc1dc7a3Sopenharmony_ci		}
1416cc1dc7a3Sopenharmony_ci
1417cc1dc7a3Sopenharmony_ci		// Pick best mode from the SIMD result, using lowest matching index to ensure invariance
1418cc1dc7a3Sopenharmony_ci		vmask lanes_min_error = vbest_ep_error == hmin(vbest_ep_error);
1419cc1dc7a3Sopenharmony_ci		vbest_error_index = select(vint(0x7FFFFFFF), vbest_error_index, lanes_min_error);
1420cc1dc7a3Sopenharmony_ci		vbest_error_index = hmin(vbest_error_index);
1421cc1dc7a3Sopenharmony_ci		int best_error_index = vbest_error_index.lane<0>();
1422cc1dc7a3Sopenharmony_ci
1423cc1dc7a3Sopenharmony_ci		best_error_weights[i] = best_error_index;
1424cc1dc7a3Sopenharmony_ci
1425cc1dc7a3Sopenharmony_ci		// Max the error for this candidate so we don't pick it again
1426cc1dc7a3Sopenharmony_ci		if (best_error_index >= 0)
1427cc1dc7a3Sopenharmony_ci		{
1428cc1dc7a3Sopenharmony_ci			errors_of_best_combination[best_error_index] = ERROR_CALC_DEFAULT;
1429cc1dc7a3Sopenharmony_ci		}
1430cc1dc7a3Sopenharmony_ci		// Early-out if no more candidates are valid
1431cc1dc7a3Sopenharmony_ci		else
1432cc1dc7a3Sopenharmony_ci		{
1433cc1dc7a3Sopenharmony_ci			break;
1434cc1dc7a3Sopenharmony_ci		}
1435cc1dc7a3Sopenharmony_ci	}
1436cc1dc7a3Sopenharmony_ci
1437cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < tune_candidate_limit; i++)
1438cc1dc7a3Sopenharmony_ci	{
1439cc1dc7a3Sopenharmony_ci		if (best_error_weights[i] < 0)
1440cc1dc7a3Sopenharmony_ci		{
1441cc1dc7a3Sopenharmony_ci			return i;
1442cc1dc7a3Sopenharmony_ci		}
1443cc1dc7a3Sopenharmony_ci
1444cc1dc7a3Sopenharmony_ci		block_mode[i] = best_error_weights[i];
1445cc1dc7a3Sopenharmony_ci
1446cc1dc7a3Sopenharmony_ci		quant_level[i] = static_cast<quant_method>(best_quant_levels[best_error_weights[i]]);
1447cc1dc7a3Sopenharmony_ci		quant_level_mod[i] = static_cast<quant_method>(best_quant_levels_mod[best_error_weights[i]]);
1448cc1dc7a3Sopenharmony_ci
1449cc1dc7a3Sopenharmony_ci		assert(quant_level[i] >= QUANT_6 && quant_level[i] <= QUANT_256);
1450cc1dc7a3Sopenharmony_ci		assert(quant_level_mod[i] >= QUANT_6 && quant_level_mod[i] <= QUANT_256);
1451cc1dc7a3Sopenharmony_ci
1452cc1dc7a3Sopenharmony_ci		for (int j = 0; j < partition_count; j++)
1453cc1dc7a3Sopenharmony_ci		{
1454cc1dc7a3Sopenharmony_ci			partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j];
1455cc1dc7a3Sopenharmony_ci		}
1456cc1dc7a3Sopenharmony_ci	}
1457cc1dc7a3Sopenharmony_ci
1458cc1dc7a3Sopenharmony_ci	return tune_candidate_limit;
1459cc1dc7a3Sopenharmony_ci}
1460cc1dc7a3Sopenharmony_ci
1461cc1dc7a3Sopenharmony_ci#endif
1462