// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2023 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------

/**
 * @brief Functions to generate block size descriptor and decimation tables.
 */
21cc1dc7a3Sopenharmony_ci
22cc1dc7a3Sopenharmony_ci#include "astcenc_internal.h"
23cc1dc7a3Sopenharmony_ci
24cc1dc7a3Sopenharmony_ci/**
25cc1dc7a3Sopenharmony_ci * @brief Decode the properties of an encoded 2D block mode.
26cc1dc7a3Sopenharmony_ci *
27cc1dc7a3Sopenharmony_ci * @param      block_mode      The encoded block mode.
28cc1dc7a3Sopenharmony_ci * @param[out] x_weights       The number of weights in the X dimension.
29cc1dc7a3Sopenharmony_ci * @param[out] y_weights       The number of weights in the Y dimension.
30cc1dc7a3Sopenharmony_ci * @param[out] is_dual_plane   True if this block mode has two weight planes.
31cc1dc7a3Sopenharmony_ci * @param[out] quant_mode      The quantization level for the weights.
32cc1dc7a3Sopenharmony_ci * @param[out] weight_bits     The storage bit count for the weights.
33cc1dc7a3Sopenharmony_ci *
34cc1dc7a3Sopenharmony_ci * @return Returns true if a valid mode, false otherwise.
35cc1dc7a3Sopenharmony_ci */
36cc1dc7a3Sopenharmony_cistatic bool decode_block_mode_2d(
37cc1dc7a3Sopenharmony_ci	unsigned int block_mode,
38cc1dc7a3Sopenharmony_ci	unsigned int& x_weights,
39cc1dc7a3Sopenharmony_ci	unsigned int& y_weights,
40cc1dc7a3Sopenharmony_ci	bool& is_dual_plane,
41cc1dc7a3Sopenharmony_ci	unsigned int& quant_mode,
42cc1dc7a3Sopenharmony_ci	unsigned int& weight_bits
43cc1dc7a3Sopenharmony_ci) {
44cc1dc7a3Sopenharmony_ci	unsigned int base_quant_mode = (block_mode >> 4) & 1;
45cc1dc7a3Sopenharmony_ci	unsigned int H = (block_mode >> 9) & 1;
46cc1dc7a3Sopenharmony_ci	unsigned int D = (block_mode >> 10) & 1;
47cc1dc7a3Sopenharmony_ci	unsigned int A = (block_mode >> 5) & 0x3;
48cc1dc7a3Sopenharmony_ci
49cc1dc7a3Sopenharmony_ci	x_weights = 0;
50cc1dc7a3Sopenharmony_ci	y_weights = 0;
51cc1dc7a3Sopenharmony_ci
52cc1dc7a3Sopenharmony_ci	if ((block_mode & 3) != 0)
53cc1dc7a3Sopenharmony_ci	{
54cc1dc7a3Sopenharmony_ci		base_quant_mode |= (block_mode & 3) << 1;
55cc1dc7a3Sopenharmony_ci		unsigned int B = (block_mode >> 7) & 3;
56cc1dc7a3Sopenharmony_ci		switch ((block_mode >> 2) & 3)
57cc1dc7a3Sopenharmony_ci		{
58cc1dc7a3Sopenharmony_ci		case 0:
59cc1dc7a3Sopenharmony_ci			x_weights = B + 4;
60cc1dc7a3Sopenharmony_ci			y_weights = A + 2;
61cc1dc7a3Sopenharmony_ci			break;
62cc1dc7a3Sopenharmony_ci		case 1:
63cc1dc7a3Sopenharmony_ci			x_weights = B + 8;
64cc1dc7a3Sopenharmony_ci			y_weights = A + 2;
65cc1dc7a3Sopenharmony_ci			break;
66cc1dc7a3Sopenharmony_ci		case 2:
67cc1dc7a3Sopenharmony_ci			x_weights = A + 2;
68cc1dc7a3Sopenharmony_ci			y_weights = B + 8;
69cc1dc7a3Sopenharmony_ci			break;
70cc1dc7a3Sopenharmony_ci		case 3:
71cc1dc7a3Sopenharmony_ci			B &= 1;
72cc1dc7a3Sopenharmony_ci			if (block_mode & 0x100)
73cc1dc7a3Sopenharmony_ci			{
74cc1dc7a3Sopenharmony_ci				x_weights = B + 2;
75cc1dc7a3Sopenharmony_ci				y_weights = A + 2;
76cc1dc7a3Sopenharmony_ci			}
77cc1dc7a3Sopenharmony_ci			else
78cc1dc7a3Sopenharmony_ci			{
79cc1dc7a3Sopenharmony_ci				x_weights = A + 2;
80cc1dc7a3Sopenharmony_ci				y_weights = B + 6;
81cc1dc7a3Sopenharmony_ci			}
82cc1dc7a3Sopenharmony_ci			break;
83cc1dc7a3Sopenharmony_ci		}
84cc1dc7a3Sopenharmony_ci	}
85cc1dc7a3Sopenharmony_ci	else
86cc1dc7a3Sopenharmony_ci	{
87cc1dc7a3Sopenharmony_ci		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
88cc1dc7a3Sopenharmony_ci		if (((block_mode >> 2) & 3) == 0)
89cc1dc7a3Sopenharmony_ci		{
90cc1dc7a3Sopenharmony_ci			return false;
91cc1dc7a3Sopenharmony_ci		}
92cc1dc7a3Sopenharmony_ci
93cc1dc7a3Sopenharmony_ci		unsigned int B = (block_mode >> 9) & 3;
94cc1dc7a3Sopenharmony_ci		switch ((block_mode >> 7) & 3)
95cc1dc7a3Sopenharmony_ci		{
96cc1dc7a3Sopenharmony_ci		case 0:
97cc1dc7a3Sopenharmony_ci			x_weights = 12;
98cc1dc7a3Sopenharmony_ci			y_weights = A + 2;
99cc1dc7a3Sopenharmony_ci			break;
100cc1dc7a3Sopenharmony_ci		case 1:
101cc1dc7a3Sopenharmony_ci			x_weights = A + 2;
102cc1dc7a3Sopenharmony_ci			y_weights = 12;
103cc1dc7a3Sopenharmony_ci			break;
104cc1dc7a3Sopenharmony_ci		case 2:
105cc1dc7a3Sopenharmony_ci			x_weights = A + 6;
106cc1dc7a3Sopenharmony_ci			y_weights = B + 6;
107cc1dc7a3Sopenharmony_ci			D = 0;
108cc1dc7a3Sopenharmony_ci			H = 0;
109cc1dc7a3Sopenharmony_ci			break;
110cc1dc7a3Sopenharmony_ci		case 3:
111cc1dc7a3Sopenharmony_ci			switch ((block_mode >> 5) & 3)
112cc1dc7a3Sopenharmony_ci			{
113cc1dc7a3Sopenharmony_ci			case 0:
114cc1dc7a3Sopenharmony_ci				x_weights = 6;
115cc1dc7a3Sopenharmony_ci				y_weights = 10;
116cc1dc7a3Sopenharmony_ci				break;
117cc1dc7a3Sopenharmony_ci			case 1:
118cc1dc7a3Sopenharmony_ci				x_weights = 10;
119cc1dc7a3Sopenharmony_ci				y_weights = 6;
120cc1dc7a3Sopenharmony_ci				break;
121cc1dc7a3Sopenharmony_ci			case 2:
122cc1dc7a3Sopenharmony_ci			case 3:
123cc1dc7a3Sopenharmony_ci				return false;
124cc1dc7a3Sopenharmony_ci			}
125cc1dc7a3Sopenharmony_ci			break;
126cc1dc7a3Sopenharmony_ci		}
127cc1dc7a3Sopenharmony_ci	}
128cc1dc7a3Sopenharmony_ci
129cc1dc7a3Sopenharmony_ci	unsigned int weight_count = x_weights * y_weights * (D + 1);
130cc1dc7a3Sopenharmony_ci	quant_mode = (base_quant_mode - 2) + 6 * H;
131cc1dc7a3Sopenharmony_ci	is_dual_plane = D != 0;
132cc1dc7a3Sopenharmony_ci
133cc1dc7a3Sopenharmony_ci	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
134cc1dc7a3Sopenharmony_ci	return (weight_count <= BLOCK_MAX_WEIGHTS &&
135cc1dc7a3Sopenharmony_ci	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
136cc1dc7a3Sopenharmony_ci	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
137cc1dc7a3Sopenharmony_ci}
138cc1dc7a3Sopenharmony_ci
139cc1dc7a3Sopenharmony_ci/**
140cc1dc7a3Sopenharmony_ci * @brief Decode the properties of an encoded 3D block mode.
141cc1dc7a3Sopenharmony_ci *
142cc1dc7a3Sopenharmony_ci * @param      block_mode      The encoded block mode.
143cc1dc7a3Sopenharmony_ci * @param[out] x_weights       The number of weights in the X dimension.
144cc1dc7a3Sopenharmony_ci * @param[out] y_weights       The number of weights in the Y dimension.
145cc1dc7a3Sopenharmony_ci * @param[out] z_weights       The number of weights in the Z dimension.
146cc1dc7a3Sopenharmony_ci * @param[out] is_dual_plane   True if this block mode has two weight planes.
147cc1dc7a3Sopenharmony_ci * @param[out] quant_mode      The quantization level for the weights.
148cc1dc7a3Sopenharmony_ci * @param[out] weight_bits     The storage bit count for the weights.
149cc1dc7a3Sopenharmony_ci *
150cc1dc7a3Sopenharmony_ci * @return Returns true if a valid mode, false otherwise.
151cc1dc7a3Sopenharmony_ci */
152cc1dc7a3Sopenharmony_cistatic bool decode_block_mode_3d(
153cc1dc7a3Sopenharmony_ci	unsigned int block_mode,
154cc1dc7a3Sopenharmony_ci	unsigned int& x_weights,
155cc1dc7a3Sopenharmony_ci	unsigned int& y_weights,
156cc1dc7a3Sopenharmony_ci	unsigned int& z_weights,
157cc1dc7a3Sopenharmony_ci	bool& is_dual_plane,
158cc1dc7a3Sopenharmony_ci	unsigned int& quant_mode,
159cc1dc7a3Sopenharmony_ci	unsigned int& weight_bits
160cc1dc7a3Sopenharmony_ci) {
161cc1dc7a3Sopenharmony_ci	unsigned int base_quant_mode = (block_mode >> 4) & 1;
162cc1dc7a3Sopenharmony_ci	unsigned int H = (block_mode >> 9) & 1;
163cc1dc7a3Sopenharmony_ci	unsigned int D = (block_mode >> 10) & 1;
164cc1dc7a3Sopenharmony_ci	unsigned int A = (block_mode >> 5) & 0x3;
165cc1dc7a3Sopenharmony_ci
166cc1dc7a3Sopenharmony_ci	x_weights = 0;
167cc1dc7a3Sopenharmony_ci	y_weights = 0;
168cc1dc7a3Sopenharmony_ci	z_weights = 0;
169cc1dc7a3Sopenharmony_ci
170cc1dc7a3Sopenharmony_ci	if ((block_mode & 3) != 0)
171cc1dc7a3Sopenharmony_ci	{
172cc1dc7a3Sopenharmony_ci		base_quant_mode |= (block_mode & 3) << 1;
173cc1dc7a3Sopenharmony_ci		unsigned int B = (block_mode >> 7) & 3;
174cc1dc7a3Sopenharmony_ci		unsigned int C = (block_mode >> 2) & 0x3;
175cc1dc7a3Sopenharmony_ci		x_weights = A + 2;
176cc1dc7a3Sopenharmony_ci		y_weights = B + 2;
177cc1dc7a3Sopenharmony_ci		z_weights = C + 2;
178cc1dc7a3Sopenharmony_ci	}
179cc1dc7a3Sopenharmony_ci	else
180cc1dc7a3Sopenharmony_ci	{
181cc1dc7a3Sopenharmony_ci		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
182cc1dc7a3Sopenharmony_ci		if (((block_mode >> 2) & 3) == 0)
183cc1dc7a3Sopenharmony_ci		{
184cc1dc7a3Sopenharmony_ci			return false;
185cc1dc7a3Sopenharmony_ci		}
186cc1dc7a3Sopenharmony_ci
187cc1dc7a3Sopenharmony_ci		int B = (block_mode >> 9) & 3;
188cc1dc7a3Sopenharmony_ci		if (((block_mode >> 7) & 3) != 3)
189cc1dc7a3Sopenharmony_ci		{
190cc1dc7a3Sopenharmony_ci			D = 0;
191cc1dc7a3Sopenharmony_ci			H = 0;
192cc1dc7a3Sopenharmony_ci		}
193cc1dc7a3Sopenharmony_ci		switch ((block_mode >> 7) & 3)
194cc1dc7a3Sopenharmony_ci		{
195cc1dc7a3Sopenharmony_ci		case 0:
196cc1dc7a3Sopenharmony_ci			x_weights = 6;
197cc1dc7a3Sopenharmony_ci			y_weights = B + 2;
198cc1dc7a3Sopenharmony_ci			z_weights = A + 2;
199cc1dc7a3Sopenharmony_ci			break;
200cc1dc7a3Sopenharmony_ci		case 1:
201cc1dc7a3Sopenharmony_ci			x_weights = A + 2;
202cc1dc7a3Sopenharmony_ci			y_weights = 6;
203cc1dc7a3Sopenharmony_ci			z_weights = B + 2;
204cc1dc7a3Sopenharmony_ci			break;
205cc1dc7a3Sopenharmony_ci		case 2:
206cc1dc7a3Sopenharmony_ci			x_weights = A + 2;
207cc1dc7a3Sopenharmony_ci			y_weights = B + 2;
208cc1dc7a3Sopenharmony_ci			z_weights = 6;
209cc1dc7a3Sopenharmony_ci			break;
210cc1dc7a3Sopenharmony_ci		case 3:
211cc1dc7a3Sopenharmony_ci			x_weights = 2;
212cc1dc7a3Sopenharmony_ci			y_weights = 2;
213cc1dc7a3Sopenharmony_ci			z_weights = 2;
214cc1dc7a3Sopenharmony_ci			switch ((block_mode >> 5) & 3)
215cc1dc7a3Sopenharmony_ci			{
216cc1dc7a3Sopenharmony_ci			case 0:
217cc1dc7a3Sopenharmony_ci				x_weights = 6;
218cc1dc7a3Sopenharmony_ci				break;
219cc1dc7a3Sopenharmony_ci			case 1:
220cc1dc7a3Sopenharmony_ci				y_weights = 6;
221cc1dc7a3Sopenharmony_ci				break;
222cc1dc7a3Sopenharmony_ci			case 2:
223cc1dc7a3Sopenharmony_ci				z_weights = 6;
224cc1dc7a3Sopenharmony_ci				break;
225cc1dc7a3Sopenharmony_ci			case 3:
226cc1dc7a3Sopenharmony_ci				return false;
227cc1dc7a3Sopenharmony_ci			}
228cc1dc7a3Sopenharmony_ci			break;
229cc1dc7a3Sopenharmony_ci		}
230cc1dc7a3Sopenharmony_ci	}
231cc1dc7a3Sopenharmony_ci
232cc1dc7a3Sopenharmony_ci	unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
233cc1dc7a3Sopenharmony_ci	quant_mode = (base_quant_mode - 2) + 6 * H;
234cc1dc7a3Sopenharmony_ci	is_dual_plane = D != 0;
235cc1dc7a3Sopenharmony_ci
236cc1dc7a3Sopenharmony_ci	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
237cc1dc7a3Sopenharmony_ci	return (weight_count <= BLOCK_MAX_WEIGHTS &&
238cc1dc7a3Sopenharmony_ci	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
239cc1dc7a3Sopenharmony_ci	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
240cc1dc7a3Sopenharmony_ci}
241cc1dc7a3Sopenharmony_ci
242cc1dc7a3Sopenharmony_ci/**
243cc1dc7a3Sopenharmony_ci * @brief Create a 2D decimation entry for a block-size and weight-decimation pair.
244cc1dc7a3Sopenharmony_ci *
245cc1dc7a3Sopenharmony_ci * @param      x_texels    The number of texels in the X dimension.
246cc1dc7a3Sopenharmony_ci * @param      y_texels    The number of texels in the Y dimension.
247cc1dc7a3Sopenharmony_ci * @param      x_weights   The number of weights in the X dimension.
248cc1dc7a3Sopenharmony_ci * @param      y_weights   The number of weights in the Y dimension.
249cc1dc7a3Sopenharmony_ci * @param[out] di          The decimation info structure to populate.
250cc1dc7a3Sopenharmony_ci * @param[out] wb          The decimation table init scratch working buffers.
251cc1dc7a3Sopenharmony_ci */
252cc1dc7a3Sopenharmony_cistatic void init_decimation_info_2d(
253cc1dc7a3Sopenharmony_ci	unsigned int x_texels,
254cc1dc7a3Sopenharmony_ci	unsigned int y_texels,
255cc1dc7a3Sopenharmony_ci	unsigned int x_weights,
256cc1dc7a3Sopenharmony_ci	unsigned int y_weights,
257cc1dc7a3Sopenharmony_ci	decimation_info& di,
258cc1dc7a3Sopenharmony_ci	dt_init_working_buffers& wb
259cc1dc7a3Sopenharmony_ci) {
260cc1dc7a3Sopenharmony_ci	unsigned int texels_per_block = x_texels * y_texels;
261cc1dc7a3Sopenharmony_ci	unsigned int weights_per_block = x_weights * y_weights;
262cc1dc7a3Sopenharmony_ci
263cc1dc7a3Sopenharmony_ci	uint8_t max_texel_count_of_weight = 0;
264cc1dc7a3Sopenharmony_ci
265cc1dc7a3Sopenharmony_ci	promise(weights_per_block > 0);
266cc1dc7a3Sopenharmony_ci	promise(texels_per_block > 0);
267cc1dc7a3Sopenharmony_ci	promise(x_texels > 0);
268cc1dc7a3Sopenharmony_ci	promise(y_texels > 0);
269cc1dc7a3Sopenharmony_ci
270cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < weights_per_block; i++)
271cc1dc7a3Sopenharmony_ci	{
272cc1dc7a3Sopenharmony_ci		wb.texel_count_of_weight[i] = 0;
273cc1dc7a3Sopenharmony_ci	}
274cc1dc7a3Sopenharmony_ci
275cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < texels_per_block; i++)
276cc1dc7a3Sopenharmony_ci	{
277cc1dc7a3Sopenharmony_ci		wb.weight_count_of_texel[i] = 0;
278cc1dc7a3Sopenharmony_ci	}
279cc1dc7a3Sopenharmony_ci
280cc1dc7a3Sopenharmony_ci	for (unsigned int y = 0; y < y_texels; y++)
281cc1dc7a3Sopenharmony_ci	{
282cc1dc7a3Sopenharmony_ci		for (unsigned int x = 0; x < x_texels; x++)
283cc1dc7a3Sopenharmony_ci		{
284cc1dc7a3Sopenharmony_ci			unsigned int texel = y * x_texels + x;
285cc1dc7a3Sopenharmony_ci
286cc1dc7a3Sopenharmony_ci			unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
287cc1dc7a3Sopenharmony_ci			unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
288cc1dc7a3Sopenharmony_ci
289cc1dc7a3Sopenharmony_ci			unsigned int x_weight_frac = x_weight & 0xF;
290cc1dc7a3Sopenharmony_ci			unsigned int y_weight_frac = y_weight & 0xF;
291cc1dc7a3Sopenharmony_ci			unsigned int x_weight_int = x_weight >> 4;
292cc1dc7a3Sopenharmony_ci			unsigned int y_weight_int = y_weight >> 4;
293cc1dc7a3Sopenharmony_ci
294cc1dc7a3Sopenharmony_ci			unsigned int qweight[4];
295cc1dc7a3Sopenharmony_ci			qweight[0] = x_weight_int + y_weight_int * x_weights;
296cc1dc7a3Sopenharmony_ci			qweight[1] = qweight[0] + 1;
297cc1dc7a3Sopenharmony_ci			qweight[2] = qweight[0] + x_weights;
298cc1dc7a3Sopenharmony_ci			qweight[3] = qweight[2] + 1;
299cc1dc7a3Sopenharmony_ci
300cc1dc7a3Sopenharmony_ci			// Truncated-precision bilinear interpolation
301cc1dc7a3Sopenharmony_ci			unsigned int prod = x_weight_frac * y_weight_frac;
302cc1dc7a3Sopenharmony_ci
303cc1dc7a3Sopenharmony_ci			unsigned int weight[4];
304cc1dc7a3Sopenharmony_ci			weight[3] = (prod + 8) >> 4;
305cc1dc7a3Sopenharmony_ci			weight[1] = x_weight_frac - weight[3];
306cc1dc7a3Sopenharmony_ci			weight[2] = y_weight_frac - weight[3];
307cc1dc7a3Sopenharmony_ci			weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
308cc1dc7a3Sopenharmony_ci
309cc1dc7a3Sopenharmony_ci			for (unsigned int i = 0; i < 4; i++)
310cc1dc7a3Sopenharmony_ci			{
311cc1dc7a3Sopenharmony_ci				if (weight[i] != 0)
312cc1dc7a3Sopenharmony_ci				{
313cc1dc7a3Sopenharmony_ci					wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
314cc1dc7a3Sopenharmony_ci					wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
315cc1dc7a3Sopenharmony_ci					wb.weight_count_of_texel[texel]++;
316cc1dc7a3Sopenharmony_ci					wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
317cc1dc7a3Sopenharmony_ci					wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
318cc1dc7a3Sopenharmony_ci					wb.texel_count_of_weight[qweight[i]]++;
319cc1dc7a3Sopenharmony_ci					max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
320cc1dc7a3Sopenharmony_ci				}
321cc1dc7a3Sopenharmony_ci			}
322cc1dc7a3Sopenharmony_ci		}
323cc1dc7a3Sopenharmony_ci	}
324cc1dc7a3Sopenharmony_ci
325cc1dc7a3Sopenharmony_ci	uint8_t max_texel_weight_count = 0;
326cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < texels_per_block; i++)
327cc1dc7a3Sopenharmony_ci	{
328cc1dc7a3Sopenharmony_ci		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
329cc1dc7a3Sopenharmony_ci		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
330cc1dc7a3Sopenharmony_ci
331cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
332cc1dc7a3Sopenharmony_ci		{
333cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
334cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
335cc1dc7a3Sopenharmony_ci			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
336cc1dc7a3Sopenharmony_ci		}
337cc1dc7a3Sopenharmony_ci
338cc1dc7a3Sopenharmony_ci		// Init all 4 entries so we can rely on zeros for vectorization
339cc1dc7a3Sopenharmony_ci		for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
340cc1dc7a3Sopenharmony_ci		{
341cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_int_tr[j][i] = 0;
342cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
343cc1dc7a3Sopenharmony_ci			di.texel_weights_tr[j][i] = 0;
344cc1dc7a3Sopenharmony_ci		}
345cc1dc7a3Sopenharmony_ci	}
346cc1dc7a3Sopenharmony_ci
347cc1dc7a3Sopenharmony_ci	di.max_texel_weight_count = max_texel_weight_count;
348cc1dc7a3Sopenharmony_ci
349cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < weights_per_block; i++)
350cc1dc7a3Sopenharmony_ci	{
351cc1dc7a3Sopenharmony_ci		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
352cc1dc7a3Sopenharmony_ci		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
353cc1dc7a3Sopenharmony_ci
354cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < texel_count_wt; j++)
355cc1dc7a3Sopenharmony_ci		{
356cc1dc7a3Sopenharmony_ci			uint8_t texel = wb.texels_of_weight[i][j];
357cc1dc7a3Sopenharmony_ci
358cc1dc7a3Sopenharmony_ci			// Create transposed versions of these for better vectorization
359cc1dc7a3Sopenharmony_ci			di.weight_texels_tr[j][i] = texel;
360cc1dc7a3Sopenharmony_ci			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
361cc1dc7a3Sopenharmony_ci
362cc1dc7a3Sopenharmony_ci			// Store the per-texel contribution of this weight for each texel it contributes to
363cc1dc7a3Sopenharmony_ci			di.texel_contrib_for_weight[j][i] = 0.0f;
364cc1dc7a3Sopenharmony_ci			for (unsigned int k = 0; k < 4; k++)
365cc1dc7a3Sopenharmony_ci			{
366cc1dc7a3Sopenharmony_ci				uint8_t dttw = di.texel_weights_tr[k][texel];
367cc1dc7a3Sopenharmony_ci				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
368cc1dc7a3Sopenharmony_ci				if (dttw == i && dttwf != 0.0f)
369cc1dc7a3Sopenharmony_ci				{
370cc1dc7a3Sopenharmony_ci					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
371cc1dc7a3Sopenharmony_ci					break;
372cc1dc7a3Sopenharmony_ci				}
373cc1dc7a3Sopenharmony_ci			}
374cc1dc7a3Sopenharmony_ci		}
375cc1dc7a3Sopenharmony_ci
376cc1dc7a3Sopenharmony_ci		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
377cc1dc7a3Sopenharmony_ci		// Match last texel in active lane in SIMD group, for better gathers
378cc1dc7a3Sopenharmony_ci		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
379cc1dc7a3Sopenharmony_ci		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
380cc1dc7a3Sopenharmony_ci		{
381cc1dc7a3Sopenharmony_ci			di.weight_texels_tr[j][i] = last_texel;
382cc1dc7a3Sopenharmony_ci			di.weights_texel_contribs_tr[j][i] = 0.0f;
383cc1dc7a3Sopenharmony_ci		}
384cc1dc7a3Sopenharmony_ci	}
385cc1dc7a3Sopenharmony_ci
386cc1dc7a3Sopenharmony_ci	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
387cc1dc7a3Sopenharmony_ci	unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
388cc1dc7a3Sopenharmony_ci	for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
389cc1dc7a3Sopenharmony_ci	{
390cc1dc7a3Sopenharmony_ci		di.texel_weight_count[i] = 0;
391cc1dc7a3Sopenharmony_ci
392cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < 4; j++)
393cc1dc7a3Sopenharmony_ci		{
394cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_float_tr[j][i] = 0;
395cc1dc7a3Sopenharmony_ci			di.texel_weights_tr[j][i] = 0;
396cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_int_tr[j][i] = 0;
397cc1dc7a3Sopenharmony_ci		}
398cc1dc7a3Sopenharmony_ci	}
399cc1dc7a3Sopenharmony_ci
400cc1dc7a3Sopenharmony_ci	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
401cc1dc7a3Sopenharmony_ci	// Match last texel in active lane in SIMD group, for better gathers
402cc1dc7a3Sopenharmony_ci	unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
403cc1dc7a3Sopenharmony_ci	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
404cc1dc7a3Sopenharmony_ci
405cc1dc7a3Sopenharmony_ci	unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
406cc1dc7a3Sopenharmony_ci	for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
407cc1dc7a3Sopenharmony_ci	{
408cc1dc7a3Sopenharmony_ci		di.weight_texel_count[i] = 0;
409cc1dc7a3Sopenharmony_ci
410cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < max_texel_count_of_weight; j++)
411cc1dc7a3Sopenharmony_ci		{
412cc1dc7a3Sopenharmony_ci			di.weight_texels_tr[j][i] = last_texel;
413cc1dc7a3Sopenharmony_ci			di.weights_texel_contribs_tr[j][i] = 0.0f;
414cc1dc7a3Sopenharmony_ci		}
415cc1dc7a3Sopenharmony_ci	}
416cc1dc7a3Sopenharmony_ci
417cc1dc7a3Sopenharmony_ci	di.texel_count = static_cast<uint8_t>(texels_per_block);
418cc1dc7a3Sopenharmony_ci	di.weight_count = static_cast<uint8_t>(weights_per_block);
419cc1dc7a3Sopenharmony_ci	di.weight_x = static_cast<uint8_t>(x_weights);
420cc1dc7a3Sopenharmony_ci	di.weight_y = static_cast<uint8_t>(y_weights);
421cc1dc7a3Sopenharmony_ci	di.weight_z = 1;
422cc1dc7a3Sopenharmony_ci}
423cc1dc7a3Sopenharmony_ci
/**
 * @brief Create a 3D decimation entry for a block-size and weight-decimation pair.
 *
 * @param      x_texels    The number of texels in the X dimension.
 * @param      y_texels    The number of texels in the Y dimension.
 * @param      z_texels    The number of texels in the Z dimension.
 * @param      x_weights   The number of weights in the X dimension.
 * @param      y_weights   The number of weights in the Y dimension.
 * @param      z_weights   The number of weights in the Z dimension.
 * @param[out] di          The decimation info structure to populate.
 * @param[out] wb          The decimation table init scratch working buffers.
 */
436cc1dc7a3Sopenharmony_cistatic void init_decimation_info_3d(
437cc1dc7a3Sopenharmony_ci	unsigned int x_texels,
438cc1dc7a3Sopenharmony_ci	unsigned int y_texels,
439cc1dc7a3Sopenharmony_ci	unsigned int z_texels,
440cc1dc7a3Sopenharmony_ci	unsigned int x_weights,
441cc1dc7a3Sopenharmony_ci	unsigned int y_weights,
442cc1dc7a3Sopenharmony_ci	unsigned int z_weights,
443cc1dc7a3Sopenharmony_ci	decimation_info& di,
444cc1dc7a3Sopenharmony_ci	dt_init_working_buffers& wb
445cc1dc7a3Sopenharmony_ci) {
446cc1dc7a3Sopenharmony_ci	unsigned int texels_per_block = x_texels * y_texels * z_texels;
447cc1dc7a3Sopenharmony_ci	unsigned int weights_per_block = x_weights * y_weights * z_weights;
448cc1dc7a3Sopenharmony_ci
449cc1dc7a3Sopenharmony_ci	uint8_t max_texel_count_of_weight = 0;
450cc1dc7a3Sopenharmony_ci
451cc1dc7a3Sopenharmony_ci	promise(weights_per_block > 0);
452cc1dc7a3Sopenharmony_ci	promise(texels_per_block > 0);
453cc1dc7a3Sopenharmony_ci
454cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < weights_per_block; i++)
455cc1dc7a3Sopenharmony_ci	{
456cc1dc7a3Sopenharmony_ci		wb.texel_count_of_weight[i] = 0;
457cc1dc7a3Sopenharmony_ci	}
458cc1dc7a3Sopenharmony_ci
459cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < texels_per_block; i++)
460cc1dc7a3Sopenharmony_ci	{
461cc1dc7a3Sopenharmony_ci		wb.weight_count_of_texel[i] = 0;
462cc1dc7a3Sopenharmony_ci	}
463cc1dc7a3Sopenharmony_ci
464cc1dc7a3Sopenharmony_ci	for (unsigned int z = 0; z < z_texels; z++)
465cc1dc7a3Sopenharmony_ci	{
466cc1dc7a3Sopenharmony_ci		for (unsigned int y = 0; y < y_texels; y++)
467cc1dc7a3Sopenharmony_ci		{
468cc1dc7a3Sopenharmony_ci			for (unsigned int x = 0; x < x_texels; x++)
469cc1dc7a3Sopenharmony_ci			{
470cc1dc7a3Sopenharmony_ci				int texel = (z * y_texels + y) * x_texels + x;
471cc1dc7a3Sopenharmony_ci
472cc1dc7a3Sopenharmony_ci				int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
473cc1dc7a3Sopenharmony_ci				int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
474cc1dc7a3Sopenharmony_ci				int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
475cc1dc7a3Sopenharmony_ci
476cc1dc7a3Sopenharmony_ci				int x_weight_frac = x_weight & 0xF;
477cc1dc7a3Sopenharmony_ci				int y_weight_frac = y_weight & 0xF;
478cc1dc7a3Sopenharmony_ci				int z_weight_frac = z_weight & 0xF;
479cc1dc7a3Sopenharmony_ci				int x_weight_int = x_weight >> 4;
480cc1dc7a3Sopenharmony_ci				int y_weight_int = y_weight >> 4;
481cc1dc7a3Sopenharmony_ci				int z_weight_int = z_weight >> 4;
482cc1dc7a3Sopenharmony_ci				int qweight[4];
483cc1dc7a3Sopenharmony_ci				int weight[4];
484cc1dc7a3Sopenharmony_ci				qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
485cc1dc7a3Sopenharmony_ci				qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
486cc1dc7a3Sopenharmony_ci
487cc1dc7a3Sopenharmony_ci				// simplex interpolation
488cc1dc7a3Sopenharmony_ci				int fs = x_weight_frac;
489cc1dc7a3Sopenharmony_ci				int ft = y_weight_frac;
490cc1dc7a3Sopenharmony_ci				int fp = z_weight_frac;
491cc1dc7a3Sopenharmony_ci
492cc1dc7a3Sopenharmony_ci				int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
493cc1dc7a3Sopenharmony_ci				int N = x_weights;
494cc1dc7a3Sopenharmony_ci				int NM = x_weights * y_weights;
495cc1dc7a3Sopenharmony_ci
496cc1dc7a3Sopenharmony_ci				int s1, s2, w0, w1, w2, w3;
497cc1dc7a3Sopenharmony_ci				switch (cas)
498cc1dc7a3Sopenharmony_ci				{
499cc1dc7a3Sopenharmony_ci				case 7:
500cc1dc7a3Sopenharmony_ci					s1 = 1;
501cc1dc7a3Sopenharmony_ci					s2 = N;
502cc1dc7a3Sopenharmony_ci					w0 = 16 - fs;
503cc1dc7a3Sopenharmony_ci					w1 = fs - ft;
504cc1dc7a3Sopenharmony_ci					w2 = ft - fp;
505cc1dc7a3Sopenharmony_ci					w3 = fp;
506cc1dc7a3Sopenharmony_ci					break;
507cc1dc7a3Sopenharmony_ci				case 3:
508cc1dc7a3Sopenharmony_ci					s1 = N;
509cc1dc7a3Sopenharmony_ci					s2 = 1;
510cc1dc7a3Sopenharmony_ci					w0 = 16 - ft;
511cc1dc7a3Sopenharmony_ci					w1 = ft - fs;
512cc1dc7a3Sopenharmony_ci					w2 = fs - fp;
513cc1dc7a3Sopenharmony_ci					w3 = fp;
514cc1dc7a3Sopenharmony_ci					break;
515cc1dc7a3Sopenharmony_ci				case 5:
516cc1dc7a3Sopenharmony_ci					s1 = 1;
517cc1dc7a3Sopenharmony_ci					s2 = NM;
518cc1dc7a3Sopenharmony_ci					w0 = 16 - fs;
519cc1dc7a3Sopenharmony_ci					w1 = fs - fp;
520cc1dc7a3Sopenharmony_ci					w2 = fp - ft;
521cc1dc7a3Sopenharmony_ci					w3 = ft;
522cc1dc7a3Sopenharmony_ci					break;
523cc1dc7a3Sopenharmony_ci				case 4:
524cc1dc7a3Sopenharmony_ci					s1 = NM;
525cc1dc7a3Sopenharmony_ci					s2 = 1;
526cc1dc7a3Sopenharmony_ci					w0 = 16 - fp;
527cc1dc7a3Sopenharmony_ci					w1 = fp - fs;
528cc1dc7a3Sopenharmony_ci					w2 = fs - ft;
529cc1dc7a3Sopenharmony_ci					w3 = ft;
530cc1dc7a3Sopenharmony_ci					break;
531cc1dc7a3Sopenharmony_ci				case 2:
532cc1dc7a3Sopenharmony_ci					s1 = N;
533cc1dc7a3Sopenharmony_ci					s2 = NM;
534cc1dc7a3Sopenharmony_ci					w0 = 16 - ft;
535cc1dc7a3Sopenharmony_ci					w1 = ft - fp;
536cc1dc7a3Sopenharmony_ci					w2 = fp - fs;
537cc1dc7a3Sopenharmony_ci					w3 = fs;
538cc1dc7a3Sopenharmony_ci					break;
539cc1dc7a3Sopenharmony_ci				case 0:
540cc1dc7a3Sopenharmony_ci					s1 = NM;
541cc1dc7a3Sopenharmony_ci					s2 = N;
542cc1dc7a3Sopenharmony_ci					w0 = 16 - fp;
543cc1dc7a3Sopenharmony_ci					w1 = fp - ft;
544cc1dc7a3Sopenharmony_ci					w2 = ft - fs;
545cc1dc7a3Sopenharmony_ci					w3 = fs;
546cc1dc7a3Sopenharmony_ci					break;
547cc1dc7a3Sopenharmony_ci				default:
548cc1dc7a3Sopenharmony_ci					s1 = NM;
549cc1dc7a3Sopenharmony_ci					s2 = N;
550cc1dc7a3Sopenharmony_ci					w0 = 16 - fp;
551cc1dc7a3Sopenharmony_ci					w1 = fp - ft;
552cc1dc7a3Sopenharmony_ci					w2 = ft - fs;
553cc1dc7a3Sopenharmony_ci					w3 = fs;
554cc1dc7a3Sopenharmony_ci					break;
555cc1dc7a3Sopenharmony_ci				}
556cc1dc7a3Sopenharmony_ci
557cc1dc7a3Sopenharmony_ci				qweight[1] = qweight[0] + s1;
558cc1dc7a3Sopenharmony_ci				qweight[2] = qweight[1] + s2;
559cc1dc7a3Sopenharmony_ci				weight[0] = w0;
560cc1dc7a3Sopenharmony_ci				weight[1] = w1;
561cc1dc7a3Sopenharmony_ci				weight[2] = w2;
562cc1dc7a3Sopenharmony_ci				weight[3] = w3;
563cc1dc7a3Sopenharmony_ci
564cc1dc7a3Sopenharmony_ci				for (unsigned int i = 0; i < 4; i++)
565cc1dc7a3Sopenharmony_ci				{
566cc1dc7a3Sopenharmony_ci					if (weight[i] != 0)
567cc1dc7a3Sopenharmony_ci					{
568cc1dc7a3Sopenharmony_ci						wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
569cc1dc7a3Sopenharmony_ci						wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
570cc1dc7a3Sopenharmony_ci						wb.weight_count_of_texel[texel]++;
571cc1dc7a3Sopenharmony_ci						wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
572cc1dc7a3Sopenharmony_ci						wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
573cc1dc7a3Sopenharmony_ci						wb.texel_count_of_weight[qweight[i]]++;
574cc1dc7a3Sopenharmony_ci						max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
575cc1dc7a3Sopenharmony_ci					}
576cc1dc7a3Sopenharmony_ci				}
577cc1dc7a3Sopenharmony_ci			}
578cc1dc7a3Sopenharmony_ci		}
579cc1dc7a3Sopenharmony_ci	}
580cc1dc7a3Sopenharmony_ci
581cc1dc7a3Sopenharmony_ci	uint8_t max_texel_weight_count = 0;
582cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < texels_per_block; i++)
583cc1dc7a3Sopenharmony_ci	{
584cc1dc7a3Sopenharmony_ci		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
585cc1dc7a3Sopenharmony_ci		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
586cc1dc7a3Sopenharmony_ci
587cc1dc7a3Sopenharmony_ci		// Init all 4 entries so we can rely on zeros for vectorization
588cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < 4; j++)
589cc1dc7a3Sopenharmony_ci		{
590cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_int_tr[j][i] = 0;
591cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
592cc1dc7a3Sopenharmony_ci			di.texel_weights_tr[j][i] = 0;
593cc1dc7a3Sopenharmony_ci		}
594cc1dc7a3Sopenharmony_ci
595cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
596cc1dc7a3Sopenharmony_ci		{
597cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
598cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
599cc1dc7a3Sopenharmony_ci			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
600cc1dc7a3Sopenharmony_ci		}
601cc1dc7a3Sopenharmony_ci	}
602cc1dc7a3Sopenharmony_ci
603cc1dc7a3Sopenharmony_ci	di.max_texel_weight_count = max_texel_weight_count;
604cc1dc7a3Sopenharmony_ci
605cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < weights_per_block; i++)
606cc1dc7a3Sopenharmony_ci	{
607cc1dc7a3Sopenharmony_ci		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
608cc1dc7a3Sopenharmony_ci		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
609cc1dc7a3Sopenharmony_ci
610cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < texel_count_wt; j++)
611cc1dc7a3Sopenharmony_ci		{
612cc1dc7a3Sopenharmony_ci			unsigned int texel = wb.texels_of_weight[i][j];
613cc1dc7a3Sopenharmony_ci
614cc1dc7a3Sopenharmony_ci			// Create transposed versions of these for better vectorization
615cc1dc7a3Sopenharmony_ci			di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);
616cc1dc7a3Sopenharmony_ci			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
617cc1dc7a3Sopenharmony_ci
618cc1dc7a3Sopenharmony_ci			// Store the per-texel contribution of this weight for each texel it contributes to
619cc1dc7a3Sopenharmony_ci			di.texel_contrib_for_weight[j][i] = 0.0f;
620cc1dc7a3Sopenharmony_ci			for (unsigned int k = 0; k < 4; k++)
621cc1dc7a3Sopenharmony_ci			{
622cc1dc7a3Sopenharmony_ci				uint8_t dttw = di.texel_weights_tr[k][texel];
623cc1dc7a3Sopenharmony_ci				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
624cc1dc7a3Sopenharmony_ci				if (dttw == i && dttwf != 0.0f)
625cc1dc7a3Sopenharmony_ci				{
626cc1dc7a3Sopenharmony_ci					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
627cc1dc7a3Sopenharmony_ci					break;
628cc1dc7a3Sopenharmony_ci				}
629cc1dc7a3Sopenharmony_ci			}
630cc1dc7a3Sopenharmony_ci		}
631cc1dc7a3Sopenharmony_ci
632cc1dc7a3Sopenharmony_ci		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
633cc1dc7a3Sopenharmony_ci		// Match last texel in active lane in SIMD group, for better gathers
634cc1dc7a3Sopenharmony_ci		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
635cc1dc7a3Sopenharmony_ci		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
636cc1dc7a3Sopenharmony_ci		{
637cc1dc7a3Sopenharmony_ci			di.weight_texels_tr[j][i] = last_texel;
638cc1dc7a3Sopenharmony_ci			di.weights_texel_contribs_tr[j][i] = 0.0f;
639cc1dc7a3Sopenharmony_ci		}
640cc1dc7a3Sopenharmony_ci	}
641cc1dc7a3Sopenharmony_ci
642cc1dc7a3Sopenharmony_ci	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
643cc1dc7a3Sopenharmony_ci	unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
644cc1dc7a3Sopenharmony_ci	for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
645cc1dc7a3Sopenharmony_ci	{
646cc1dc7a3Sopenharmony_ci		di.texel_weight_count[i] = 0;
647cc1dc7a3Sopenharmony_ci
648cc1dc7a3Sopenharmony_ci		for (unsigned int j = 0; j < 4; j++)
649cc1dc7a3Sopenharmony_ci		{
650cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_float_tr[j][i] = 0;
651cc1dc7a3Sopenharmony_ci			di.texel_weights_tr[j][i] = 0;
652cc1dc7a3Sopenharmony_ci			di.texel_weight_contribs_int_tr[j][i] = 0;
653cc1dc7a3Sopenharmony_ci		}
654cc1dc7a3Sopenharmony_ci	}
655cc1dc7a3Sopenharmony_ci
656cc1dc7a3Sopenharmony_ci	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
657cc1dc7a3Sopenharmony_ci	// Match last texel in active lane in SIMD group, for better gathers
658cc1dc7a3Sopenharmony_ci	int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
659cc1dc7a3Sopenharmony_ci	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
660cc1dc7a3Sopenharmony_ci
661cc1dc7a3Sopenharmony_ci	unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
662cc1dc7a3Sopenharmony_ci	for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
663cc1dc7a3Sopenharmony_ci	{
664cc1dc7a3Sopenharmony_ci		di.weight_texel_count[i] = 0;
665cc1dc7a3Sopenharmony_ci
666cc1dc7a3Sopenharmony_ci		for (int j = 0; j < max_texel_count_of_weight; j++)
667cc1dc7a3Sopenharmony_ci		{
668cc1dc7a3Sopenharmony_ci			di.weight_texels_tr[j][i] = last_texel;
669cc1dc7a3Sopenharmony_ci			di.weights_texel_contribs_tr[j][i] = 0.0f;
670cc1dc7a3Sopenharmony_ci		}
671cc1dc7a3Sopenharmony_ci	}
672cc1dc7a3Sopenharmony_ci
673cc1dc7a3Sopenharmony_ci	di.texel_count = static_cast<uint8_t>(texels_per_block);
674cc1dc7a3Sopenharmony_ci	di.weight_count = static_cast<uint8_t>(weights_per_block);
675cc1dc7a3Sopenharmony_ci	di.weight_x = static_cast<uint8_t>(x_weights);
676cc1dc7a3Sopenharmony_ci	di.weight_y = static_cast<uint8_t>(y_weights);
677cc1dc7a3Sopenharmony_ci	di.weight_z = static_cast<uint8_t>(z_weights);
678cc1dc7a3Sopenharmony_ci}
679cc1dc7a3Sopenharmony_ci
680cc1dc7a3Sopenharmony_ci/**
681cc1dc7a3Sopenharmony_ci * @brief Assign the texels to use for kmeans clustering.
682cc1dc7a3Sopenharmony_ci *
683cc1dc7a3Sopenharmony_ci * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used.
684cc1dc7a3Sopenharmony_ci * The @c bsd.texel_count is an input and must be populated beforehand.
685cc1dc7a3Sopenharmony_ci *
686cc1dc7a3Sopenharmony_ci * @param[in,out] bsd   The block size descriptor to populate.
687cc1dc7a3Sopenharmony_ci */
688cc1dc7a3Sopenharmony_cistatic void assign_kmeans_texels(
689cc1dc7a3Sopenharmony_ci	block_size_descriptor& bsd
690cc1dc7a3Sopenharmony_ci) {
691cc1dc7a3Sopenharmony_ci	// Use all texels for kmeans on a small block
692cc1dc7a3Sopenharmony_ci	if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
693cc1dc7a3Sopenharmony_ci	{
694cc1dc7a3Sopenharmony_ci		for (uint8_t i = 0; i < bsd.texel_count; i++)
695cc1dc7a3Sopenharmony_ci		{
696cc1dc7a3Sopenharmony_ci			bsd.kmeans_texels[i] = i;
697cc1dc7a3Sopenharmony_ci		}
698cc1dc7a3Sopenharmony_ci
699cc1dc7a3Sopenharmony_ci		return;
700cc1dc7a3Sopenharmony_ci	}
701cc1dc7a3Sopenharmony_ci
702cc1dc7a3Sopenharmony_ci	// Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
703cc1dc7a3Sopenharmony_ci	uint64_t rng_state[2];
704cc1dc7a3Sopenharmony_ci	astc::rand_init(rng_state);
705cc1dc7a3Sopenharmony_ci
706cc1dc7a3Sopenharmony_ci	// Initialize array used for tracking used indices
707cc1dc7a3Sopenharmony_ci	bool seen[BLOCK_MAX_TEXELS];
708cc1dc7a3Sopenharmony_ci	for (uint8_t i = 0; i < bsd.texel_count; i++)
709cc1dc7a3Sopenharmony_ci	{
710cc1dc7a3Sopenharmony_ci		seen[i] = false;
711cc1dc7a3Sopenharmony_ci	}
712cc1dc7a3Sopenharmony_ci
713cc1dc7a3Sopenharmony_ci	// Assign 64 random indices, retrying if we see repeats
714cc1dc7a3Sopenharmony_ci	unsigned int arr_elements_set = 0;
715cc1dc7a3Sopenharmony_ci	while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
716cc1dc7a3Sopenharmony_ci	{
717cc1dc7a3Sopenharmony_ci		uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
718cc1dc7a3Sopenharmony_ci		texel = texel % bsd.texel_count;
719cc1dc7a3Sopenharmony_ci		if (!seen[texel])
720cc1dc7a3Sopenharmony_ci		{
721cc1dc7a3Sopenharmony_ci			bsd.kmeans_texels[arr_elements_set++] = texel;
722cc1dc7a3Sopenharmony_ci			seen[texel] = true;
723cc1dc7a3Sopenharmony_ci		}
724cc1dc7a3Sopenharmony_ci	}
725cc1dc7a3Sopenharmony_ci}
726cc1dc7a3Sopenharmony_ci
727cc1dc7a3Sopenharmony_ci/**
728cc1dc7a3Sopenharmony_ci * @brief Allocate a single 2D decimation table entry.
729cc1dc7a3Sopenharmony_ci *
730cc1dc7a3Sopenharmony_ci * @param x_texels    The number of texels in the X dimension.
731cc1dc7a3Sopenharmony_ci * @param y_texels    The number of texels in the Y dimension.
732cc1dc7a3Sopenharmony_ci * @param x_weights   The number of weights in the X dimension.
733cc1dc7a3Sopenharmony_ci * @param y_weights   The number of weights in the Y dimension.
734cc1dc7a3Sopenharmony_ci * @param bsd         The block size descriptor we are populating.
735cc1dc7a3Sopenharmony_ci * @param wb          The decimation table init scratch working buffers.
736cc1dc7a3Sopenharmony_ci * @param index       The packed array index to populate.
737cc1dc7a3Sopenharmony_ci */
738cc1dc7a3Sopenharmony_cistatic void construct_dt_entry_2d(
739cc1dc7a3Sopenharmony_ci	unsigned int x_texels,
740cc1dc7a3Sopenharmony_ci	unsigned int y_texels,
741cc1dc7a3Sopenharmony_ci	unsigned int x_weights,
742cc1dc7a3Sopenharmony_ci	unsigned int y_weights,
743cc1dc7a3Sopenharmony_ci	block_size_descriptor& bsd,
744cc1dc7a3Sopenharmony_ci	dt_init_working_buffers& wb,
745cc1dc7a3Sopenharmony_ci	unsigned int index
746cc1dc7a3Sopenharmony_ci) {
747cc1dc7a3Sopenharmony_ci	unsigned int weight_count = x_weights * y_weights;
748cc1dc7a3Sopenharmony_ci	assert(weight_count <= BLOCK_MAX_WEIGHTS);
749cc1dc7a3Sopenharmony_ci
750cc1dc7a3Sopenharmony_ci	bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
751cc1dc7a3Sopenharmony_ci
752cc1dc7a3Sopenharmony_ci	decimation_info& di = bsd.decimation_tables[index];
753cc1dc7a3Sopenharmony_ci	init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
754cc1dc7a3Sopenharmony_ci
755cc1dc7a3Sopenharmony_ci	int maxprec_1plane = -1;
756cc1dc7a3Sopenharmony_ci	int maxprec_2planes = -1;
757cc1dc7a3Sopenharmony_ci	for (int i = 0; i < 12; i++)
758cc1dc7a3Sopenharmony_ci	{
759cc1dc7a3Sopenharmony_ci		unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
760cc1dc7a3Sopenharmony_ci		if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
761cc1dc7a3Sopenharmony_ci		{
762cc1dc7a3Sopenharmony_ci			maxprec_1plane = i;
763cc1dc7a3Sopenharmony_ci		}
764cc1dc7a3Sopenharmony_ci
765cc1dc7a3Sopenharmony_ci		if (try_2planes)
766cc1dc7a3Sopenharmony_ci		{
767cc1dc7a3Sopenharmony_ci			unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
768cc1dc7a3Sopenharmony_ci			if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
769cc1dc7a3Sopenharmony_ci			{
770cc1dc7a3Sopenharmony_ci				maxprec_2planes = i;
771cc1dc7a3Sopenharmony_ci			}
772cc1dc7a3Sopenharmony_ci		}
773cc1dc7a3Sopenharmony_ci	}
774cc1dc7a3Sopenharmony_ci
775cc1dc7a3Sopenharmony_ci	// At least one of the two should be valid ...
776cc1dc7a3Sopenharmony_ci	assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
777cc1dc7a3Sopenharmony_ci	bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
778cc1dc7a3Sopenharmony_ci	bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
779cc1dc7a3Sopenharmony_ci	bsd.decimation_modes[index].refprec_1plane = 0;
780cc1dc7a3Sopenharmony_ci	bsd.decimation_modes[index].refprec_2planes = 0;
781cc1dc7a3Sopenharmony_ci}
782cc1dc7a3Sopenharmony_ci
783cc1dc7a3Sopenharmony_ci/**
784cc1dc7a3Sopenharmony_ci * @brief Allocate block modes and decimation tables for a single 2D block size.
785cc1dc7a3Sopenharmony_ci *
786cc1dc7a3Sopenharmony_ci * @param      x_texels         The number of texels in the X dimension.
787cc1dc7a3Sopenharmony_ci * @param      y_texels         The number of texels in the Y dimension.
788cc1dc7a3Sopenharmony_ci * @param      can_omit_modes   Can we discard modes that astcenc won't use, even if legal?
789cc1dc7a3Sopenharmony_ci * @param      mode_cutoff      Percentile cutoff in range [0,1]. Low values more likely to be used.
790cc1dc7a3Sopenharmony_ci * @param[out] bsd              The block size descriptor to populate.
791cc1dc7a3Sopenharmony_ci */
792cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE
793cc1dc7a3Sopenharmony_cistatic bool construct_block_size_descriptor_2d(
794cc1dc7a3Sopenharmony_ci#else
795cc1dc7a3Sopenharmony_cistatic void construct_block_size_descriptor_2d(
796cc1dc7a3Sopenharmony_ci#endif
797cc1dc7a3Sopenharmony_ci	QualityProfile privateProfile,
798cc1dc7a3Sopenharmony_ci	unsigned int x_texels,
799cc1dc7a3Sopenharmony_ci	unsigned int y_texels,
800cc1dc7a3Sopenharmony_ci	bool can_omit_modes,
801cc1dc7a3Sopenharmony_ci	float mode_cutoff,
802cc1dc7a3Sopenharmony_ci	block_size_descriptor& bsd
803cc1dc7a3Sopenharmony_ci) {
804cc1dc7a3Sopenharmony_ci	// Store a remap table for storing packed decimation modes.
805cc1dc7a3Sopenharmony_ci	// Indexing uses [Y * 16 + X] and max size for each axis is 12.
806cc1dc7a3Sopenharmony_ci	static const unsigned int MAX_DMI = 12 * 16 + 12;
807cc1dc7a3Sopenharmony_ci	int decimation_mode_index[MAX_DMI];
808cc1dc7a3Sopenharmony_ci
809cc1dc7a3Sopenharmony_ci	dt_init_working_buffers* wb = new dt_init_working_buffers;
810cc1dc7a3Sopenharmony_ci
811cc1dc7a3Sopenharmony_ci	bsd.xdim = static_cast<uint8_t>(x_texels);
812cc1dc7a3Sopenharmony_ci	bsd.ydim = static_cast<uint8_t>(y_texels);
813cc1dc7a3Sopenharmony_ci	bsd.zdim = 1;
814cc1dc7a3Sopenharmony_ci	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
815cc1dc7a3Sopenharmony_ci
816cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < MAX_DMI; i++)
817cc1dc7a3Sopenharmony_ci	{
818cc1dc7a3Sopenharmony_ci		decimation_mode_index[i] = -1;
819cc1dc7a3Sopenharmony_ci	}
820cc1dc7a3Sopenharmony_ci
821cc1dc7a3Sopenharmony_ci	// Gather all the decimation grids that can be used with the current block
822cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
823cc1dc7a3Sopenharmony_ci	const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
824cc1dc7a3Sopenharmony_ci	float always_cutoff = (privateProfile != HIGH_QUALITY_PROFILE) ? 1.0f : 0.0f;
825cc1dc7a3Sopenharmony_ci#else
826cc1dc7a3Sopenharmony_ci	// Unused in decompress-only builds
827cc1dc7a3Sopenharmony_ci	(void)can_omit_modes;
828cc1dc7a3Sopenharmony_ci	(void)mode_cutoff;
829cc1dc7a3Sopenharmony_ci#endif
830cc1dc7a3Sopenharmony_ci
831cc1dc7a3Sopenharmony_ci	// Construct the list of block formats referencing the decimation tables
832cc1dc7a3Sopenharmony_ci	unsigned int packed_bm_idx = 0;
833cc1dc7a3Sopenharmony_ci	unsigned int packed_dm_idx = 0;
834cc1dc7a3Sopenharmony_ci
835cc1dc7a3Sopenharmony_ci	// Trackers
836cc1dc7a3Sopenharmony_ci	unsigned int bm_counts[4] { 0 };
837cc1dc7a3Sopenharmony_ci	unsigned int dm_counts[4] { 0 };
838cc1dc7a3Sopenharmony_ci
839cc1dc7a3Sopenharmony_ci	// Clear the list to a known-bad value
840cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
841cc1dc7a3Sopenharmony_ci	{
842cc1dc7a3Sopenharmony_ci		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
843cc1dc7a3Sopenharmony_ci	}
844cc1dc7a3Sopenharmony_ci
845cc1dc7a3Sopenharmony_ci	// Iterate four times to build a usefully ordered list:
846cc1dc7a3Sopenharmony_ci	//   - Pass 0 - keep selected single plane "always" block modes
847cc1dc7a3Sopenharmony_ci	//   - Pass 1 - keep selected single plane "non-always" block modes
848cc1dc7a3Sopenharmony_ci	//   - Pass 2 - keep select dual plane block modes
849cc1dc7a3Sopenharmony_ci	//   - Pass 3 - keep everything else that's legal
850cc1dc7a3Sopenharmony_ci	unsigned int limit = can_omit_modes ? 3 : 4;
851cc1dc7a3Sopenharmony_ci	for (unsigned int j = 0; j < limit; j ++)
852cc1dc7a3Sopenharmony_ci	{
853cc1dc7a3Sopenharmony_ci		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
854cc1dc7a3Sopenharmony_ci		{
855cc1dc7a3Sopenharmony_ci			// Skip modes we've already included in a previous pass
856cc1dc7a3Sopenharmony_ci			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
857cc1dc7a3Sopenharmony_ci			{
858cc1dc7a3Sopenharmony_ci				continue;
859cc1dc7a3Sopenharmony_ci			}
860cc1dc7a3Sopenharmony_ci			if ((privateProfile == HIGH_SPEED_PROFILE) && (i != HIGH_SPEED_PROFILE_BLOCK_MODE))
861cc1dc7a3Sopenharmony_ci			{
862cc1dc7a3Sopenharmony_ci				continue;
863cc1dc7a3Sopenharmony_ci			}
864cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE
865cc1dc7a3Sopenharmony_ci			if (privateProfile == CUSTOMIZED_PROFILE)
866cc1dc7a3Sopenharmony_ci			{
867cc1dc7a3Sopenharmony_ci				if (!g_astcCustomizedSoManager.LoadSutCustomizedSo() ||
868cc1dc7a3Sopenharmony_ci					g_astcCustomizedSoManager.isCustomizedBlockModeFunc_ == nullptr)
869cc1dc7a3Sopenharmony_ci				{
870cc1dc7a3Sopenharmony_ci					printf("astcenc customized so dlopen failed or isCustomizedBlockModeFunc_ is nullptr!\n");
871cc1dc7a3Sopenharmony_ci					delete wb;
872cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
873cc1dc7a3Sopenharmony_ci					delete[] percentiles;
874cc1dc7a3Sopenharmony_ci#endif
875cc1dc7a3Sopenharmony_ci					return false;
876cc1dc7a3Sopenharmony_ci				}
877cc1dc7a3Sopenharmony_ci				if (!g_astcCustomizedSoManager.isCustomizedBlockModeFunc_(i))
878cc1dc7a3Sopenharmony_ci				{
879cc1dc7a3Sopenharmony_ci					continue;
880cc1dc7a3Sopenharmony_ci				}
881cc1dc7a3Sopenharmony_ci			}
882cc1dc7a3Sopenharmony_ci#endif
883cc1dc7a3Sopenharmony_ci			// Decode parameters
884cc1dc7a3Sopenharmony_ci			unsigned int x_weights;
885cc1dc7a3Sopenharmony_ci			unsigned int y_weights;
886cc1dc7a3Sopenharmony_ci			bool is_dual_plane;
887cc1dc7a3Sopenharmony_ci			unsigned int quant_mode;
888cc1dc7a3Sopenharmony_ci			unsigned int weight_bits;
889cc1dc7a3Sopenharmony_ci			bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
890cc1dc7a3Sopenharmony_ci
891cc1dc7a3Sopenharmony_ci			// Always skip invalid encodings for the current block size
892cc1dc7a3Sopenharmony_ci			if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
893cc1dc7a3Sopenharmony_ci			{
894cc1dc7a3Sopenharmony_ci				continue;
895cc1dc7a3Sopenharmony_ci			}
896cc1dc7a3Sopenharmony_ci
897cc1dc7a3Sopenharmony_ci			// Selectively skip dual plane encodings
898cc1dc7a3Sopenharmony_ci			if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
899cc1dc7a3Sopenharmony_ci			{
900cc1dc7a3Sopenharmony_ci				continue;
901cc1dc7a3Sopenharmony_ci			}
902cc1dc7a3Sopenharmony_ci
903cc1dc7a3Sopenharmony_ci			// Always skip encodings we can't physically encode based on
904cc1dc7a3Sopenharmony_ci			// generic encoding bit availability
905cc1dc7a3Sopenharmony_ci			if (is_dual_plane)
906cc1dc7a3Sopenharmony_ci			{
907cc1dc7a3Sopenharmony_ci				 // This is the only check we need as only support 1 partition
908cc1dc7a3Sopenharmony_ci				 if ((109 - weight_bits) <= 0)
909cc1dc7a3Sopenharmony_ci				 {
910cc1dc7a3Sopenharmony_ci					continue;
911cc1dc7a3Sopenharmony_ci				 }
912cc1dc7a3Sopenharmony_ci			}
913cc1dc7a3Sopenharmony_ci			else
914cc1dc7a3Sopenharmony_ci			{
915cc1dc7a3Sopenharmony_ci				// This is conservative - fewer bits may be available for > 1 partition
916cc1dc7a3Sopenharmony_ci				 if ((111 - weight_bits) <= 0)
917cc1dc7a3Sopenharmony_ci				 {
918cc1dc7a3Sopenharmony_ci					continue;
919cc1dc7a3Sopenharmony_ci				 }
920cc1dc7a3Sopenharmony_ci			}
921cc1dc7a3Sopenharmony_ci
922cc1dc7a3Sopenharmony_ci			// Selectively skip encodings based on percentile
923cc1dc7a3Sopenharmony_ci			bool percentile_hit = false;
924cc1dc7a3Sopenharmony_ci	#if !defined(ASTCENC_DECOMPRESS_ONLY)
925cc1dc7a3Sopenharmony_ci			if (j == 0)
926cc1dc7a3Sopenharmony_ci			{
927cc1dc7a3Sopenharmony_ci				percentile_hit = percentiles[i] <= always_cutoff;
928cc1dc7a3Sopenharmony_ci			}
929cc1dc7a3Sopenharmony_ci			else
930cc1dc7a3Sopenharmony_ci			{
931cc1dc7a3Sopenharmony_ci				percentile_hit = percentiles[i] <= mode_cutoff;
932cc1dc7a3Sopenharmony_ci			}
933cc1dc7a3Sopenharmony_ci	#endif
934cc1dc7a3Sopenharmony_ci
935cc1dc7a3Sopenharmony_ci			if (j != 3 && !percentile_hit)
936cc1dc7a3Sopenharmony_ci			{
937cc1dc7a3Sopenharmony_ci				continue;
938cc1dc7a3Sopenharmony_ci			}
939cc1dc7a3Sopenharmony_ci
940cc1dc7a3Sopenharmony_ci			// Allocate and initialize the decimation table entry if we've not used it yet
941cc1dc7a3Sopenharmony_ci			int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
942cc1dc7a3Sopenharmony_ci			if (decimation_mode < 0)
943cc1dc7a3Sopenharmony_ci			{
944cc1dc7a3Sopenharmony_ci				construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
945cc1dc7a3Sopenharmony_ci				if (privateProfile == HIGH_SPEED_PROFILE)
946cc1dc7a3Sopenharmony_ci				{
947cc1dc7a3Sopenharmony_ci					bsd.decimation_modes[packed_dm_idx].maxprec_1plane = 4; // Speed optimization: max prec num is limited to 4
948cc1dc7a3Sopenharmony_ci				}
949cc1dc7a3Sopenharmony_ci				decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
950cc1dc7a3Sopenharmony_ci				decimation_mode = packed_dm_idx;
951cc1dc7a3Sopenharmony_ci
952cc1dc7a3Sopenharmony_ci				dm_counts[j]++;
953cc1dc7a3Sopenharmony_ci				packed_dm_idx++;
954cc1dc7a3Sopenharmony_ci			}
955cc1dc7a3Sopenharmony_ci
956cc1dc7a3Sopenharmony_ci			auto& bm = bsd.block_modes[packed_bm_idx];
957cc1dc7a3Sopenharmony_ci
958cc1dc7a3Sopenharmony_ci			bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
959cc1dc7a3Sopenharmony_ci			bm.quant_mode = static_cast<uint8_t>(quant_mode);
960cc1dc7a3Sopenharmony_ci			bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
961cc1dc7a3Sopenharmony_ci			bm.weight_bits = static_cast<uint8_t>(weight_bits);
962cc1dc7a3Sopenharmony_ci			bm.mode_index = static_cast<uint16_t>(i);
963cc1dc7a3Sopenharmony_ci
964cc1dc7a3Sopenharmony_ci			auto& dm = bsd.decimation_modes[decimation_mode];
965cc1dc7a3Sopenharmony_ci
966cc1dc7a3Sopenharmony_ci			if (is_dual_plane)
967cc1dc7a3Sopenharmony_ci			{
968cc1dc7a3Sopenharmony_ci				dm.set_ref_2plane(bm.get_weight_quant_mode());
969cc1dc7a3Sopenharmony_ci			}
970cc1dc7a3Sopenharmony_ci			else
971cc1dc7a3Sopenharmony_ci			{
972cc1dc7a3Sopenharmony_ci				dm.set_ref_1plane(bm.get_weight_quant_mode());
973cc1dc7a3Sopenharmony_ci			}
974cc1dc7a3Sopenharmony_ci
975cc1dc7a3Sopenharmony_ci			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
976cc1dc7a3Sopenharmony_ci
977cc1dc7a3Sopenharmony_ci			packed_bm_idx++;
978cc1dc7a3Sopenharmony_ci			bm_counts[j]++;
979cc1dc7a3Sopenharmony_ci		}
980cc1dc7a3Sopenharmony_ci	}
981cc1dc7a3Sopenharmony_ci
982cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_1plane_always = bm_counts[0];
983cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
984cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
985cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
986cc1dc7a3Sopenharmony_ci
987cc1dc7a3Sopenharmony_ci	bsd.decimation_mode_count_always = dm_counts[0];
988cc1dc7a3Sopenharmony_ci	bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
989cc1dc7a3Sopenharmony_ci	bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
990cc1dc7a3Sopenharmony_ci
991cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY)
992cc1dc7a3Sopenharmony_ci	assert(bsd.block_mode_count_1plane_always > 0);
993cc1dc7a3Sopenharmony_ci	assert(bsd.decimation_mode_count_always > 0);
994cc1dc7a3Sopenharmony_ci
995cc1dc7a3Sopenharmony_ci	delete[] percentiles;
996cc1dc7a3Sopenharmony_ci#endif
997cc1dc7a3Sopenharmony_ci
998cc1dc7a3Sopenharmony_ci	// Ensure the end of the array contains valid data (should never get read)
999cc1dc7a3Sopenharmony_ci	for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
1000cc1dc7a3Sopenharmony_ci	{
1001cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].maxprec_1plane = -1;
1002cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].maxprec_2planes = -1;
1003cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].refprec_1plane = 0;
1004cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].refprec_2planes = 0;
1005cc1dc7a3Sopenharmony_ci	}
1006cc1dc7a3Sopenharmony_ci
1007cc1dc7a3Sopenharmony_ci	// Determine the texels to use for kmeans clustering.
1008cc1dc7a3Sopenharmony_ci	assign_kmeans_texels(bsd);
1009cc1dc7a3Sopenharmony_ci
1010cc1dc7a3Sopenharmony_ci	delete wb;
1011cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE
1012cc1dc7a3Sopenharmony_ci	return true;
1013cc1dc7a3Sopenharmony_ci#endif
1014cc1dc7a3Sopenharmony_ci}
1015cc1dc7a3Sopenharmony_ci
1016cc1dc7a3Sopenharmony_ci/**
1017cc1dc7a3Sopenharmony_ci * @brief Allocate block modes and decimation tables for a single 3D block size.
1018cc1dc7a3Sopenharmony_ci *
1019cc1dc7a3Sopenharmony_ci * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as
1020cc1dc7a3Sopenharmony_ci * the percentile mode cutoffs. If 3D becomes more widely used we should look at this.
1021cc1dc7a3Sopenharmony_ci *
1022cc1dc7a3Sopenharmony_ci * @param      x_texels   The number of texels in the X dimension.
1023cc1dc7a3Sopenharmony_ci * @param      y_texels   The number of texels in the Y dimension.
1024cc1dc7a3Sopenharmony_ci * @param      z_texels   The number of texels in the Z dimension.
1025cc1dc7a3Sopenharmony_ci * @param[out] bsd        The block size descriptor to populate.
1026cc1dc7a3Sopenharmony_ci */
1027cc1dc7a3Sopenharmony_cistatic void construct_block_size_descriptor_3d(
1028cc1dc7a3Sopenharmony_ci	unsigned int x_texels,
1029cc1dc7a3Sopenharmony_ci	unsigned int y_texels,
1030cc1dc7a3Sopenharmony_ci	unsigned int z_texels,
1031cc1dc7a3Sopenharmony_ci	block_size_descriptor& bsd
1032cc1dc7a3Sopenharmony_ci) {
1033cc1dc7a3Sopenharmony_ci	// Store a remap table for storing packed decimation modes.
1034cc1dc7a3Sopenharmony_ci	// Indexing uses [Z * 64 + Y *  8 + X] and max size for each axis is 6.
1035cc1dc7a3Sopenharmony_ci	static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
1036cc1dc7a3Sopenharmony_ci	int decimation_mode_index[MAX_DMI];
1037cc1dc7a3Sopenharmony_ci	unsigned int decimation_mode_count = 0;
1038cc1dc7a3Sopenharmony_ci
1039cc1dc7a3Sopenharmony_ci	dt_init_working_buffers* wb = new dt_init_working_buffers;
1040cc1dc7a3Sopenharmony_ci
1041cc1dc7a3Sopenharmony_ci	bsd.xdim = static_cast<uint8_t>(x_texels);
1042cc1dc7a3Sopenharmony_ci	bsd.ydim = static_cast<uint8_t>(y_texels);
1043cc1dc7a3Sopenharmony_ci	bsd.zdim = static_cast<uint8_t>(z_texels);
1044cc1dc7a3Sopenharmony_ci	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
1045cc1dc7a3Sopenharmony_ci
1046cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < MAX_DMI; i++)
1047cc1dc7a3Sopenharmony_ci	{
1048cc1dc7a3Sopenharmony_ci		decimation_mode_index[i] = -1;
1049cc1dc7a3Sopenharmony_ci	}
1050cc1dc7a3Sopenharmony_ci
1051cc1dc7a3Sopenharmony_ci	// gather all the infill-modes that can be used with the current block size
1052cc1dc7a3Sopenharmony_ci	for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
1053cc1dc7a3Sopenharmony_ci	{
1054cc1dc7a3Sopenharmony_ci		for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
1055cc1dc7a3Sopenharmony_ci		{
1056cc1dc7a3Sopenharmony_ci			for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
1057cc1dc7a3Sopenharmony_ci			{
1058cc1dc7a3Sopenharmony_ci				unsigned int weight_count = x_weights * y_weights * z_weights;
1059cc1dc7a3Sopenharmony_ci				if (weight_count > BLOCK_MAX_WEIGHTS)
1060cc1dc7a3Sopenharmony_ci				{
1061cc1dc7a3Sopenharmony_ci					continue;
1062cc1dc7a3Sopenharmony_ci				}
1063cc1dc7a3Sopenharmony_ci
1064cc1dc7a3Sopenharmony_ci				decimation_info& di = bsd.decimation_tables[decimation_mode_count];
1065cc1dc7a3Sopenharmony_ci				decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
1066cc1dc7a3Sopenharmony_ci				init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
1067cc1dc7a3Sopenharmony_ci
1068cc1dc7a3Sopenharmony_ci				int maxprec_1plane = -1;
1069cc1dc7a3Sopenharmony_ci				int maxprec_2planes = -1;
1070cc1dc7a3Sopenharmony_ci				for (unsigned int i = 0; i < 12; i++)
1071cc1dc7a3Sopenharmony_ci				{
1072cc1dc7a3Sopenharmony_ci					unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
1073cc1dc7a3Sopenharmony_ci					if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
1074cc1dc7a3Sopenharmony_ci					{
1075cc1dc7a3Sopenharmony_ci						maxprec_1plane = i;
1076cc1dc7a3Sopenharmony_ci					}
1077cc1dc7a3Sopenharmony_ci
1078cc1dc7a3Sopenharmony_ci					unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
1079cc1dc7a3Sopenharmony_ci					if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
1080cc1dc7a3Sopenharmony_ci					{
1081cc1dc7a3Sopenharmony_ci						maxprec_2planes = i;
1082cc1dc7a3Sopenharmony_ci					}
1083cc1dc7a3Sopenharmony_ci				}
1084cc1dc7a3Sopenharmony_ci
1085cc1dc7a3Sopenharmony_ci				if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
1086cc1dc7a3Sopenharmony_ci				{
1087cc1dc7a3Sopenharmony_ci					maxprec_2planes = -1;
1088cc1dc7a3Sopenharmony_ci				}
1089cc1dc7a3Sopenharmony_ci
1090cc1dc7a3Sopenharmony_ci				bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
1091cc1dc7a3Sopenharmony_ci				bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
1092cc1dc7a3Sopenharmony_ci				bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
1093cc1dc7a3Sopenharmony_ci				bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
1094cc1dc7a3Sopenharmony_ci				decimation_mode_count++;
1095cc1dc7a3Sopenharmony_ci			}
1096cc1dc7a3Sopenharmony_ci		}
1097cc1dc7a3Sopenharmony_ci	}
1098cc1dc7a3Sopenharmony_ci
1099cc1dc7a3Sopenharmony_ci	// Ensure the end of the array contains valid data (should never get read)
1100cc1dc7a3Sopenharmony_ci	for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
1101cc1dc7a3Sopenharmony_ci	{
1102cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].maxprec_1plane = -1;
1103cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].maxprec_2planes = -1;
1104cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].refprec_1plane = 0;
1105cc1dc7a3Sopenharmony_ci		bsd.decimation_modes[i].refprec_2planes = 0;
1106cc1dc7a3Sopenharmony_ci	}
1107cc1dc7a3Sopenharmony_ci
1108cc1dc7a3Sopenharmony_ci	bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
1109cc1dc7a3Sopenharmony_ci	bsd.decimation_mode_count_selected = decimation_mode_count;
1110cc1dc7a3Sopenharmony_ci	bsd.decimation_mode_count_all = decimation_mode_count;
1111cc1dc7a3Sopenharmony_ci
1112cc1dc7a3Sopenharmony_ci	// Construct the list of block formats referencing the decimation tables
1113cc1dc7a3Sopenharmony_ci
1114cc1dc7a3Sopenharmony_ci	// Clear the list to a known-bad value
1115cc1dc7a3Sopenharmony_ci	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1116cc1dc7a3Sopenharmony_ci	{
1117cc1dc7a3Sopenharmony_ci		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
1118cc1dc7a3Sopenharmony_ci	}
1119cc1dc7a3Sopenharmony_ci
1120cc1dc7a3Sopenharmony_ci	unsigned int packed_idx = 0;
1121cc1dc7a3Sopenharmony_ci	unsigned int bm_counts[2] { 0 };
1122cc1dc7a3Sopenharmony_ci
1123cc1dc7a3Sopenharmony_ci	// Iterate two times to build a usefully ordered list:
1124cc1dc7a3Sopenharmony_ci	//   - Pass 0 - keep valid single plane block modes
1125cc1dc7a3Sopenharmony_ci	//   - Pass 1 - keep valid dual plane block modes
1126cc1dc7a3Sopenharmony_ci	for (unsigned int j = 0; j < 2; j++)
1127cc1dc7a3Sopenharmony_ci	{
1128cc1dc7a3Sopenharmony_ci		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1129cc1dc7a3Sopenharmony_ci		{
1130cc1dc7a3Sopenharmony_ci			// Skip modes we've already included in a previous pass
1131cc1dc7a3Sopenharmony_ci			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
1132cc1dc7a3Sopenharmony_ci			{
1133cc1dc7a3Sopenharmony_ci				continue;
1134cc1dc7a3Sopenharmony_ci			}
1135cc1dc7a3Sopenharmony_ci
1136cc1dc7a3Sopenharmony_ci			unsigned int x_weights;
1137cc1dc7a3Sopenharmony_ci			unsigned int y_weights;
1138cc1dc7a3Sopenharmony_ci			unsigned int z_weights;
1139cc1dc7a3Sopenharmony_ci			bool is_dual_plane;
1140cc1dc7a3Sopenharmony_ci			unsigned int quant_mode;
1141cc1dc7a3Sopenharmony_ci			unsigned int weight_bits;
1142cc1dc7a3Sopenharmony_ci
1143cc1dc7a3Sopenharmony_ci			bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
1144cc1dc7a3Sopenharmony_ci			// Skip invalid encodings
1145cc1dc7a3Sopenharmony_ci			if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
1146cc1dc7a3Sopenharmony_ci			{
1147cc1dc7a3Sopenharmony_ci				continue;
1148cc1dc7a3Sopenharmony_ci			}
1149cc1dc7a3Sopenharmony_ci
1150cc1dc7a3Sopenharmony_ci			// Skip encodings in the wrong iteration
1151cc1dc7a3Sopenharmony_ci			if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
1152cc1dc7a3Sopenharmony_ci			{
1153cc1dc7a3Sopenharmony_ci				continue;
1154cc1dc7a3Sopenharmony_ci			}
1155cc1dc7a3Sopenharmony_ci
1156cc1dc7a3Sopenharmony_ci			// Always skip encodings we can't physically encode based on bit availability
1157cc1dc7a3Sopenharmony_ci			if (is_dual_plane)
1158cc1dc7a3Sopenharmony_ci			{
1159cc1dc7a3Sopenharmony_ci				 // This is the only check we need as only support 1 partition
1160cc1dc7a3Sopenharmony_ci				 if ((109 - weight_bits) <= 0)
1161cc1dc7a3Sopenharmony_ci				 {
1162cc1dc7a3Sopenharmony_ci					continue;
1163cc1dc7a3Sopenharmony_ci				 }
1164cc1dc7a3Sopenharmony_ci			}
1165cc1dc7a3Sopenharmony_ci			else
1166cc1dc7a3Sopenharmony_ci			{
1167cc1dc7a3Sopenharmony_ci				// This is conservative - fewer bits may be available for > 1 partition
1168cc1dc7a3Sopenharmony_ci				 if ((111 - weight_bits) <= 0)
1169cc1dc7a3Sopenharmony_ci				 {
1170cc1dc7a3Sopenharmony_ci					continue;
1171cc1dc7a3Sopenharmony_ci				 }
1172cc1dc7a3Sopenharmony_ci			}
1173cc1dc7a3Sopenharmony_ci
1174cc1dc7a3Sopenharmony_ci			int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
1175cc1dc7a3Sopenharmony_ci			bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
1176cc1dc7a3Sopenharmony_ci			bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
1177cc1dc7a3Sopenharmony_ci			bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
1178cc1dc7a3Sopenharmony_ci			bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
1179cc1dc7a3Sopenharmony_ci			bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
1180cc1dc7a3Sopenharmony_ci
1181cc1dc7a3Sopenharmony_ci			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
1182cc1dc7a3Sopenharmony_ci			bm_counts[j]++;
1183cc1dc7a3Sopenharmony_ci			packed_idx++;
1184cc1dc7a3Sopenharmony_ci		}
1185cc1dc7a3Sopenharmony_ci	}
1186cc1dc7a3Sopenharmony_ci
1187cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_1plane_always = 0;  // Skipped for 3D modes
1188cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_1plane_selected = bm_counts[0];
1189cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
1190cc1dc7a3Sopenharmony_ci	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
1191cc1dc7a3Sopenharmony_ci
1192cc1dc7a3Sopenharmony_ci	// Determine the texels to use for kmeans clustering.
1193cc1dc7a3Sopenharmony_ci	assign_kmeans_texels(bsd);
1194cc1dc7a3Sopenharmony_ci
1195cc1dc7a3Sopenharmony_ci	delete wb;
1196cc1dc7a3Sopenharmony_ci}
1197cc1dc7a3Sopenharmony_ci
1198cc1dc7a3Sopenharmony_ci/* See header for documentation. */
1199cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE
1200cc1dc7a3Sopenharmony_cibool init_block_size_descriptor(
1201cc1dc7a3Sopenharmony_ci#else
1202cc1dc7a3Sopenharmony_civoid init_block_size_descriptor(
1203cc1dc7a3Sopenharmony_ci#endif
1204cc1dc7a3Sopenharmony_ci	QualityProfile privateProfile,
1205cc1dc7a3Sopenharmony_ci	unsigned int x_texels,
1206cc1dc7a3Sopenharmony_ci	unsigned int y_texels,
1207cc1dc7a3Sopenharmony_ci	unsigned int z_texels,
1208cc1dc7a3Sopenharmony_ci	bool can_omit_modes,
1209cc1dc7a3Sopenharmony_ci	unsigned int partition_count_cutoff,
1210cc1dc7a3Sopenharmony_ci	float mode_cutoff,
1211cc1dc7a3Sopenharmony_ci	block_size_descriptor& bsd
1212cc1dc7a3Sopenharmony_ci) {
1213cc1dc7a3Sopenharmony_ci	if (z_texels > 1)
1214cc1dc7a3Sopenharmony_ci	{
1215cc1dc7a3Sopenharmony_ci		construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
1216cc1dc7a3Sopenharmony_ci	}
1217cc1dc7a3Sopenharmony_ci	else
1218cc1dc7a3Sopenharmony_ci	{
1219cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE
1220cc1dc7a3Sopenharmony_ci		if (!construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd))
1221cc1dc7a3Sopenharmony_ci		{
1222cc1dc7a3Sopenharmony_ci			return false;
1223cc1dc7a3Sopenharmony_ci		}
1224cc1dc7a3Sopenharmony_ci#else
1225cc1dc7a3Sopenharmony_ci		construct_block_size_descriptor_2d(privateProfile, x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
1226cc1dc7a3Sopenharmony_ci#endif
1227cc1dc7a3Sopenharmony_ci	}
1228cc1dc7a3Sopenharmony_ci
1229cc1dc7a3Sopenharmony_ci	init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
1230cc1dc7a3Sopenharmony_ci#ifdef ASTC_CUSTOMIZED_ENABLE
1231cc1dc7a3Sopenharmony_ci	return true;
1232cc1dc7a3Sopenharmony_ci#endif
1233cc1dc7a3Sopenharmony_ci}
1234