xref: /third_party/astc-encoder/Source/astcenc.h (revision cc1dc7a3)
1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2020-2024 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief The core astcenc codec library interface.
20 *
21 * This interface is the entry point to the core astcenc codec. It aims to be easy to use for
22 * non-experts, but also to allow experts to have fine control over the compressor heuristics if
23 * needed. The core codec only handles compression and decompression, transferring all inputs and
24 * outputs via memory buffers. To catch obvious input/output buffer sizing issues, which can cause
25 * security and stability problems, all transfer buffers are explicitly sized.
26 *
27 * While the aim is that we keep this interface mostly stable, it should be viewed as a mutable
28 * interface tied to a specific source version. We are not trying to maintain backwards
29 * compatibility across codec versions.
30 *
31 * The API state management is based around an explicit context object, which is the context for all
32 * allocated memory resources needed to compress and decompress a single image. A context can be
33 * used to sequentially compress multiple images using the same configuration, allowing setup
34 * overheads to be amortized over multiple images, which is particularly important when images are
35 * small.
36 *
37 * Multi-threading can be used two ways.
38 *
39 *     * An application wishing to process multiple images in parallel can allocate multiple
40 *       contexts and assign each context to a thread.
41 *     * An application wishing to process a single image using multiple threads can configure
42 *       contexts for multi-threaded use, and invoke astcenc_compress/decompress() once per thread
43 *       for faster processing. The caller is responsible for creating the worker threads, and
44 *       synchronizing between images.
45 *
46 * Extended instruction set support
47 * ================================
48 *
49 * This library supports use of extended instruction sets, such as SSE4.1 and AVX2. These are
50 * enabled at compile time when building the library. There is no runtime checking in the core
51 * library that the instruction sets used are actually available. Checking compatibility is the
52 * responsibility of the calling code.
53 *
54 * Threading
55 * =========
56 *
57 * In pseudo-code, the usage for manual user threading looks like this:
58 *
59 *     // Configure the compressor run
60 *     astcenc_config my_config;
61 *     astcenc_config_init(..., &my_config);
62 *
63 *     // Power users can tweak <my_config> settings here ...
64 *
65 *     // Allocate working state given config and thread_count
66 *     astcenc_context* my_context;
67 *     astcenc_context_alloc(&my_config, thread_count, &my_context);
68 *
69 *     // Compress each image using these config settings
70 *     foreach image:
71 *         // For each thread in the thread pool
72 *         for i in range(0, thread_count):
73 *             astcenc_compress_image(my_context, &my_input, my_output, i);
74 *
75 *         astcenc_compress_reset(my_context);
76 *
77 *     // Clean up
78 *     astcenc_context_free(my_context);
79 *
80 * Images
81 * ======
82 *
83 * The codec supports compressing single images, which can be either 2D images or volumetric 3D
84 * images. Calling code is responsible for any handling of aggregate types, such as mipmap chains,
85 * texture arrays, or sliced 3D textures.
86 *
87 * Images are passed in as an astcenc_image structure. Inputs can be either 8-bit unorm, 16-bit
88 * half-float, or 32-bit float, as indicated by the data_type field.
89 *
90 * Images can be any dimension; there is no requirement to be a multiple of the ASTC block size.
91 *
92 * Data is always passed in as 4 color components, and accessed as an array of 2D image slices. Data
93 * within an image slice is always tightly packed without padding. Addressing looks like this:
94 *
95 *     data[z_coord][y_coord * x_dim * 4 + x_coord * 4    ]   // Red
96 *     data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 1]   // Green
97 *     data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 2]   // Blue
98 *     data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 3]   // Alpha
99 *
100 * Common compressor usage
101 * =======================
102 *
103 * One of the most important things for coding image quality is to align the input data component
104 * count with the ASTC color endpoint mode. This avoids wasting bits encoding components you don't
105 * actually need in the endpoint colors.
106 *
107 *         | Input data   | Encoding swizzle | Sampling swizzle |
108 *         | ------------ | ---------------- | ---------------- |
109 *         | 1 component  | RRR1             | .[rgb]           |
110 *         | 2 components | RRRG             | .[rgb]a          |
111 *         | 3 components | RGB1             | .rgb             |
112 *         | 4 components | RGBA             | .rgba            |
113 *
114 * The 1 and 2 component modes recommend sampling from "g" to recover the luminance value as this
115 * provides best compatibility with other texture formats where the green component may be stored at
116 * higher precision than the others, such as RGB565. For ASTC any of the RGB components can be used;
117 * the luminance endpoint component will be returned for all three.
118 *
119 * When using the normal map compression mode ASTC will store normals as a two component X+Y map.
120 * Input images must contain unit-length normalized vectors and should be passed in using a two component
121 * swizzle. The astcenc command line tool defaults to an RRRG swizzle, but some developers prefer
122 * to use GGGR for compatibility with BC5n, which will work just as well. The Z component can be
123 * recovered programmatically in shader code, using knowledge that the vector is unit length and
124 * that Z must be positive for a tangent-space normal map.
125 *
126 * Decompress-only usage
127 * =====================
128 *
129 * For some use cases it is useful to have a cut-down context and/or library which supports
130 * decompression but not compression.
131 *
132 * A context can be made decompress-only using the ASTCENC_FLG_DECOMPRESS_ONLY flag when the context
133 * is allocated. These contexts have lower dynamic memory footprint than a full context.
134 *
135 * The entire library can be made decompress-only by building the files with the define
136 * ASTCENC_DECOMPRESS_ONLY set. In this build the context will be smaller, and the library will
137 * exclude the functionality which is only needed for compression. This reduces the binary size by
138 * ~180KB. For these builds contexts must be created with the ASTCENC_FLG_DECOMPRESS_ONLY flag.
139 *
140 * Note that context structures returned by a library built as decompress-only are incompatible with
141 * a library built with compression included, and vice versa, as they have different sizes and
142 * memory layout.
143 *
144 * Self-decompress-only usage
145 * ==========================
146 *
147 * ASTC is a complex format with a large search space. The parts of this search space that are
148 * searched are determined by heuristics that are, in part, tied to the quality level used when
149 * creating the context.
150 *
151 * A normal context is capable of decompressing any ASTC texture, including those generated by other
152 * compressors with unknown heuristics. This is the most flexible implementation, but forces the
153 * data tables used by the codec to include entries that are not needed during compression. This
154 * can slow down context creation by a significant amount, especially for the faster compression
155 * modes where few data table entries are actually used. To optimize this use case the context can
156 * be created with the ASTCENC_FLG_SELF_DECOMPRESS_ONLY flag. This tells the compressor that it will
157 * only be asked to decompress images that it compressed itself, allowing the data tables to
158 * exclude entries that are not needed by the current compression configuration. This reduces the
159 * size of the context data tables in memory and improves context creation performance. Note that,
160 * as of the 3.6 release, this flag no longer affects compression performance.
161 *
162 * Using this flag while attempting to decompress a valid image which was created by another
163 * compressor, or even another astcenc compressor version or configuration, may result in blocks
164 * returning as solid magenta or NaN value error blocks.
165 */
166
167#ifndef ASTCENC_INCLUDED
168#define ASTCENC_INCLUDED
169
170#include <cstddef>
171#include <cstdint>
172#if defined(__aarch64__)
173	#define ASTCENC_NEON 1
174#else
175	#define ASTCENC_NEON 0
176#endif
177
178enum QualityProfile {
179	HIGH_QUALITY_PROFILE = 0, // default profile
180	HIGH_SPEED_PROFILE,
181	CUSTOMIZED_PROFILE
182};
183
184static const int HIGH_SPEED_PROFILE_BLOCK_MODE = 67; // keep openSource type, example
185static const int BYTE_MASK = 0xFF;
186
187#define QUALITY_CONTROL (1)
188#if QUALITY_CONTROL
189enum ComponentRGBA {
190	R_COM = 0,
191	G_COM,
192	B_COM,
193	A_COM,
194	RGBA_COM
195};
196#endif
197
198#if defined(ASTCENC_DYNAMIC_LIBRARY)
199	#if defined(_MSC_VER)
200		#define ASTCENC_PUBLIC extern "C" __declspec(dllexport)
201	#else
202		#define ASTCENC_PUBLIC extern "C" __attribute__ ((visibility ("default")))
203	#endif
204#else
205	#define ASTCENC_PUBLIC
206#endif
207
208/* ============================================================================
209    Data declarations
210============================================================================ */
211
212/**
213 * @brief An opaque structure; see astcenc_internal.h for definition.
214 */
215struct astcenc_context;
216
217/**
218 * @brief A codec API error code.
219 */
220enum astcenc_error {
221	/** @brief The call was successful. */
222	ASTCENC_SUCCESS = 0,
223	/** @brief The call failed due to low memory, or undersized I/O buffers. */
224	ASTCENC_ERR_OUT_OF_MEM,
225	/** @brief The call failed due to the build using fast math. */
226	ASTCENC_ERR_BAD_CPU_FLOAT,
227	/** @brief The call failed due to an out-of-spec parameter. */
228	ASTCENC_ERR_BAD_PARAM,
229	/** @brief The call failed due to an out-of-spec block size. */
230	ASTCENC_ERR_BAD_BLOCK_SIZE,
231	/** @brief The call failed due to an out-of-spec color profile. */
232	ASTCENC_ERR_BAD_PROFILE,
233	/** @brief The call failed due to an out-of-spec quality value. */
234	ASTCENC_ERR_BAD_QUALITY,
235	/** @brief The call failed due to an out-of-spec component swizzle. */
236	ASTCENC_ERR_BAD_SWIZZLE,
237	/** @brief The call failed due to an out-of-spec flag set. */
238	ASTCENC_ERR_BAD_FLAGS,
239	/** @brief The call failed due to the context not supporting the operation. */
240	ASTCENC_ERR_BAD_CONTEXT,
241	/** @brief The call failed due to unimplemented functionality. */
242	ASTCENC_ERR_NOT_IMPLEMENTED,
243	/** @brief The call failed due to an out-of-spec decode mode flag set. */
244	ASTCENC_ERR_BAD_DECODE_MODE,
245#if defined(ASTCENC_DIAGNOSTICS)
246	/** @brief The call failed due to an issue with diagnostic tracing. */
247	ASTCENC_ERR_DTRACE_FAILURE,
248#endif
249#if QUALITY_CONTROL
250	ASTCENC_ERR_BAD_QUALITY_CHECK,
251#endif
252#ifdef ASTC_CUSTOMIZED_ENABLE
253	ASTCENC_ERR_DLOPEN_FAILED
254#endif
255};
256
257/**
258 * @brief A codec color profile.
259 */
260enum astcenc_profile {
261	/** @brief The LDR sRGB color profile. */
262	ASTCENC_PRF_LDR_SRGB = 0,
263	/** @brief The LDR linear color profile. */
264	ASTCENC_PRF_LDR,
265	/** @brief The HDR RGB with LDR alpha color profile. */
266	ASTCENC_PRF_HDR_RGB_LDR_A,
267	/** @brief The HDR RGBA color profile. */
268	ASTCENC_PRF_HDR
269};
270
271/** @brief The fastest, lowest quality, search preset. */
272static const float ASTCENC_PRE_FASTEST = 0.0f;
273
274/** @brief The fast search preset. */
275static const float ASTCENC_PRE_FAST = 10.0f;
276
277/** @brief The medium quality search preset. */
278static const float ASTCENC_PRE_MEDIUM = 60.0f;
279
280/** @brief The thorough quality search preset. */
281static const float ASTCENC_PRE_THOROUGH = 98.0f;
282
283/** @brief The thorough quality search preset. */
284static const float ASTCENC_PRE_VERYTHOROUGH = 99.0f;
285
286/** @brief The exhaustive, highest quality, search preset. */
287static const float ASTCENC_PRE_EXHAUSTIVE = 100.0f;
288
289/**
290 * @brief A codec component swizzle selector.
291 */
292enum astcenc_swz
293{
294	/** @brief Select the red component. */
295	ASTCENC_SWZ_R = 0,
296	/** @brief Select the green component. */
297	ASTCENC_SWZ_G = 1,
298	/** @brief Select the blue component. */
299	ASTCENC_SWZ_B = 2,
300	/** @brief Select the alpha component. */
301	ASTCENC_SWZ_A = 3,
302	/** @brief Use a constant zero component. */
303	ASTCENC_SWZ_0 = 4,
304	/** @brief Use a constant one component. */
305	ASTCENC_SWZ_1 = 5,
306	/** @brief Use a reconstructed normal vector Z component. */
307	ASTCENC_SWZ_Z = 6
308};
309
310/**
311 * @brief A texel component swizzle.
312 */
313struct astcenc_swizzle
314{
315	/** @brief The red component selector. */
316	astcenc_swz r;
317	/** @brief The green component selector. */
318	astcenc_swz g;
319	/** @brief The blue component selector. */
320	astcenc_swz b;
321	/** @brief The alpha component selector. */
322	astcenc_swz a;
323};
324
325/**
326 * @brief A texel component data format.
327 */
328enum astcenc_type
329{
330	/** @brief Unorm 8-bit data per component. */
331	ASTCENC_TYPE_U8 = 0,
332	/** @brief 16-bit float per component. */
333	ASTCENC_TYPE_F16 = 1,
334	/** @brief 32-bit float per component. */
335	ASTCENC_TYPE_F32 = 2
336};
337
338/**
339 * @brief Function pointer type for compression progress reporting callback.
340 */
341extern "C" typedef void (*astcenc_progress_callback)(float);
342
343/**
344 * @brief Enable normal map compression.
345 *
346 * Input data will be treated a two component normal map, storing X and Y, and the codec will
347 * optimize for angular error rather than simple linear PSNR. In this mode the input swizzle should
348 * be e.g. rrrg (the default ordering for ASTC normals on the command line) or gggr (the ordering
349 * used by BC5n).
350 */
351static const unsigned int ASTCENC_FLG_MAP_NORMAL          = 1 << 0;
352
353/**
354 * @brief Enable compression heuristics that assume use of decode_unorm8 decode mode.
355 *
356 * The decode_unorm8 decode mode rounds differently to the decode_fp16 decode mode, so enabling this
357 * flag during compression will allow the compressor to use the correct rounding when selecting
358 * encodings. This will improve the compressed image quality if your application is using the
359 * decode_unorm8 decode mode, but will reduce image quality if using decode_fp16.
360 *
361 * Note that LDR_SRGB images will always use decode_unorm8 for the RGB channels, irrespective of
362 * this setting.
363 */
364static const unsigned int ASTCENC_FLG_USE_DECODE_UNORM8        = 1 << 1;
365
366/**
367 * @brief Enable alpha weighting.
368 *
369 * The input alpha value is used for transparency, so errors in the RGB components are weighted by
370 * the transparency level. This allows the codec to more accurately encode the alpha value in areas
371 * where the color value is less significant.
372 */
373static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT     = 1 << 2;
374
375/**
376 * @brief Enable perceptual error metrics.
377 *
378 * This mode enables perceptual compression mode, which will optimize for perceptual error rather
379 * than best PSNR. Only some input modes support perceptual error metrics.
380 */
381static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL       = 1 << 3;
382
383/**
384 * @brief Create a decompression-only context.
385 *
386 * This mode disables support for compression. This enables context allocation to skip some
387 * transient buffer allocation, resulting in lower memory usage.
388 */
389static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY      = 1 << 4;
390
391/**
392 * @brief Create a self-decompression context.
393 *
394 * This mode configures the compressor so that it is only guaranteed to be able to decompress images
395 * that were actually created using the current context. This is the common case for compression use
396 * cases, and setting this flag enables additional optimizations, but does mean that the context
397 * cannot reliably decompress arbitrary ASTC images.
398 */
399static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5;
400
401/**
402 * @brief Enable RGBM map compression.
403 *
404 * Input data will be treated as HDR data that has been stored in an LDR RGBM-encoded wrapper
405 * format. Data must be preprocessed by the user to be in LDR RGBM format before calling the
406 * compression function, this flag is only used to control the use of RGBM-specific heuristics and
407 * error metrics.
408 *
409 * IMPORTANT: The ASTC format is prone to bad failure modes with unconstrained RGBM data; very small
410 * M values can round to zero due to quantization and result in black or white pixels. It is highly
411 * recommended that the minimum value of M used in the encoding is kept above a lower threshold (try
412 * 16 or 32). Applying this threshold reduces the number of very dark colors that can be
413 * represented, but is still higher precision than 8-bit LDR.
414 *
415 * When this flag is set the value of @c rgbm_m_scale in the context must be set to the RGBM scale
416 * factor used during reconstruction. This defaults to 5 when in RGBM mode.
417 *
418 * It is recommended that the value of @c cw_a_weight is set to twice the value of the multiplier
419 * scale, ensuring that the M value is accurately encoded. This defaults to 10 when in RGBM mode,
420 * matching the default scale factor.
421 */
422static const unsigned int ASTCENC_FLG_MAP_RGBM             = 1 << 6;
423
424/**
425 * @brief The bit mask of all valid flags.
426 */
427static const unsigned int ASTCENC_ALL_FLAGS =
428                              ASTCENC_FLG_MAP_NORMAL |
429                              ASTCENC_FLG_MAP_RGBM |
430                              ASTCENC_FLG_USE_ALPHA_WEIGHT |
431                              ASTCENC_FLG_USE_PERCEPTUAL |
432                              ASTCENC_FLG_USE_DECODE_UNORM8 |
433                              ASTCENC_FLG_DECOMPRESS_ONLY |
434                              ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
435
436/**
437 * @brief The config structure.
438 *
439 * This structure will initially be populated by a call to astcenc_config_init, but power users may
440 * modify it before calling astcenc_context_alloc. See astcenccli_toplevel_help.cpp for full user
441 * documentation of the power-user settings.
442 *
443 * Note for any settings which are associated with a specific color component, the value in the
444 * config applies to the component that exists after any compression data swizzle is applied.
445 */
446struct astcenc_config
447{
448	/** @brief The color profile. */
449	astcenc_profile profile;
450
451	/** @brief The set of set flags. */
452	unsigned int flags;
453
454	/** @brief The ASTC block size X dimension. */
455	unsigned int block_x;
456
457	/** @brief The ASTC block size Y dimension. */
458	unsigned int block_y;
459
460	/** @brief The ASTC block size Z dimension. */
461	unsigned int block_z;
462
463	/** @brief The red component weight scale for error weighting (-cw). */
464	float cw_r_weight;
465
466	/** @brief The green component weight scale for error weighting (-cw). */
467	float cw_g_weight;
468
469	/** @brief The blue component weight scale for error weighting (-cw). */
470	float cw_b_weight;
471
472	/** @brief The alpha component weight scale for error weighting (-cw). */
473	float cw_a_weight;
474
475	/**
476	 * @brief The radius for any alpha-weight scaling (-a).
477	 *
478	 * It is recommended that this is set to 1 when using FLG_USE_ALPHA_WEIGHT on a texture that
479	 * will be sampled using linear texture filtering to minimize color bleed out of transparent
480	 * texels that are adjacent to non-transparent texels.
481	 */
482	unsigned int a_scale_radius;
483
484	/** @brief The RGBM scale factor for the shared multiplier (-rgbm). */
485	float rgbm_m_scale;
486
487	/**
488	 * @brief The maximum number of partitions searched (-partitioncountlimit).
489	 *
490	 * Valid values are between 1 and 4.
491	 */
492	unsigned int tune_partition_count_limit;
493
494	/**
495	 * @brief The maximum number of partitions searched (-2partitionindexlimit).
496	 *
497	 * Valid values are between 1 and 1024.
498	 */
499	unsigned int tune_2partition_index_limit;
500
501	/**
502	 * @brief The maximum number of partitions searched (-3partitionindexlimit).
503	 *
504	 * Valid values are between 1 and 1024.
505	 */
506	unsigned int tune_3partition_index_limit;
507
508	/**
509	 * @brief The maximum number of partitions searched (-4partitionindexlimit).
510	 *
511	 * Valid values are between 1 and 1024.
512	 */
513	unsigned int tune_4partition_index_limit;
514
515	/**
516	 * @brief The maximum centile for block modes searched (-blockmodelimit).
517	 *
518	 * Valid values are between 1 and 100.
519	 */
520	unsigned int tune_block_mode_limit;
521
522	/**
523	 * @brief The maximum iterative refinements applied (-refinementlimit).
524	 *
525	 * Valid values are between 1 and N; there is no technical upper limit
526	 * but little benefit is expected after N=4.
527	 */
528	unsigned int tune_refinement_limit;
529
530	/**
531	 * @brief The number of trial candidates per mode search (-candidatelimit).
532	 *
533	 * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES.
534	 */
535	unsigned int tune_candidate_limit;
536
537	/**
538	 * @brief The number of trial partitionings per search (-2partitioncandidatelimit).
539	 *
540	 * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES.
541	 */
542	unsigned int tune_2partitioning_candidate_limit;
543
544	/**
545	 * @brief The number of trial partitionings per search (-3partitioncandidatelimit).
546	 *
547	 * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES.
548	 */
549	unsigned int tune_3partitioning_candidate_limit;
550
551	/**
552	 * @brief The number of trial partitionings per search (-4partitioncandidatelimit).
553	 *
554	 * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES.
555	 */
556	unsigned int tune_4partitioning_candidate_limit;
557
558	/**
559	 * @brief The dB threshold for stopping block search (-dblimit).
560	 *
561	 * This option is ineffective for HDR textures.
562	 */
563	float tune_db_limit;
564
565	/**
566	 * @brief The amount of MSE overshoot needed to early-out trials.
567	 *
568	 * The first early-out is for 1 partition, 1 plane trials, where we try a minimal encode using
569	 * the high probability block modes. This can short-cut compression for simple blocks.
570	 *
571	 * The second early-out is for refinement trials, where we can exit refinement once quality is
572	 * reached.
573	 */
574	float tune_mse_overshoot;
575
576	/**
577	 * @brief The threshold for skipping 3.1/4.1 trials (-2partitionlimitfactor).
578	 *
579	 * This option is further scaled for normal maps, so it skips less often.
580	 */
581	float tune_2partition_early_out_limit_factor;
582
583	/**
584	 * @brief The threshold for skipping 4.1 trials (-3partitionlimitfactor).
585	 *
586	 * This option is further scaled for normal maps, so it skips less often.
587	 */
588	float tune_3partition_early_out_limit_factor;
589
590	/**
591	 * @brief The threshold for skipping two weight planes (-2planelimitcorrelation).
592	 *
593	 * This option is ineffective for normal maps.
594	 */
595	float tune_2plane_early_out_limit_correlation;
596
597	/**
598	 * @brief The config enable for the mode0 fast-path search.
599	 *
600	 * If this is set to TUNE_MIN_TEXELS_MODE0 or higher then the early-out fast mode0
601	 * search is enabled. This option is ineffective for 3D block sizes.
602	 */
603	float tune_search_mode0_enable;
604
605	/**
606	 * @brief The progress callback, can be @c nullptr.
607	 *
608	 * If this is specified the codec will peridocially report progress for
609	 * compression as a percentage between 0 and 100. The callback is called from one
610	 * of the compressor threads, so doing significant work in the callback will
611	 * reduce compression performance.
612	 */
613	astcenc_progress_callback progress_callback;
614
615#if defined(ASTCENC_DIAGNOSTICS)
616	/**
617	 * @brief The path to save the diagnostic trace data to.
618	 *
619	 * This option is not part of the public API, and requires special builds
620	 * of the library.
621	 */
622	const char* trace_file_path;
623#endif
624	QualityProfile privateProfile;
625};
626
627/**
628 * @brief An uncompressed 2D or 3D image.
629 *
630 * 3D image are passed in as an array of 2D slices. Each slice has identical
631 * size and color format.
632 */
633struct astcenc_image
634{
635	/** @brief The stride dimension of the image, in texels. */
636	unsigned int dim_stride;
637
638	/** @brief The X dimension of the image, in texels. */
639	unsigned int dim_x;
640
641	/** @brief The Y dimension of the image, in texels. */
642	unsigned int dim_y;
643
644	/** @brief The Z dimension of the image, in texels. */
645	unsigned int dim_z;
646
647	/** @brief The data type per component. */
648	astcenc_type data_type;
649
650	/** @brief The array of 2D slices, of length @c dim_z. */
651	void** data;
652};
653
654/**
655 * @brief A block encoding metadata query result.
656 *
657 * If the block is an error block or a constant color block or an error block all fields other than
658 * the profile, block dimensions, and error/constant indicator will be zero.
659 */
660struct astcenc_block_info
661{
662	/** @brief The block encoding color profile. */
663	astcenc_profile profile;
664
665	/** @brief The number of texels in the X dimension. */
666	unsigned int block_x;
667
668	/** @brief The number of texels in the Y dimension. */
669	unsigned int block_y;
670
671	/** @brief The number of texel in the Z dimension. */
672	unsigned int block_z;
673
674	/** @brief The number of texels in the block. */
675	unsigned int texel_count;
676
677	/** @brief True if this block is an error block. */
678	bool is_error_block;
679
680	/** @brief True if this block is a constant color block. */
681	bool is_constant_block;
682
683	/** @brief True if this block is an HDR block. */
684	bool is_hdr_block;
685
686	/** @brief True if this block uses two weight planes. */
687	bool is_dual_plane_block;
688
689	/** @brief The number of partitions if not constant color. */
690	unsigned int partition_count;
691
692	/** @brief The partition index if 2 - 4 partitions used. */
693	unsigned int partition_index;
694
695	/** @brief The component index of the second plane if dual plane. */
696	unsigned int dual_plane_component;
697
698	/** @brief The color endpoint encoding mode for each partition. */
699	unsigned int color_endpoint_modes[4];
700
701	/** @brief The number of color endpoint quantization levels. */
702	unsigned int color_level_count;
703
704	/** @brief The number of weight quantization levels. */
705	unsigned int weight_level_count;
706
707	/** @brief The number of weights in the X dimension. */
708	unsigned int weight_x;
709
710	/** @brief The number of weights in the Y dimension. */
711	unsigned int weight_y;
712
713	/** @brief The number of weights in the Z dimension. */
714	unsigned int weight_z;
715
716	/** @brief The unpacked color endpoints for each partition. */
717	float color_endpoints[4][2][4];
718
719	/** @brief The per-texel interpolation weights for the block. */
720	float weight_values_plane1[216];
721
722	/** @brief The per-texel interpolation weights for the block. */
723	float weight_values_plane2[216];
724
725	/** @brief The per-texel partition assignments for the block. */
726	uint8_t partition_assignment[216];
727};
728
729/**
730 * Populate a codec config based on default settings.
731 *
732 * Power users can edit the returned config struct to fine tune before allocating the context.
733 *
734 * @param      profile   Color profile.
735 * @param      block_x   ASTC block size X dimension.
736 * @param      block_y   ASTC block size Y dimension.
737 * @param      block_z   ASTC block size Z dimension.
738 * @param      quality   Search quality preset / effort level. Either an
739 *                       @c ASTCENC_PRE_* value, or a effort level between 0
740 *                       and 100. Performance is not linear between 0 and 100.
741
742 * @param      flags     A valid set of @c ASTCENC_FLG_* flag bits.
743 * @param[out] config    Output config struct to populate.
744 *
745 * @return @c ASTCENC_SUCCESS on success, or an error if the inputs are invalid
746 * either individually, or in combination.
747 */
748ASTCENC_PUBLIC astcenc_error astcenc_config_init(
749	astcenc_profile profile,
750	unsigned int block_x,
751	unsigned int block_y,
752	unsigned int block_z,
753	float quality,
754	unsigned int flags,
755	astcenc_config* config);
756
757/**
758 * @brief Allocate a new codec context based on a config.
759 *
760 * This function allocates all of the memory resources and threads needed by the codec. This can be
761 * slow, so it is recommended that contexts are reused to serially compress or decompress multiple
762 * images to amortize setup cost.
763 *
764 * Contexts can be allocated to support only decompression using the @c ASTCENC_FLG_DECOMPRESS_ONLY
765 * flag when creating the configuration. The compression functions will fail if invoked. For a
766 * decompress-only library build the @c ASTCENC_FLG_DECOMPRESS_ONLY flag must be set when creating
767 * any context.
768 *
769 * @param[in]  config         Codec config.
770 * @param      thread_count   Thread count to configure for.
771 * @param[out] context        Location to store an opaque context pointer.
772 *
773 * @return @c ASTCENC_SUCCESS on success, or an error if context creation failed.
774 */
775ASTCENC_PUBLIC astcenc_error astcenc_context_alloc(
776	const astcenc_config* config,
777	unsigned int thread_count,
778	astcenc_context** context);
779
780/**
781 * @brief Compress an image.
782 *
783 * A single context can only compress or decompress a single image at a time.
784 *
785 * For a context configured for multi-threading, any set of the N threads can call this function.
786 * Work will be dynamically scheduled across the threads available. Each thread must have a unique
787 * @c thread_index.
788 *
789 * @param         context        Codec context.
790 * @param[in,out] image          An input image, in 2D slices.
791 * @param         swizzle        Compression data swizzle, applied before compression.
792 * @param[out]    data_out       Pointer to output data array.
793 * @param         data_len       Length of the output data array.
794 * @param         thread_index   Thread index [0..N-1] of calling thread.
795 *
796 * @return @c ASTCENC_SUCCESS on success, or an error if compression failed.
797 */
798ASTCENC_PUBLIC astcenc_error astcenc_compress_image(
799	astcenc_context* context,
800	astcenc_image* image,
801	const astcenc_swizzle* swizzle,
802	uint8_t* data_out,
803	size_t data_len,
804#if QUALITY_CONTROL
805	bool calQualityEnable,
806	int32_t *mse[RGBA_COM],
807#endif
808	unsigned int thread_index);
809
810/**
811 * @brief Reset the codec state for a new compression.
812 *
813 * The caller is responsible for synchronizing threads in the worker thread pool. This function must
814 * only be called when all threads have exited the @c astcenc_compress_image() function for image N,
815 * but before any thread enters it for image N + 1.
816 *
817 * Calling this is not required (but won't hurt), if the context is created for single threaded use.
818 *
819 * @param context   Codec context.
820 *
821 * @return @c ASTCENC_SUCCESS on success, or an error if reset failed.
822 */
823ASTCENC_PUBLIC astcenc_error astcenc_compress_reset(
824	astcenc_context* context);
825
826/**
827 * @brief Decompress an image.
828 *
829 * @param         context        Codec context.
830 * @param[in]     data           Pointer to compressed data.
831 * @param         data_len       Length of the compressed data, in bytes.
832 * @param[in,out] image_out      Output image.
833 * @param         swizzle        Decompression data swizzle, applied after decompression.
834 * @param         thread_index   Thread index [0..N-1] of calling thread.
835 *
836 * @return @c ASTCENC_SUCCESS on success, or an error if decompression failed.
837 */
838ASTCENC_PUBLIC astcenc_error astcenc_decompress_image(
839	astcenc_context* context,
840	const uint8_t* data,
841	size_t data_len,
842	astcenc_image* image_out,
843	const astcenc_swizzle* swizzle,
844	unsigned int thread_index);
845
846/**
847 * @brief Reset the codec state for a new decompression.
848 *
849 * The caller is responsible for synchronizing threads in the worker thread pool. This function must
850 * only be called when all threads have exited the @c astcenc_decompress_image() function for image
851 * N, but before any thread enters it for image N + 1.
852 *
853 * Calling this is not required (but won't hurt), if the context is created for single threaded use.
854 *
855 * @param context   Codec context.
856 *
857 * @return @c ASTCENC_SUCCESS on success, or an error if reset failed.
858 */
859ASTCENC_PUBLIC astcenc_error astcenc_decompress_reset(
860	astcenc_context* context);
861
862/**
863 * Free the compressor context.
864 *
865 * @param context   The codec context.
866 */
867ASTCENC_PUBLIC void astcenc_context_free(
868	astcenc_context* context);
869
870/**
871 * @brief Provide a high level summary of a block's encoding.
872 *
873 * This feature is primarily useful for codec developers but may be useful for developers building
874 * advanced content packaging pipelines.
875 *
876 * @param context   Codec context.
877 * @param data      One block of compressed ASTC data.
878 * @param info      The output info structure to populate.
879 *
880 * @return @c ASTCENC_SUCCESS if the block was decoded, or an error otherwise. Note that this
881 *         function will return success even if the block itself was an error block encoding, as the
882 *         decode was correctly handled.
883 */
884ASTCENC_PUBLIC astcenc_error astcenc_get_block_info(
885	astcenc_context* context,
886	const uint8_t data[16],
887	astcenc_block_info* info);
888
889/**
890 * @brief Get a printable string for specific status code.
891 *
892 * @param status   The status value.
893 *
894 * @return A human readable nul-terminated string.
895 */
896ASTCENC_PUBLIC const char* astcenc_get_error_string(
897	astcenc_error status);
898
899#endif
900