1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2024 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief Functions for codec library front-end.
20 */
21
22#include "astcenc.h"
23#include "astcenccli_internal.h"
24
25#if defined(_WIN32)
26	#include <io.h>
27	#define isatty _isatty
28#else
29	#include <unistd.h>
30#endif
31#include <cassert>
32#include <cstring>
33#include <functional>
34#include <string>
35#include <sstream>
36#include <vector>
37#include <memory>
38
39/* ============================================================================
40	Data structure definitions
41============================================================================ */
42
/**
 * @brief Bitmask describing a command line operation.
 *
 * Values are either a single operation bit (help, version) or a combination
 * of the ASTCENC_STAGE_* bits naming the pipeline stages that must run.
 */
typedef unsigned int astcenc_operation;

/**
 * @brief One row of the command line operation decode table.
 */
struct mode_entry
{
	/** @brief The command line switch text, compared against argv[1]. */
	const char* opt;
	/** @brief The operation selected by this switch. */
	astcenc_operation operation;
	/** @brief The color profile selected by this switch. */
	astcenc_profile decode_mode;
};
51
52/* ============================================================================
53	Constants and literals
54============================================================================ */
55
// Stage bits occupy bits 0-6 and are OR-ed together to build the compress,
// decompress, and test operations below. The help and version operations use
// bits 7 and 8, so they never overlap with any stage bit.

/** @brief Stage bit indicating we need to load a compressed image. */
static const unsigned int ASTCENC_STAGE_LD_COMP    = 1 << 0;

/** @brief Stage bit indicating we need to store a compressed image. */
static const unsigned int ASTCENC_STAGE_ST_COMP    = 1 << 1;

/** @brief Stage bit indicating we need to load an uncompressed image. */
static const unsigned int ASTCENC_STAGE_LD_NCOMP   = 1 << 2;

/** @brief Stage bit indicating we need to store an uncompressed image. */
static const unsigned int ASTCENC_STAGE_ST_NCOMP   = 1 << 3;

/** @brief Stage bit indicating we need to compress an image. */
static const unsigned int ASTCENC_STAGE_COMPRESS   = 1 << 4;

/** @brief Stage bit indicating we need to decompress an image. */
static const unsigned int ASTCENC_STAGE_DECOMPRESS = 1 << 5;

/** @brief Stage bit indicating we need to compare an image with the original input. */
static const unsigned int ASTCENC_STAGE_COMPARE    = 1 << 6;

/** @brief Operation indicating an unknown request (should never happen). */
static const astcenc_operation ASTCENC_OP_UNKNOWN  = 0;

/** @brief Operation indicating the user wants to print long-form help text and version info. */
static const astcenc_operation ASTCENC_OP_HELP     = 1 << 7;

/** @brief Operation indicating the user wants to print short-form help text and version info. */
static const astcenc_operation ASTCENC_OP_VERSION  = 1 << 8;

/** @brief Operation indicating the user wants to compress and store an image. */
static const astcenc_operation ASTCENC_OP_COMPRESS =
                               ASTCENC_STAGE_LD_NCOMP |
                               ASTCENC_STAGE_COMPRESS |
                               ASTCENC_STAGE_ST_COMP;

/** @brief Operation indicating the user wants to decompress and store an image. */
static const astcenc_operation ASTCENC_OP_DECOMPRESS =
                               ASTCENC_STAGE_LD_COMP |
                               ASTCENC_STAGE_DECOMPRESS |
                               ASTCENC_STAGE_ST_NCOMP;

/**
 * @brief Operation indicating the user wants to test a compression setting on an image.
 *
 * Note this shares the compress stage bit with ASTCENC_OP_COMPRESS, so tests
 * of the form (operation & ASTCENC_STAGE_COMPRESS) match both operations.
 */
static const astcenc_operation ASTCENC_OP_TEST =
                               ASTCENC_STAGE_LD_NCOMP |
                               ASTCENC_STAGE_COMPRESS |
                               ASTCENC_STAGE_DECOMPRESS |
                               ASTCENC_STAGE_COMPARE |
                               ASTCENC_STAGE_ST_NCOMP;
105
/**
 * @brief Image preprocessing tasks prior to encoding.
 *
 * At most one of these is selected per run; the command line parser rejects
 * a second -pp-* switch.
 */
enum astcenc_preprocess
{
	/** @brief No image preprocessing. */
	ASTCENC_PP_NONE = 0,
	/** @brief Normal vector unit-length normalization. */
	ASTCENC_PP_NORMALIZE,
	/** @brief Color data alpha premultiplication. */
	ASTCENC_PP_PREMULTIPLY
};
118
/**
 * @brief Decode table for command line operation modes.
 *
 * Each row maps the first command line argument to an operation and a color
 * profile. Switch names are case sensitive: "-ch" and "-cH" select different
 * profiles.
 *
 * NOTE(review): the profile listed on the help and version rows looks like a
 * placeholder; confirm it is unused by the callers handling those operations.
 */
static const mode_entry modes[] {
	{"-cl",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR},
	{"-dl",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR},
	{"-tl",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR},
	{"-cs",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR_SRGB},
	{"-ds",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR_SRGB},
	{"-ts",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR_SRGB},
	{"-ch",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR_RGB_LDR_A},
	{"-dh",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
	{"-th",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR_RGB_LDR_A},
	{"-cH",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR},
	{"-dH",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR},
	{"-tH",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR},
	{"-h",       ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
	{"-help",    ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
	{"-v",       ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR},
	{"-version", ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR}
};
138
/**
 * @brief Compression workload definition for worker threads.
 *
 * A single instance is handed to every worker as an opaque payload; each
 * worker forwards the fields to astcenc_compress_image().
 */
struct compression_workload
{
	/** @brief The codec context passed to the compressor. */
	astcenc_context* context;
	/** @brief The uncompressed input image. */
	astcenc_image* image;
	/** @brief The swizzle passed to the compressor. */
	astcenc_swizzle swizzle;
	/** @brief The output buffer passed to the compressor. */
	uint8_t* data_out;
	/** @brief The length passed alongside @c data_out; presumably its size in bytes. */
	size_t data_len;
	/** @brief Written by a worker if its compression call fails; not synchronized. */
	astcenc_error error;
#if QUALITY_CONTROL
	/** @brief Forwarded to the compressor in QUALITY_CONTROL builds. */
	bool calQualityEnable;
	/** @brief Forwarded to the compressor in QUALITY_CONTROL builds. */
	int32_t *mse[RGBA_COM];
#endif
};
155
/**
 * @brief Decompression workload definition for worker threads.
 *
 * A single instance is handed to every worker as an opaque payload; each
 * worker forwards the fields to astcenc_decompress_image().
 */
struct decompression_workload
{
	/** @brief The codec context passed to the decompressor. */
	astcenc_context* context;
	/** @brief The compressed input data. */
	uint8_t* data;
	/** @brief The length passed alongside @c data; presumably its size in bytes. */
	size_t data_len;
	/** @brief The image that receives the decompressed output. */
	astcenc_image* image_out;
	/** @brief The swizzle passed to the decompressor. */
	astcenc_swizzle swizzle;
	/** @brief Written by a worker if its decompression call fails; not synchronized. */
	astcenc_error error;
};
168
/**
 * @brief Callback emitting a progress bar.
 *
 * Prints a fixed-width text bar followed by the numeric value, then returns
 * the cursor to the start of the line so the next call overwrites it.
 *
 * @param value   The progress percentage; every 4 points fills one bar cell.
 *                Values outside of 0 to 100 draw an empty or full bar.
 */
extern "C" void progress_emitter(
	float value
) {
	const unsigned int bar_size = 25;

	// Clamp the fill count to the bar width before converting to an integer;
	// an unclamped count would write outside of the buffer. A NaN fails both
	// comparisons and so draws an empty bar.
	float cells = value / 4.0f;
	unsigned int parts = 0;
	if (cells >= static_cast<float>(bar_size))
	{
		parts = bar_size;
	}
	else if (cells > 0.0f)
	{
		parts = static_cast<unsigned int>(cells);
	}

	// Bar cells plus the two brackets and the NUL terminator
	char buffer[bar_size + 3];
	buffer[0] = '[';

	for (unsigned int i = 0; i < parts; i++)
	{
		buffer[i + 1] = '=';
	}

	for (unsigned int i = parts; i < bar_size; i++)
	{
		buffer[i + 1] = ' ';
	}

	buffer[bar_size + 1] = ']';
	buffer[bar_size + 2] = '\0';

	printf("    Progress: %s %03.1f%%\r", buffer, static_cast<double>(value));
	fflush(stdout);
}
197
/**
 * @brief Test if a string argument is a well formed float.
 *
 * The string must parse as a float and be consumed in full; leading
 * whitespace and trailing characters both cause a rejection.
 *
 * @param target   The string to test.
 *
 * @return True if the whole string is a float, false otherwise.
 */
static bool is_float(
	std::string target
) {
	std::istringstream parser(target);

	// Do not skip whitespace, so a leading space fails the extraction
	parser >> std::noskipws;

	float parsed;
	if (!(parser >> parsed))
	{
		return false;
	}

	// A successful parse must also have used up the entire string
	return parser.eof();
}
213
/**
 * @brief Test if a string ends with a given suffix.
 *
 * @param str      The string to test.
 * @param suffix   The suffix to look for; an empty suffix always matches.
 *
 * @return True if @c str ends with @c suffix, false otherwise.
 */
static bool ends_with(
	const std::string& str,
	const std::string& suffix
) {
	const size_t suffix_len = suffix.size();

	// A suffix longer than the string can never match
	if (suffix_len > str.size())
	{
		return false;
	}

	const size_t tail_start = str.size() - suffix_len;
	return str.compare(tail_start, suffix_len, suffix) == 0;
}
224
225/**
226 * @brief Runner callback function for a compression worker thread.
227 *
228 * @param thread_count   The number of threads in the worker pool.
229 * @param thread_id      The index of this thread in the worker pool.
230 * @param payload        The parameters for this thread.
231 */
232static void compression_workload_runner(
233	int thread_count,
234	int thread_id,
235	void* payload
236) {
237	(void)thread_count;
238
239	compression_workload* work = static_cast<compression_workload*>(payload);
240	astcenc_error error = astcenc_compress_image(
241	                       work->context, work->image, &work->swizzle,
242	                       work->data_out, work->data_len,
243#if QUALITY_CONTROL
244	                       work->calQualityEnable, work->mse,
245#endif
246	                       thread_id);
247
248	// This is a racy update, so which error gets returned is a random, but it
249	// will reliably report an error if an error occurs
250	if (error != ASTCENC_SUCCESS)
251	{
252		work->error = error;
253	}
254}
255
256/**
257 * @brief Runner callback function for a decompression worker thread.
258 *
259 * @param thread_count   The number of threads in the worker pool.
260 * @param thread_id      The index of this thread in the worker pool.
261 * @param payload        The parameters for this thread.
262 */
263static void decompression_workload_runner(
264	int thread_count,
265	int thread_id,
266	void* payload
267) {
268	(void)thread_count;
269
270	decompression_workload* work = static_cast<decompression_workload*>(payload);
271	astcenc_error error = astcenc_decompress_image(
272	                       work->context, work->data, work->data_len,
273	                       work->image_out, &work->swizzle, thread_id);
274
275	// This is a racy update, so which error gets returned is a random, but it
276	// will reliably report an error if an error occurs
277	if (error != ASTCENC_SUCCESS)
278	{
279		work->error = error;
280	}
281}
282
/**
 * @brief Utility to generate a slice file name from a pattern.
 *
 * Convert "foo/bar.png" in to "foo/bar_<slice>.png"
 *
 * The extension is the text from the last '.' onwards, and that '.' must be
 * in the final path component; a dot in a directory name is not an extension.
 *
 * @param basename The base pattern; must contain a file extension.
 * @param index    The slice index.
 * @param error    Set to true on error (no extension found), false on success.
 *
 * @return The slice file name, or an empty string on error.
 */
static std::string get_slice_filename(
	const std::string& basename,
	unsigned int index,
	bool& error
) {
#if defined(_WIN32)
	const char* path_seps = "/\\";
#else
	const char* path_seps = "/";
#endif

	size_t sep = basename.find_last_of('.');
	size_t dir = basename.find_last_of(path_seps);

	// Reject a missing dot, or a last dot that sits inside a directory name
	bool in_dir_name = (dir != std::string::npos) && (sep != std::string::npos) && (sep < dir);
	if ((sep == std::string::npos) || in_dir_name)
	{
		error = true;
		return "";
	}

	std::string base = basename.substr(0, sep);
	std::string ext = basename.substr(sep);
	std::string name = base + "_" + std::to_string(index) + ext;
	error = false;
	return name;
}
312
313/**
314 * @brief Load a non-astc image file from memory.
315 *
316 * @param filename            The file to load, or a pattern for array loads.
317 * @param dim_z               The number of slices to load.
318 * @param y_flip              Should this image be Y flipped?
319 * @param[out] is_hdr         Is the loaded image HDR?
320 * @param[out] component_count The number of components in the loaded image.
321 *
322 * @return The astc image file, or nullptr on error.
323 */
324static astcenc_image* load_uncomp_file(
325	const char* filename,
326	unsigned int dim_z,
327	bool y_flip,
328	bool& is_hdr,
329	unsigned int& component_count
330) {
331	astcenc_image *image = nullptr;
332
333	// For a 2D image just load the image directly
334	if (dim_z == 1)
335	{
336		image = load_ncimage(filename, y_flip, is_hdr, component_count);
337	}
338	else
339	{
340		bool slice_is_hdr;
341		unsigned int slice_component_count;
342		astcenc_image* slice = nullptr;
343		std::vector<astcenc_image*> slices;
344
345		// For a 3D image load an array of slices
346		for (unsigned int image_index = 0; image_index < dim_z; image_index++)
347		{
348			bool error;
349			std::string slice_name = get_slice_filename(filename, image_index, error);
350			if (error)
351			{
352				print_error("ERROR: Image pattern does not contain file extension: %s\n", filename);
353				break;
354			}
355
356			slice = load_ncimage(slice_name.c_str(), y_flip,
357			                     slice_is_hdr, slice_component_count);
358			if (!slice)
359			{
360				break;
361			}
362
363			slices.push_back(slice);
364
365			// Check it is not a 3D image
366			if (slice->dim_z != 1)
367			{
368				print_error("ERROR: Image arrays do not support 3D sources: %s\n", slice_name.c_str());
369				break;
370			}
371
372			// Check slices are consistent with each other
373			if (image_index != 0)
374			{
375				if ((is_hdr != slice_is_hdr) || (component_count != slice_component_count))
376				{
377					print_error("ERROR: Image array[0] and [%d] are different formats\n", image_index);
378					break;
379				}
380
381				if ((slices[0]->dim_x != slice->dim_x) ||
382				    (slices[0]->dim_y != slice->dim_y) ||
383				    (slices[0]->dim_z != slice->dim_z))
384				{
385					print_error("ERROR: Image array[0] and [%d] are different dimensions\n", image_index);
386					break;
387				}
388			}
389			else
390			{
391				is_hdr = slice_is_hdr;
392				component_count = slice_component_count;
393			}
394		}
395
396		// If all slices loaded correctly then repack them into a single image
397		if (slices.size() == dim_z)
398		{
399			unsigned int dim_x = slices[0]->dim_x;
400			unsigned int dim_y = slices[0]->dim_y;
401			int bitness = is_hdr ? 16 : 8;
402			int slice_size = dim_x * dim_y;
403
404			image = alloc_image(bitness, dim_x, dim_y, dim_z);
405
406			// Combine 2D source images into one 3D image
407			for (unsigned int z = 0; z < dim_z; z++)
408			{
409				if (image->data_type == ASTCENC_TYPE_U8)
410				{
411					uint8_t* data8 = static_cast<uint8_t*>(image->data[z]);
412					uint8_t* data8src = static_cast<uint8_t*>(slices[z]->data[0]);
413					size_t copy_size = slice_size * 4 * sizeof(uint8_t);
414					memcpy(data8, data8src, copy_size);
415				}
416				else if (image->data_type == ASTCENC_TYPE_F16)
417				{
418					uint16_t* data16 = static_cast<uint16_t*>(image->data[z]);
419					uint16_t* data16src = static_cast<uint16_t*>(slices[z]->data[0]);
420					size_t copy_size = slice_size * 4 * sizeof(uint16_t);
421					memcpy(data16, data16src, copy_size);
422				}
423				else // if (image->data_type == ASTCENC_TYPE_F32)
424				{
425					assert(image->data_type == ASTCENC_TYPE_F32);
426					float* data32 = static_cast<float*>(image->data[z]);
427					float* data32src = static_cast<float*>(slices[z]->data[0]);
428					size_t copy_size = slice_size * 4 * sizeof(float);
429					memcpy(data32, data32src, copy_size);
430				}
431			}
432		}
433
434		for (auto &i : slices)
435		{
436			free_image(i);
437		}
438	}
439
440	return image;
441}
442
443/**
444 * @brief Parse the command line.
445 *
446 * @param      argc        Command line argument count.
447 * @param[in]  argv        Command line argument vector.
448 * @param[out] operation   Codec operation mode.
449 * @param[out] profile     Codec color profile.
450 *
451 * @return 0 if everything is okay, 1 if there is some error
452 */
453static int parse_commandline_options(
454	int argc,
455	char **argv,
456	astcenc_operation& operation,
457	astcenc_profile& profile
458) {
459	assert(argc >= 2); (void)argc;
460
461	profile = ASTCENC_PRF_LDR;
462	operation = ASTCENC_OP_UNKNOWN;
463
464	int modes_count = sizeof(modes) / sizeof(modes[0]);
465	for (int i = 0; i < modes_count; i++)
466	{
467		if (!strcmp(modes[i].opt, argv[1]))
468		{
469			operation = modes[i].operation;
470			profile = modes[i].decode_mode;
471			break;
472		}
473	}
474
475	if (operation == ASTCENC_OP_UNKNOWN)
476	{
477		print_error("ERROR: Unrecognized operation '%s'\n", argv[1]);
478		return 1;
479	}
480
481	return 0;
482}
483
484/**
485 * @brief Initialize the astcenc_config
486 *
487 * @param      argc         Command line argument count.
488 * @param[in]  argv         Command line argument vector.
489 * @param      operation    Codec operation mode.
490 * @param[out] profile      Codec color profile.
491 * @param      comp_image   Compressed image if a decompress operation.
492 * @param[out] preprocess   Image preprocess operation.
493 * @param[out] config       Codec configuration.
494 *
495 * @return 0 if everything is okay, 1 if there is some error
496 */
497static int init_astcenc_config(
498	int argc,
499	char **argv,
500	astcenc_profile profile,
501	astcenc_operation operation,
502	astc_compressed_image& comp_image,
503	astcenc_preprocess& preprocess,
504	astcenc_config& config
505) {
506	unsigned int block_x = 0;
507	unsigned int block_y = 0;
508	unsigned int block_z = 1;
509
510	// For decode the block size is set by the incoming image.
511	if (operation == ASTCENC_OP_DECOMPRESS)
512	{
513		block_x = comp_image.block_x;
514		block_y = comp_image.block_y;
515		block_z = comp_image.block_z;
516	}
517
518	float quality = 0.0f;
519	preprocess = ASTCENC_PP_NONE;
520
521	// parse the command line's encoding options.
522	int argidx = 4;
523	if (operation & ASTCENC_STAGE_COMPRESS)
524	{
525		// Read and decode block size
526		if (argc < 5)
527		{
528			print_error("ERROR: Block size must be specified\n");
529			return 1;
530		}
531
532		int cnt2D, cnt3D;
533		int dimensions = sscanf(argv[4], "%ux%u%nx%u%n",
534		                        &block_x, &block_y, &cnt2D, &block_z, &cnt3D);
535		// Character after the last match should be a NUL
536		if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D])))
537		{
538			print_error("ERROR: Block size '%s' is invalid\n", argv[4]);
539			return 1;
540		}
541
542		// Read and decode search quality
543		if (argc < 6)
544		{
545			print_error("ERROR: Search quality level must be specified\n");
546			return 1;
547		}
548
549		if (!strcmp(argv[5], "-fastest"))
550		{
551			quality = ASTCENC_PRE_FASTEST;
552		}
553		else if (!strcmp(argv[5], "-fast"))
554		{
555			quality = ASTCENC_PRE_FAST;
556		}
557		else if (!strcmp(argv[5], "-medium"))
558		{
559			quality = ASTCENC_PRE_MEDIUM;
560		}
561		else if (!strcmp(argv[5], "-thorough"))
562		{
563			quality = ASTCENC_PRE_THOROUGH;
564		}
565		else if (!strcmp(argv[5], "-verythorough"))
566		{
567			quality = ASTCENC_PRE_VERYTHOROUGH;
568		}
569		else if (!strcmp(argv[5], "-exhaustive"))
570		{
571			quality = ASTCENC_PRE_EXHAUSTIVE;
572		}
573		else if (is_float(argv[5]))
574		{
575			quality = static_cast<float>(atof(argv[5]));
576		}
577		else
578		{
579			print_error("ERROR: Search quality/preset '%s' is invalid\n", argv[5]);
580			return 1;
581		}
582
583		argidx = 6;
584	}
585
586	unsigned int flags = 0;
587
588	// Gather the flags that we need
589	while (argidx < argc)
590	{
591		if (!strcmp(argv[argidx], "-a"))
592		{
593			// Skip over the data value for now
594			argidx++;
595			flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT;
596		}
597		else if (!strcmp(argv[argidx], "-normal"))
598		{
599			flags |= ASTCENC_FLG_MAP_NORMAL;
600		}
601		else if (!strcmp(argv[argidx], "-decode_unorm8"))
602		{
603			flags |= ASTCENC_FLG_USE_DECODE_UNORM8;
604		}
605		else if (!strcmp(argv[argidx], "-rgbm"))
606		{
607			// Skip over the data value for now
608			argidx++;
609			flags |= ASTCENC_FLG_MAP_RGBM;
610		}
611		else if (!strcmp(argv[argidx], "-perceptual"))
612		{
613			flags |= ASTCENC_FLG_USE_PERCEPTUAL;
614		}
615		else if (!strcmp(argv[argidx], "-pp-normalize"))
616		{
617			if (preprocess != ASTCENC_PP_NONE)
618			{
619				print_error("ERROR: Only a single image preprocess can be used\n");
620				return 1;
621			}
622			preprocess = ASTCENC_PP_NORMALIZE;
623		}
624		else if (!strcmp(argv[argidx], "-pp-premultiply"))
625		{
626			if (preprocess != ASTCENC_PP_NONE)
627			{
628				print_error("ERROR: Only a single image preprocess can be used\n");
629				return 1;
630			}
631			preprocess = ASTCENC_PP_PREMULTIPLY;
632		}
633		argidx ++;
634	}
635
636#if defined(ASTCENC_DECOMPRESS_ONLY)
637	flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
638#else
639	// Decompression can skip some memory allocation, but need full tables
640	if (operation == ASTCENC_OP_DECOMPRESS)
641	{
642		flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
643	}
644	// Compression and test passes can skip some decimation initialization
645	// as we know we are decompressing images that were compressed using the
646	// same settings and heuristics ...
647	else
648	{
649		flags |= ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
650	}
651#endif
652
653	astcenc_error status = astcenc_config_init(profile, block_x, block_y, block_z,
654	                                           quality, flags, &config);
655	if (status == ASTCENC_ERR_BAD_BLOCK_SIZE)
656	{
657		print_error("ERROR: Block size '%s' is invalid\n", argv[4]);
658		return 1;
659	}
660	else if (status == ASTCENC_ERR_BAD_DECODE_MODE)
661	{
662		print_error("ERROR: Decode_unorm8 is not supported by HDR profiles\n", argv[4]);
663		return 1;
664	}
665	else if (status == ASTCENC_ERR_BAD_CPU_FLOAT)
666	{
667		print_error("ERROR: astcenc must not be compiled with -ffast-math\n");
668		return 1;
669	}
670	else if (status != ASTCENC_SUCCESS)
671	{
672		print_error("ERROR: Init config failed with %s\n", astcenc_get_error_string(status));
673		return 1;
674	}
675
676	return 0;
677}
678
679/**
680 * @brief Edit the astcenc_config
681 *
682 * @param         argc         Command line argument count.
683 * @param[in]     argv         Command line argument vector.
684 * @param         operation    Codec operation.
685 * @param[out]    cli_config   Command line config.
686 * @param[in,out] config       Codec configuration.
687 *
688 * @return 0 if everything is OK, 1 if there is some error
689 */
690static int edit_astcenc_config(
691	int argc,
692	char **argv,
693	const astcenc_operation operation,
694	cli_config_options& cli_config,
695	astcenc_config& config
696) {
697
698	int argidx = (operation & ASTCENC_STAGE_COMPRESS) ? 6 : 4;
699	config.privateProfile = HIGH_QUALITY_PROFILE;
700	while (argidx < argc)
701	{
702		if (!strcmp(argv[argidx], "-silent"))
703		{
704			argidx++;
705			cli_config.silentmode = 1;
706		}
707		else if (!strcmp(argv[argidx], "-cw"))
708		{
709			argidx += 5;
710			if (argidx > argc)
711			{
712				print_error("ERROR: -cw switch with less than 4 arguments\n");
713				return 1;
714			}
715
716			config.cw_r_weight = static_cast<float>(atof(argv[argidx - 4]));
717			config.cw_g_weight = static_cast<float>(atof(argv[argidx - 3]));
718			config.cw_b_weight = static_cast<float>(atof(argv[argidx - 2]));
719			config.cw_a_weight = static_cast<float>(atof(argv[argidx - 1]));
720		}
721		else if (!strcmp(argv[argidx], "-a"))
722		{
723			argidx += 2;
724			if (argidx > argc)
725			{
726				print_error("ERROR: -a switch with no argument\n");
727				return 1;
728			}
729
730			config.a_scale_radius = atoi(argv[argidx - 1]);
731		}
732		else if (!strcmp(argv[argidx], "-esw"))
733		{
734			argidx += 2;
735			if (argidx > argc)
736			{
737				print_error("ERROR: -esw switch with no argument\n");
738				return 1;
739			}
740
741			if (strlen(argv[argidx - 1]) != 4)
742			{
743				print_error("ERROR: -esw pattern does not contain 4 characters\n");
744				return 1;
745			}
746
747			astcenc_swz swizzle_components[4];
748			for (int i = 0; i < 4; i++)
749			{
750				switch (argv[argidx - 1][i])
751				{
752				case 'r':
753					swizzle_components[i] = ASTCENC_SWZ_R;
754					break;
755				case 'g':
756					swizzle_components[i] = ASTCENC_SWZ_G;
757					break;
758				case 'b':
759					swizzle_components[i] = ASTCENC_SWZ_B;
760					break;
761				case 'a':
762					swizzle_components[i] = ASTCENC_SWZ_A;
763					break;
764				case '0':
765					swizzle_components[i] = ASTCENC_SWZ_0;
766					break;
767				case '1':
768					swizzle_components[i] = ASTCENC_SWZ_1;
769					break;
770				default:
771					print_error("ERROR: -esw component '%c' is not valid\n", argv[argidx - 1][i]);
772					return 1;
773				}
774			}
775
776			cli_config.swz_encode.r = swizzle_components[0];
777			cli_config.swz_encode.g = swizzle_components[1];
778			cli_config.swz_encode.b = swizzle_components[2];
779			cli_config.swz_encode.a = swizzle_components[3];
780		}
781		else if (!strcmp(argv[argidx], "-ssw"))
782		{
783			argidx += 2;
784			if (argidx > argc)
785			{
786				print_error("ERROR: -ssw switch with no argument\n");
787				return 1;
788			}
789
790			size_t char_count = strlen(argv[argidx - 1]);
791			if (char_count == 0)
792			{
793				print_error("ERROR: -ssw pattern contains no characters\n");
794				return 1;
795			}
796
797			if (char_count > 4)
798			{
799				print_error("ERROR: -ssw pattern contains more than 4 characters\n");
800				return 1;
801			}
802
803			bool found_r = false;
804			bool found_g = false;
805			bool found_b = false;
806			bool found_a = false;
807
808			for (size_t i = 0; i < char_count; i++)
809			{
810				switch (argv[argidx - 1][i])
811				{
812				case 'r':
813					found_r = true;
814					break;
815				case 'g':
816					found_g = true;
817					break;
818				case 'b':
819					found_b = true;
820					break;
821				case 'a':
822					found_a = true;
823					break;
824				default:
825					print_error("ERROR: -ssw component '%c' is not valid\n", argv[argidx - 1][i]);
826					return 1;
827				}
828			}
829
830			config.cw_r_weight = found_r ? 1.0f : 0.0f;
831			config.cw_g_weight = found_g ? 1.0f : 0.0f;
832			config.cw_b_weight = found_b ? 1.0f : 0.0f;
833			config.cw_a_weight = found_a ? 1.0f : 0.0f;
834		}
835		else if (!strcmp(argv[argidx], "-dsw"))
836		{
837			argidx += 2;
838			if (argidx > argc)
839			{
840				print_error("ERROR: -dsw switch with no argument\n");
841				return 1;
842			}
843
844			if (strlen(argv[argidx - 1]) != 4)
845			{
846				print_error("ERROR: -dsw switch does not contain 4 characters\n");
847				return 1;
848			}
849
850			astcenc_swz swizzle_components[4];
851			for (int i = 0; i < 4; i++)
852			{
853				switch (argv[argidx - 1][i])
854				{
855				case 'r':
856					swizzle_components[i] = ASTCENC_SWZ_R;
857					break;
858				case 'g':
859					swizzle_components[i] = ASTCENC_SWZ_G;
860					break;
861				case 'b':
862					swizzle_components[i] = ASTCENC_SWZ_B;
863					break;
864				case 'a':
865					swizzle_components[i] = ASTCENC_SWZ_A;
866					break;
867				case '0':
868					swizzle_components[i] = ASTCENC_SWZ_0;
869					break;
870				case '1':
871					swizzle_components[i] = ASTCENC_SWZ_1;
872					break;
873				case 'z':
874					swizzle_components[i] =  ASTCENC_SWZ_Z;
875					break;
876				default:
877					print_error("ERROR: ERROR: -dsw component '%c' is not valid\n", argv[argidx - 1][i]);
878					return 1;
879				}
880			}
881
882			cli_config.swz_decode.r = swizzle_components[0];
883			cli_config.swz_decode.g = swizzle_components[1];
884			cli_config.swz_decode.b = swizzle_components[2];
885			cli_config.swz_decode.a = swizzle_components[3];
886		}
887		// presets begin here
888		else if (!strcmp(argv[argidx], "-normal"))
889		{
890			argidx++;
891
892			cli_config.swz_encode.r = ASTCENC_SWZ_R;
893			cli_config.swz_encode.g = ASTCENC_SWZ_R;
894			cli_config.swz_encode.b = ASTCENC_SWZ_R;
895			cli_config.swz_encode.a = ASTCENC_SWZ_G;
896
897			cli_config.swz_decode.r = ASTCENC_SWZ_R;
898			cli_config.swz_decode.g = ASTCENC_SWZ_A;
899			cli_config.swz_decode.b = ASTCENC_SWZ_Z;
900			cli_config.swz_decode.a = ASTCENC_SWZ_1;
901		}
902		else if (!strcmp(argv[argidx], "-rgbm"))
903		{
904			argidx += 2;
905			if (argidx > argc)
906			{
907				print_error("ERROR: -rgbm switch with no argument\n");
908				return 1;
909			}
910
911			config.rgbm_m_scale = static_cast<float>(atof(argv[argidx - 1]));
912			config.cw_a_weight = 2.0f * config.rgbm_m_scale;
913		}
914		else if (!strcmp(argv[argidx], "-decode_unorm8"))
915		{
916			argidx++;
917		}
918		else if (!strcmp(argv[argidx], "-perceptual"))
919		{
920			argidx++;
921		}
922		else if (!strcmp(argv[argidx], "-pp-normalize"))
923		{
924			argidx++;
925		}
926		else if (!strcmp(argv[argidx], "-pp-premultiply"))
927		{
928			argidx++;
929		}
930		else if (!strcmp(argv[argidx], "-blockmodelimit"))
931		{
932			argidx += 2;
933			if (argidx > argc)
934			{
935				print_error("ERROR: -blockmodelimit switch with no argument\n");
936				return 1;
937			}
938
939			config.tune_block_mode_limit = atoi(argv[argidx - 1]);
940		}
941		else if (!strcmp(argv[argidx], "-partitioncountlimit"))
942		{
943			argidx += 2;
944			if (argidx > argc)
945			{
946				print_error("ERROR: -partitioncountlimit switch with no argument\n");
947				return 1;
948			}
949
950			config.tune_partition_count_limit = atoi(argv[argidx - 1]);
951		}
952		else if (!strcmp(argv[argidx], "-2partitionindexlimit"))
953		{
954			argidx += 2;
955			if (argidx > argc)
956			{
957				print_error("ERROR: -2partitionindexlimit switch with no argument\n");
958				return 1;
959			}
960
961			config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
962		}
963		else if (!strcmp(argv[argidx], "-3partitionindexlimit"))
964		{
965			argidx += 2;
966			if (argidx > argc)
967			{
968				print_error("ERROR: -3partitionindexlimit switch with no argument\n");
969				return 1;
970			}
971
972			config.tune_3partition_index_limit = atoi(argv[argidx - 1]);
973		}
974		else if (!strcmp(argv[argidx], "-4partitionindexlimit"))
975		{
976			argidx += 2;
977			if (argidx > argc)
978			{
979				print_error("ERROR: -4partitionindexlimit switch with no argument\n");
980				return 1;
981			}
982
983			config.tune_4partition_index_limit = atoi(argv[argidx - 1]);
984		}
985		else if (!strcmp(argv[argidx], "-2partitioncandidatelimit"))
986		{
987			argidx += 2;
988			if (argidx > argc)
989			{
990				print_error("ERROR: -2partitioncandidatelimit switch with no argument\n");
991				return 1;
992			}
993
994			config.tune_2partitioning_candidate_limit = atoi(argv[argidx - 1]);
995		}
996		else if (!strcmp(argv[argidx], "-3partitioncandidatelimit"))
997		{
998			argidx += 2;
999			if (argidx > argc)
1000			{
1001				print_error("ERROR: -3partitioncandidatelimit switch with no argument\n");
1002				return 1;
1003			}
1004
1005			config.tune_3partitioning_candidate_limit = atoi(argv[argidx - 1]);
1006		}
1007		else if (!strcmp(argv[argidx], "-4partitioncandidatelimit"))
1008		{
1009			argidx += 2;
1010			if (argidx > argc)
1011			{
1012				print_error("ERROR: -4partitioncandidatelimit switch with no argument\n");
1013				return 1;
1014			}
1015
1016			config.tune_4partitioning_candidate_limit = atoi(argv[argidx - 1]);
1017		}
1018		else if (!strcmp(argv[argidx], "-dblimit"))
1019		{
1020			argidx += 2;
1021			if (argidx > argc)
1022			{
1023				print_error("ERROR: -dblimit switch with no argument\n");
1024				return 1;
1025			}
1026
1027			if ((config.profile == ASTCENC_PRF_LDR) || (config.profile == ASTCENC_PRF_LDR_SRGB))
1028			{
1029				config.tune_db_limit = static_cast<float>(atof(argv[argidx - 1]));
1030			}
1031		}
1032		else if (!strcmp(argv[argidx], "-2partitionlimitfactor"))
1033		{
1034			argidx += 2;
1035			if (argidx > argc)
1036			{
1037				print_error("ERROR: -2partitionlimitfactor switch with no argument\n");
1038				return 1;
1039			}
1040
1041			config.tune_2partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1042		}
1043		else if (!strcmp(argv[argidx], "-3partitionlimitfactor"))
1044		{
1045			argidx += 2;
1046			if (argidx > argc)
1047			{
1048				print_error("ERROR: -3partitionlimitfactor switch with no argument\n");
1049				return 1;
1050			}
1051
1052			config.tune_3partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1053		}
1054		else if (!strcmp(argv[argidx], "-2planelimitcorrelation"))
1055		{
1056			argidx += 2;
1057			if (argidx > argc)
1058			{
1059				print_error("ERROR: -2planelimitcorrelation switch with no argument\n");
1060				return 1;
1061			}
1062
1063			config.tune_2plane_early_out_limit_correlation = static_cast<float>(atof(argv[argidx - 1]));
1064		}
1065		else if (!strcmp(argv[argidx], "-refinementlimit"))
1066		{
1067			argidx += 2;
1068			if (argidx > argc)
1069			{
1070				print_error("ERROR: -refinementlimit switch with no argument\n");
1071				return 1;
1072			}
1073
1074			config.tune_refinement_limit = atoi(argv[argidx - 1]);
1075		}
1076		else if (!strcmp(argv[argidx], "-candidatelimit"))
1077		{
1078			argidx += 2;
1079			if (argidx > argc)
1080			{
1081				print_error("ERROR: -candidatelimit switch with no argument\n");
1082				return 1;
1083			}
1084
1085			config.tune_candidate_limit = atoi(argv[argidx - 1]);
1086		}
1087		else if (!strcmp(argv[argidx], "-j"))
1088		{
1089			argidx += 2;
1090			if (argidx > argc)
1091			{
1092				print_error("ERROR: -j switch with no argument\n");
1093				return 1;
1094			}
1095
1096			cli_config.thread_count = atoi(argv[argidx - 1]);
1097		}
1098		else if (!strcmp(argv[argidx], "-repeats"))
1099		{
1100			argidx += 2;
1101			if (argidx > argc)
1102			{
1103				print_error("ERROR: -repeats switch with no argument\n");
1104				return 1;
1105			}
1106
1107			cli_config.repeat_count = atoi(argv[argidx - 1]);
1108			if (cli_config.repeat_count <= 0)
1109			{
1110				print_error("ERROR: -repeats value must be at least one\n");
1111				return 1;
1112			}
1113		}
1114		else if (!strcmp(argv[argidx], "-yflip"))
1115		{
1116			argidx++;
1117			cli_config.y_flip = 1;
1118		}
1119		else if (!strcmp(argv[argidx], "-mpsnr"))
1120		{
1121			argidx += 3;
1122			if (argidx > argc)
1123			{
1124				print_error("ERROR: -mpsnr switch with less than 2 arguments\n");
1125				return 1;
1126			}
1127
1128			cli_config.low_fstop = atoi(argv[argidx - 2]);
1129			cli_config.high_fstop = atoi(argv[argidx - 1]);
1130			if (cli_config.high_fstop < cli_config.low_fstop)
1131			{
1132				print_error("ERROR: -mpsnr switch <low> is greater than the <high>\n");
1133				return 1;
1134			}
1135		}
1136		// Option: Encode a 3D image from a sequence of 2D images.
1137		else if (!strcmp(argv[argidx], "-zdim"))
1138		{
1139			// Only supports compressing
1140			if (!(operation & ASTCENC_STAGE_COMPRESS))
1141			{
1142				print_error("ERROR: -zdim switch is only valid for compression\n");
1143				return 1;
1144			}
1145
1146			// Image depth must be specified.
1147			if (argidx + 2 > argc)
1148			{
1149				print_error("ERROR: -zdim switch with no argument\n");
1150				return 1;
1151			}
1152			argidx++;
1153
1154			// Read array size (image depth).
1155			if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0)
1156			{
1157				print_error("ERROR: -zdim size '%s' is invalid\n", argv[argidx]);
1158				return 1;
1159			}
1160
1161			if ((cli_config.array_size > 1) && (config.block_z == 1))
1162			{
1163				print_error("ERROR: -zdim with 3D input data for a 2D output format\n");
1164				return 1;
1165			}
1166			argidx++;
1167		}
1168#if defined(ASTCENC_DIAGNOSTICS)
1169		else if (!strcmp(argv[argidx], "-dtrace"))
1170		{
1171			argidx += 2;
1172			if (argidx > argc)
1173			{
1174				print_error("ERROR: -dtrace switch with no argument\n");
1175				return 1;
1176			}
1177
1178			config.trace_file_path = argv[argidx - 1];
1179		}
1180#endif
1181		else if (!strcmp(argv[argidx], "-privateProfile"))
1182		{
1183			argidx += 2; // skip 2 chatacters to get next parameter
1184			config.privateProfile = static_cast<QualityProfile>(atoi(argv[argidx - 1]));
1185		}
1186		else if (!strcmp(argv[argidx], "-dimage"))
1187		{
1188			argidx += 1;
1189			cli_config.diagnostic_images = true;
1190		}
1191		else // check others as well
1192		{
1193			print_error("ERROR: Argument '%s' not recognized\n", argv[argidx]);
1194			return 1;
1195		}
1196	}
1197
1198	if (cli_config.thread_count <= 0)
1199	{
1200		cli_config.thread_count = get_cpu_count();
1201	}
1202
1203#if defined(ASTCENC_DIAGNOSTICS)
1204	// Force single threaded for diagnostic builds
1205	cli_config.thread_count = 1;
1206
1207	if (!config.trace_file_path)
1208	{
1209		print_error("ERROR: Diagnostics builds must set -dtrace\n");
1210		return 1;
1211	}
1212#endif
1213
1214	return 0;
1215}
1216
1217/**
1218 * @brief Print the config settings in a human readable form.
1219 *
1220 * @param[in] cli_config   Command line config.
1221 * @param[in] config       Codec configuration.
1222 */
1223static void print_astcenc_config(
1224	const cli_config_options& cli_config,
1225	const astcenc_config& config
1226) {
1227	// Print all encoding settings unless specifically told otherwise
1228	if (!cli_config.silentmode)
1229	{
1230		printf("Compressor settings\n");
1231		printf("===================\n\n");
1232
1233		switch (config.profile)
1234		{
1235		case ASTCENC_PRF_LDR:
1236			printf("    Color profile:              LDR linear\n");
1237			break;
1238		case ASTCENC_PRF_LDR_SRGB:
1239			printf("    Color profile:              LDR sRGB\n");
1240			break;
1241		case ASTCENC_PRF_HDR_RGB_LDR_A:
1242			printf("    Color profile:              HDR RGB + LDR A\n");
1243			break;
1244		case ASTCENC_PRF_HDR:
1245			printf("    Color profile:              HDR RGBA\n");
1246			break;
1247		}
1248
1249		if (config.block_z == 1)
1250		{
1251			printf("    Block size:                 %ux%u\n", config.block_x, config.block_y);
1252		}
1253		else
1254		{
1255			printf("    Block size:                 %ux%ux%u\n", config.block_x, config.block_y, config.block_z);
1256		}
1257
1258		printf("    Bitrate:                    %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z));
1259		printf("    RGB alpha scale weight:     %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT));
1260		if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT))
1261		{
1262			printf("    Radius RGB alpha scale:     %u texels\n", config.a_scale_radius);
1263		}
1264
1265		printf("    R component weight:         %g\n", static_cast<double>(config.cw_r_weight));
1266		printf("    G component weight:         %g\n", static_cast<double>(config.cw_g_weight));
1267		printf("    B component weight:         %g\n", static_cast<double>(config.cw_b_weight));
1268		printf("    A component weight:         %g\n", static_cast<double>(config.cw_a_weight));
1269		printf("    Partition cutoff:           %u partitions\n", config.tune_partition_count_limit);
1270		printf("    2 partition index cutoff:   %u partition ids\n", config.tune_2partition_index_limit);
1271		printf("    3 partition index cutoff:   %u partition ids\n", config.tune_3partition_index_limit);
1272		printf("    4 partition index cutoff:   %u partition ids\n", config.tune_4partition_index_limit);
1273		printf("    PSNR cutoff:                %g dB\n", static_cast<double>(config.tune_db_limit));
1274		printf("    3 partition cutoff:         %g\n", static_cast<double>(config.tune_2partition_early_out_limit_factor));
1275		printf("    4 partition cutoff:         %g\n", static_cast<double>(config.tune_3partition_early_out_limit_factor));
1276		printf("    2 plane correlation cutoff: %g\n", static_cast<double>(config.tune_2plane_early_out_limit_correlation));
1277		printf("    Block mode centile cutoff:  %g%%\n", static_cast<double>(config.tune_block_mode_limit));
1278		printf("    Candidate cutoff:           %u candidates\n", config.tune_candidate_limit);
1279		printf("    Refinement cutoff:          %u iterations\n", config.tune_refinement_limit);
1280		printf("    Compressor thread count:    %d\n", cli_config.thread_count);
1281		printf("\n");
1282	}
1283}
1284
1285/**
1286 * @brief Get the value of a single pixel in an image.
1287 *
1288 * Note, this implementation is not particularly optimal as it puts format
1289 * checks in the inner-most loop. For the CLI preprocess passes this is deemed
1290 * acceptable as these are not performance critical paths.
1291 *
1292 * @param[in] img   The output image.
1293 * @param     x     The pixel x coordinate.
1294 * @param     y     The pixel y coordinate.
1295 * @param     z     The pixel z coordinate.
1296 *
1297 * @return      pixel   The pixel color value to write.
1298 */
1299static vfloat4 image_get_pixel(
1300	const astcenc_image& img,
1301	unsigned int x,
1302	unsigned int y,
1303	unsigned int z
1304) {
1305	// We should never escape bounds
1306	assert(x < img.dim_x);
1307	assert(y < img.dim_y);
1308	assert(z < img.dim_z);
1309
1310	if (img.data_type == ASTCENC_TYPE_U8)
1311	{
1312		uint8_t* data = static_cast<uint8_t*>(img.data[z]);
1313
1314		float r = data[(4 * img.dim_x * y) + (4 * x    )] / 255.0f;
1315		float g = data[(4 * img.dim_x * y) + (4 * x + 1)] / 255.0f;
1316		float b = data[(4 * img.dim_x * y) + (4 * x + 2)] / 255.0f;
1317		float a = data[(4 * img.dim_x * y) + (4 * x + 3)] / 255.0f;
1318
1319		return vfloat4(r, g, b, a);
1320	}
1321	else if (img.data_type == ASTCENC_TYPE_F16)
1322	{
1323		uint16_t* data = static_cast<uint16_t*>(img.data[z]);
1324
1325		vint4 colori(
1326			data[(4 * img.dim_x * y) + (4 * x    )],
1327			data[(4 * img.dim_x * y) + (4 * x + 1)],
1328			data[(4 * img.dim_x * y) + (4 * x + 2)],
1329			data[(4 * img.dim_x * y) + (4 * x + 3)]
1330		);
1331
1332		return float16_to_float(colori);
1333	}
1334	else // if (img.data_type == ASTCENC_TYPE_F32)
1335	{
1336		assert(img.data_type == ASTCENC_TYPE_F32);
1337		float* data = static_cast<float*>(img.data[z]);
1338
1339		return vfloat4(
1340			data[(4 * img.dim_x * y) + (4 * x    )],
1341			data[(4 * img.dim_x * y) + (4 * x + 1)],
1342			data[(4 * img.dim_x * y) + (4 * x + 2)],
1343			data[(4 * img.dim_x * y) + (4 * x + 3)]
1344		);
1345	}
1346}
1347
1348/**
1349 * @brief Set the value of a single pixel in an image.
1350 *
1351 * @param[out] img     The output image; must use F32 texture components.
1352 * @param      x       The pixel x coordinate.
1353 * @param      y       The pixel y coordinate.
1354 * @param      z       The pixel z coordinate.
1355 * @param      pixel   The pixel color value to write.
1356 */
1357static void image_set_pixel(
1358	astcenc_image& img,
1359	unsigned int x,
1360	unsigned int y,
1361	unsigned int z,
1362	vfloat4 pixel
1363) {
1364	// We should never escape bounds
1365	assert(x < img.dim_x);
1366	assert(y < img.dim_y);
1367	assert(z < img.dim_z);
1368	assert(img.data_type == ASTCENC_TYPE_F32);
1369
1370	float* data = static_cast<float*>(img.data[z]);
1371
1372	data[(4 * img.dim_x * y) + (4 * x    )] = pixel.lane<0>();
1373	data[(4 * img.dim_x * y) + (4 * x + 1)] = pixel.lane<1>();
1374	data[(4 * img.dim_x * y) + (4 * x + 2)] = pixel.lane<2>();
1375	data[(4 * img.dim_x * y) + (4 * x + 3)] = pixel.lane<3>();
1376}
1377
1378/**
1379 * @brief Set the value of a single pixel in an image.
1380 *
1381 * @param[out] img     The output image; must use F32 texture components.
1382 * @param      x       The pixel x coordinate.
1383 * @param      y       The pixel y coordinate.
1384 * @param      pixel   The pixel color value to write.
1385 */
1386static void image_set_pixel_u8(
1387	astcenc_image& img,
1388	size_t x,
1389	size_t y,
1390	vint4 pixel
1391) {
1392	// We should never escape bounds
1393	assert(x < img.dim_x);
1394	assert(y < img.dim_y);
1395	assert(img.data_type == ASTCENC_TYPE_U8);
1396
1397	uint8_t* data = static_cast<uint8_t*>(img.data[0]);
1398	pixel = pack_low_bytes(pixel);
1399	store_nbytes(pixel, data + (4 * img.dim_x * y) + (4 * x    ));
1400}
1401
1402/**
1403 * @brief Create a copy of @c input with forced unit-length normal vectors.
1404 *
1405 * It is assumed that all normal vectors are stored in the RGB components, and
1406 * stored in a packed unsigned range of [0,1] which must be unpacked prior
1407 * normalization. Data must then be repacked into this form for handing over to
1408 * the core codec.
1409 *
1410 * @param[in]  input    The input image.
1411 * @param[out] output   The output image, must use F32 components.
1412 */
1413static void image_preprocess_normalize(
1414	const astcenc_image& input,
1415	astcenc_image& output
1416) {
1417	for (unsigned int z = 0; z < input.dim_z; z++)
1418	{
1419		for (unsigned int y = 0; y < input.dim_y; y++)
1420		{
1421			for (unsigned int x = 0; x < input.dim_x; x++)
1422			{
1423				vfloat4 pixel = image_get_pixel(input, x, y, z);
1424
1425				// Stash alpha component and zero
1426				float a = pixel.lane<3>();
1427				pixel.set_lane<3>(0.0f);
1428
1429				// Decode [0,1] normals to [-1,1]
1430				pixel.set_lane<0>((pixel.lane<0>() * 2.0f) - 1.0f);
1431				pixel.set_lane<1>((pixel.lane<1>() * 2.0f) - 1.0f);
1432				pixel.set_lane<2>((pixel.lane<2>() * 2.0f) - 1.0f);
1433
1434				// Normalize pixel and restore alpha
1435				pixel = normalize(pixel);
1436				pixel.set_lane<3>(a);
1437
1438				// Encode [-1,1] normals to [0,1]
1439				pixel.set_lane<0>((pixel.lane<0>() + 1.0f) / 2.0f);
1440				pixel.set_lane<1>((pixel.lane<1>() + 1.0f) / 2.0f);
1441				pixel.set_lane<2>((pixel.lane<2>() + 1.0f) / 2.0f);
1442
1443				image_set_pixel(output, x, y, z, pixel);
1444			}
1445		}
1446	}
1447}
1448
1449/**
1450 * @brief Linearize an sRGB value.
1451 *
1452 * @return The linearized value.
1453 */
1454static float srgb_to_linear(
1455	float a
1456) {
1457	if (a <= 0.04045f)
1458	{
1459		return a * (1.0f / 12.92f);
1460	}
1461
1462	return powf((a + 0.055f) * (1.0f / 1.055f), 2.4f);
1463}
1464
1465/**
1466 * @brief sRGB gamma-encode a linear value.
1467 *
1468 * @return The gamma encoded value.
1469 */
1470static float linear_to_srgb(
1471	float a
1472) {
1473	if (a <= 0.0031308f)
1474	{
1475		return a * 12.92f;
1476	}
1477
1478	return 1.055f * powf(a, 1.0f / 2.4f) - 0.055f;
1479}
1480
1481/**
1482 * @brief Create a copy of @c input with premultiplied color data.
1483 *
1484 * If we are compressing sRGB data we linearize the data prior to
1485 * premultiplication and re-gamma-encode afterwards.
1486 *
1487 * @param[in]  input     The input image.
1488 * @param[out] output    The output image, must use F32 components.
1489 * @param      profile   The encoding profile.
1490 */
1491static void image_preprocess_premultiply(
1492	const astcenc_image& input,
1493	astcenc_image& output,
1494	astcenc_profile profile
1495) {
1496	for (unsigned int z = 0; z < input.dim_z; z++)
1497	{
1498		for (unsigned int y = 0; y < input.dim_y; y++)
1499		{
1500			for (unsigned int x = 0; x < input.dim_x; x++)
1501			{
1502				vfloat4 pixel = image_get_pixel(input, x, y, z);
1503
1504				// Linearize sRGB
1505				if (profile == ASTCENC_PRF_LDR_SRGB)
1506				{
1507					pixel.set_lane<0>(srgb_to_linear(pixel.lane<0>()));
1508					pixel.set_lane<1>(srgb_to_linear(pixel.lane<1>()));
1509					pixel.set_lane<2>(srgb_to_linear(pixel.lane<2>()));
1510				}
1511
1512				// Premultiply pixel in linear-space
1513				pixel.set_lane<0>(pixel.lane<0>() * pixel.lane<3>());
1514				pixel.set_lane<1>(pixel.lane<1>() * pixel.lane<3>());
1515				pixel.set_lane<2>(pixel.lane<2>() * pixel.lane<3>());
1516
1517				// Gamma-encode sRGB
1518				if (profile == ASTCENC_PRF_LDR_SRGB)
1519				{
1520					pixel.set_lane<0>(linear_to_srgb(pixel.lane<0>()));
1521					pixel.set_lane<1>(linear_to_srgb(pixel.lane<1>()));
1522					pixel.set_lane<2>(linear_to_srgb(pixel.lane<2>()));
1523				}
1524
1525				image_set_pixel(output, x, y, z, pixel);
1526			}
1527		}
1528	}
1529}
1530
1531/**
1532 * @brief Populate a single diagnostic image showing aspects of the encoding.
1533 *
1534 * @param context      The context to use.
1535 * @param image        The compressed image to analyze.
1536 * @param diag_image   The output visualization image to populate.
1537 * @param texel_func   The per-texel callback used to determine output color.
1538 */
1539static void print_diagnostic_image(
1540	astcenc_context* context,
1541	const astc_compressed_image& image,
1542	astcenc_image& diag_image,
1543	std::function<vint4(astcenc_block_info&, size_t, size_t)> texel_func
1544) {
1545	size_t block_cols = (image.dim_x + image.block_x - 1) / image.block_x;
1546	size_t block_rows = (image.dim_y + image.block_y - 1) / image.block_y;
1547
1548	uint8_t* data = image.data;
1549	for (size_t block_y = 0; block_y < block_rows; block_y++)
1550	{
1551		for (size_t block_x = 0; block_x < block_cols; block_x++)
1552		{
1553			astcenc_block_info block_info;
1554			astcenc_get_block_info(context, data, &block_info);
1555			data += 16;
1556
1557			size_t start_row = block_y * image.block_y;
1558			size_t start_col = block_x * image.block_x;
1559
1560			size_t end_row = astc::min(start_row + image.block_y, static_cast<size_t>(image.dim_y));
1561			size_t end_col = astc::min(start_col + image.block_x, static_cast<size_t>(image.dim_x));
1562
1563			for (size_t texel_y = start_row; texel_y < end_row; texel_y++)
1564			{
1565				for (size_t texel_x = start_col; texel_x < end_col; texel_x++)
1566				{
1567					vint4 color = texel_func(block_info, texel_x - start_col, texel_y - start_row);
1568					image_set_pixel_u8(diag_image, texel_x, texel_y, color);
1569				}
1570			}
1571		}
1572	}
1573}
1574
1575/**
1576 * @brief Print a set of diagnostic images showing aspects of the encoding.
1577 *
1578 * @param context       The context to use.
1579 * @param image         The compressed image to analyze.
1580 * @param output_file   The output file name to use as a stem for new names.
1581 */
1582static void print_diagnostic_images(
1583	astcenc_context* context,
1584	const astc_compressed_image& image,
1585	const std::string& output_file
1586) {
1587	if (image.dim_z != 1)
1588	{
1589		return;
1590	}
1591
1592	// Try to find a file extension we know about
1593	size_t index = output_file.find_last_of(".");
1594	std::string stem = output_file;
1595	if (index != std::string::npos)
1596	{
1597		stem = stem.substr(0, index);
1598	}
1599
1600	auto diag_image = alloc_image(8, image.dim_x, image.dim_y, image.dim_z);
1601
1602	// ---- ---- ---- ---- Partitioning ---- ---- ---- ----
1603	auto partition_func = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1604		const vint4 colors[] {
1605			vint4(  0,   0,   0, 255),
1606			vint4(255,   0,   0, 255),
1607			vint4(  0, 255,   0, 255),
1608			vint4(  0,   0, 255, 255),
1609			vint4(255, 255, 255, 255)
1610		};
1611
1612		size_t texel_index = texel_y * info.block_x + texel_x;
1613
1614		int partition { 0 };
1615		if (!info.is_constant_block)
1616		{
1617			partition = info.partition_assignment[texel_index] + 1;
1618		}
1619
1620		return colors[partition];
1621	};
1622
1623	print_diagnostic_image(context, image, *diag_image, partition_func);
1624	std::string fname = stem + "_diag_partitioning.png";
1625	store_ncimage(diag_image, fname.c_str(), false);
1626
1627	// ---- ---- ---- ---- Weight planes  ---- ---- ---- ----
1628	auto texel_func1 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1629		(void)texel_x;
1630		(void)texel_y;
1631
1632		const vint4 colors[] {
1633			vint4(  0,   0,   0, 255),
1634			vint4(255,   0,   0, 255),
1635			vint4(  0, 255,   0, 255),
1636			vint4(  0,   0, 255, 255),
1637			vint4(255, 255, 255, 255)
1638		};
1639
1640		int component { 0 };
1641		if (info.is_dual_plane_block)
1642		{
1643			component = info.dual_plane_component + 1;
1644		}
1645
1646		return colors[component];
1647	};
1648
1649	print_diagnostic_image(context, image, *diag_image, texel_func1);
1650	fname = stem + "_diag_weight_plane2.png";
1651	store_ncimage(diag_image, fname.c_str(), false);
1652
1653	// ---- ---- ---- ---- Weight density  ---- ---- ---- ----
1654	auto texel_func2 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1655		(void)texel_x;
1656		(void)texel_y;
1657
1658		float density = 0.0f;
1659		if (!info.is_constant_block)
1660		{
1661			float texel_count = static_cast<float>(info.block_x * info.block_y);
1662			float weight_count = static_cast<float>(info.weight_x * info.weight_y);
1663			density = weight_count / texel_count;
1664		}
1665
1666		int densityi = static_cast<int>(255.0f * density);
1667		return vint4(densityi, densityi, densityi, 255);
1668	};
1669
1670	print_diagnostic_image(context, image, *diag_image, texel_func2);
1671	fname = stem + "_diag_weight_density.png";
1672	store_ncimage(diag_image, fname.c_str(), false);
1673
1674	// ---- ---- ---- ---- Weight quant  ---- ---- ---- ----
1675	auto texel_func3 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1676		(void)texel_x;
1677		(void)texel_y;
1678
1679		int quant { 0 };
1680		if (!info.is_constant_block)
1681		{
1682			quant = info.weight_level_count - 1;
1683		}
1684
1685		return vint4(quant, quant, quant, 255);
1686	};
1687
1688	print_diagnostic_image(context, image, *diag_image, texel_func3);
1689	fname = stem + "_diag_weight_quant.png";
1690	store_ncimage(diag_image, fname.c_str(), false);
1691
1692	// ---- ---- ---- ---- Color quant  ---- ---- ---- ----
1693	auto texel_func4 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1694		(void)texel_x;
1695		(void)texel_y;
1696
1697		int quant { 0 };
1698		if (!info.is_constant_block)
1699		{
1700			quant = info.color_level_count - 1;
1701		}
1702
1703		return vint4(quant, quant, quant, 255);
1704	};
1705
1706	print_diagnostic_image(context, image, *diag_image, texel_func4);
1707	fname = stem + "_diag_color_quant.png";
1708	store_ncimage(diag_image, fname.c_str(), false);
1709
1710	// ---- ---- ---- ---- Color endpoint mode: Index ---- ---- ---- ----
1711	auto texel_func5 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1712		(void)texel_x;
1713		(void)texel_y;
1714
1715		size_t texel_index = texel_y * info.block_x + texel_x;
1716
1717		int cem { 255 };
1718		if (!info.is_constant_block)
1719		{
1720			uint8_t partition = info.partition_assignment[texel_index];
1721			cem = info.color_endpoint_modes[partition] * 16;
1722		}
1723
1724		return vint4(cem, cem, cem, 255);
1725	};
1726
1727	print_diagnostic_image(context, image, *diag_image, texel_func5);
1728	fname = stem + "_diag_cem_index.png";
1729	store_ncimage(diag_image, fname.c_str(), false);
1730
1731	// ---- ---- ---- ---- Color endpoint mode: Components ---- ---- ---- ----
1732	auto texel_func6 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1733		(void)texel_x;
1734		(void)texel_y;
1735
1736		const vint4 colors[] {
1737			vint4(  0,   0,   0, 255),
1738			vint4(255,   0,   0, 255),
1739			vint4(  0, 255,   0, 255),
1740			vint4(  0,   0, 255, 255),
1741			vint4(255, 255, 255, 255)
1742		};
1743
1744		size_t texel_index = texel_y * info.block_x + texel_x;
1745
1746		int components { 0 };
1747		if (!info.is_constant_block)
1748		{
1749			uint8_t partition = info.partition_assignment[texel_index];
1750			uint8_t cem = info.color_endpoint_modes[partition];
1751
1752			switch (cem)
1753			{
1754				case 0:
1755				case 1:
1756				case 2:
1757				case 3:
1758					components = 1;
1759					break;
1760				case 4:
1761				case 5:
1762					components = 2;
1763					break;
1764				case 6:
1765				case 7:
1766				case 8:
1767				case 9:
1768				case 11:
1769					components = 3;
1770					break;
1771				default:
1772					components = 4;
1773					break;
1774			}
1775		}
1776
1777		return colors[components];
1778	};
1779
1780	print_diagnostic_image(context, image, *diag_image, texel_func6);
1781	fname = stem + "_diag_cem_components.png";
1782	store_ncimage(diag_image, fname.c_str(), false);
1783
1784	// ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1785	auto texel_func7 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1786		(void)texel_x;
1787		(void)texel_y;
1788
1789		const vint4 colors[] {
1790			vint4(  0,   0,   0, 255),
1791			vint4(255,   0,   0, 255),
1792			vint4(  0, 255,   0, 255),
1793			vint4(  0,   0, 255, 255),
1794		};
1795
1796		size_t texel_index = texel_y * info.block_x + texel_x;
1797
1798		int style { 0 };
1799		if (!info.is_constant_block)
1800		{
1801			uint8_t partition = info.partition_assignment[texel_index];
1802			uint8_t cem = info.color_endpoint_modes[partition];
1803
1804			switch (cem)
1805			{
1806				// Direct - two absolute endpoints
1807				case 0:
1808				case 1:
1809				case 2:
1810				case 3:
1811				case 4:
1812				case 8:
1813				case 11:
1814				case 12:
1815				case 14:
1816				case 15:
1817					style = 1;
1818					break;
1819				// Offset - one absolute plus delta
1820				case 5:
1821				case 9:
1822				case 13:
1823					style = 2;
1824					break;
1825				// Scale - one absolute plus scale
1826				case 6:
1827				case 7:
1828				case 10:
1829					style = 3;
1830					break;
1831				// Shouldn't happen ...
1832				default:
1833					style = 0;
1834					break;
1835			}
1836		}
1837
1838		return colors[style];
1839	};
1840
1841	print_diagnostic_image(context, image, *diag_image, texel_func7);
1842	fname = stem + "_diag_cem_style.png";
1843	store_ncimage(diag_image, fname.c_str(), false);
1844
1845	// ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1846	auto texel_func8 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1847		(void)texel_x;
1848		(void)texel_y;
1849
1850		size_t texel_index = texel_y * info.block_x + texel_x;
1851
1852		int style { 0 };
1853		if (!info.is_constant_block)
1854		{
1855			uint8_t partition = info.partition_assignment[texel_index];
1856			uint8_t cem = info.color_endpoint_modes[partition];
1857
1858			switch (cem)
1859			{
1860				// LDR blocks
1861				case 0:
1862				case 1:
1863				case 4:
1864				case 5:
1865				case 6:
1866				case 8:
1867				case 9:
1868				case 10:
1869				case 12:
1870				case 13:
1871					style = 128;
1872					break;
1873				// HDR blocks
1874				default:
1875					style = 155;
1876					break;
1877			}
1878		}
1879
1880		return vint4(style, style, style, 255);
1881	};
1882
1883	print_diagnostic_image(context, image, *diag_image, texel_func8);
1884	fname = stem + "_diag_cem_hdr.png";
1885	store_ncimage(diag_image, fname.c_str(), false);
1886
1887	free_image(diag_image);
1888}
1889
#if QUALITY_CONTROL
constexpr double MAX_PSNR = 99.9;
constexpr double MAX_VALUE = 255;
constexpr double THRESHOLD_R = 30.0;
constexpr double THRESHOLD_G = 30.0;
constexpr double THRESHOLD_B = 30.0;
constexpr double THRESHOLD_A = 30.0;
constexpr double THRESHOLD_RGB = 30.0;
constexpr double LOG_BASE = 10.0;

/**
 * @brief Convert an accumulated error total into a PSNR value in dB.
 *
 * @param errorTotal    The error summed over all samples.
 * @param sampleCount   The number of samples the total is averaged over.
 *
 * @return The PSNR relative to MAX_VALUE, or MAX_PSNR if the total is zero.
 */
static double ErrorTotalToPsnr(uint64_t errorTotal, double sampleCount)
{
    // A zero error would need log(infinity), so report the capped maximum
    if (errorTotal == 0) {
        return MAX_PSNR;
    }

    double mse = static_cast<double>(errorTotal) / sampleCount;
    return LOG_BASE * log((MAX_VALUE * MAX_VALUE) / mse) / log(LOG_BASE);
}

/**
 * @brief Check whether the measured compression error meets the PSNR thresholds.
 *
 * A PSNR is computed for each color component, plus one for all components
 * other than alpha combined. The values are printed to stdout and each one is
 * compared against its threshold.
 *
 * @param mseIn      Per-component arrays, each holding @c blockNum error values.
 * @param blockNum   The number of entries in each per-component array.
 * @param blockXYZ   Per-entry sample count used to average the error; presumably
 *                   the number of texels in a block - TODO confirm with caller.
 *
 * @return @c true if every PSNR is strictly above its threshold.
 */
bool CheckQuality(int32_t* mseIn[RGBA_COM], int blockNum, int blockXYZ)
{
    double psnr[RGBA_COM + 1];
    double threshold[RGBA_COM + 1] = { THRESHOLD_R, THRESHOLD_G, THRESHOLD_B, THRESHOLD_A, THRESHOLD_RGB};
    uint64_t mseTotal[RGBA_COM + 1] = { 0, 0, 0, 0, 0};

    // Accumulate one total per component; the extra slot at index RGBA_COM
    // gathers the combined total of every component other than alpha
    for (int i = R_COM; i < RGBA_COM; i++) {
        const int32_t* mse = mseIn[i];
        for (int j = 0; j < blockNum; j++) {
            mseTotal[i] += mse[j];
            if (i != A_COM) {
                mseTotal[RGBA_COM] += mse[j];
            }
        }
    }

    // Sample counts are formed in double; the plain int product can overflow
    // for large images
    const double sampleCount = static_cast<double>(blockNum) * blockXYZ;

    for (int i = R_COM; i < RGBA_COM; i++) {
        psnr[i] = ErrorTotalToPsnr(mseTotal[i], sampleCount);
    }

    // The combined total spans every component except alpha
    psnr[RGBA_COM] = ErrorTotalToPsnr(mseTotal[RGBA_COM], sampleCount * (RGBA_COM - 1));

    printf("astc psnr r%f g%f b%f a%f rgb%f\n",
        psnr[R_COM], psnr[G_COM], psnr[B_COM], psnr[A_COM],
        psnr[RGBA_COM]);

    return (psnr[R_COM] > threshold[R_COM]) && (psnr[G_COM] > threshold[G_COM])
        && (psnr[B_COM] > threshold[B_COM]) && (psnr[A_COM] > threshold[A_COM])
        && (psnr[RGBA_COM] > threshold[RGBA_COM]);
}
#endif
1936
1937/**
1938 * @brief The main entry point.
1939 *
1940 * @param argc   The number of arguments.
1941 * @param argv   The vector of arguments.
1942 *
1943 * @return 0 on success, non-zero otherwise.
1944 */
1945int astcenc_main(
1946	int argc,
1947	char **argv
1948) {
1949	double start_time = get_time();
1950
1951	if (argc < 2)
1952	{
1953		astcenc_print_shorthelp();
1954		return 0;
1955	}
1956
1957	astcenc_operation operation;
1958	astcenc_profile profile;
1959	int error = parse_commandline_options(argc, argv, operation, profile);
1960	if (error)
1961	{
1962		return 1;
1963	}
1964
1965	switch (operation)
1966	{
1967	case ASTCENC_OP_HELP:
1968		astcenc_print_longhelp();
1969		return 0;
1970	case ASTCENC_OP_VERSION:
1971		astcenc_print_header();
1972		return 0;
1973	default:
1974		break;
1975	}
1976
1977	std::string input_filename = argc >= 3 ? argv[2] : "";
1978	std::string output_filename = argc >= 4 ? argv[3] : "";
1979
1980	if (input_filename.empty())
1981	{
1982		print_error("ERROR: Input file not specified\n");
1983		return 1;
1984	}
1985
1986	if (output_filename.empty())
1987	{
1988		print_error("ERROR: Output file not specified\n");
1989		return 1;
1990	}
1991
1992	// TODO: Handle RAII resources so they get freed when out of scope
1993	// Load the compressed input file if needed
1994
1995	// This has to come first, as the block size is in the file header
1996	astc_compressed_image image_comp {};
1997	if (operation & ASTCENC_STAGE_LD_COMP)
1998	{
1999		if (ends_with(input_filename, ".astc"))
2000		{
2001			error = load_cimage(input_filename.c_str(), image_comp);
2002			if (error)
2003			{
2004				return 1;
2005			}
2006		}
2007		else if (ends_with(input_filename, ".ktx"))
2008		{
2009			bool is_srgb;
2010			error = load_ktx_compressed_image(input_filename.c_str(), is_srgb, image_comp);
2011			if (error)
2012			{
2013				return 1;
2014			}
2015
2016			if (is_srgb && (profile != ASTCENC_PRF_LDR_SRGB))
2017			{
2018				printf("WARNING: Input file is sRGB, but decompressing as linear\n");
2019			}
2020
2021			if (!is_srgb && (profile == ASTCENC_PRF_LDR_SRGB))
2022			{
2023				printf("WARNING: Input file is linear, but decompressing as sRGB\n");
2024			}
2025		}
2026		else
2027		{
2028			print_error("ERROR: Unknown compressed input file type\n");
2029			return 1;
2030		}
2031	}
2032
2033	astcenc_config config {};
2034	astcenc_preprocess preprocess;
2035	error = init_astcenc_config(argc, argv, profile, operation, image_comp, preprocess, config);
2036	if (error)
2037	{
2038		return 1;
2039	}
2040
2041	// Initialize cli_config_options with default values
2042	cli_config_options cli_config { 0, 1, 1, false, false, false, -10, 10,
2043		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A },
2044		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A } };
2045
2046	error = edit_astcenc_config(argc, argv, operation, cli_config, config);
2047	if (error)
2048	{
2049		return 1;
2050	}
2051
2052	// Enable progress callback if not in silent mode and using a terminal
2053	#if defined(_WIN32)
2054		int stdoutfno = _fileno(stdout);
2055	#else
2056		int stdoutfno = STDOUT_FILENO;
2057	#endif
2058
2059	if ((!cli_config.silentmode) && isatty(stdoutfno))
2060	{
2061		config.progress_callback = progress_emitter;
2062	}
2063
2064	astcenc_image* image_uncomp_in = nullptr ;
2065	unsigned int image_uncomp_in_component_count = 0;
2066	bool image_uncomp_in_is_hdr = false;
2067	astcenc_image* image_decomp_out = nullptr;
2068
2069	// Determine decompression output bitness, if limited by file type
2070	int out_bitness = 0;
2071	if (operation & ASTCENC_STAGE_DECOMPRESS)
2072	{
2073		out_bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2074		if (out_bitness == 0)
2075		{
2076			bool is_hdr = (config.profile == ASTCENC_PRF_HDR) ||
2077			              (config.profile == ASTCENC_PRF_HDR_RGB_LDR_A);
2078			out_bitness = is_hdr ? 16 : 8;
2079		}
2080
2081		// If decompressed output is unorm8 then force the decode_unorm8 heuristics for compression
2082		if (out_bitness == 8)
2083		{
2084			config.flags |= ASTCENC_FLG_USE_DECODE_UNORM8;
2085		}
2086	}
2087
2088	// TODO: Handle RAII resources so they get freed when out of scope
2089	astcenc_error    codec_status;
2090	astcenc_context* codec_context;
2091
2092	// Preflight - check we have valid extensions for storing a file
2093	if (operation & ASTCENC_STAGE_ST_NCOMP)
2094	{
2095		int bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2096		if (bitness < 0)
2097		{
2098			const char *eptr = strrchr(output_filename.c_str(), '.');
2099			eptr = eptr ? eptr : "";
2100			print_error("ERROR: Unknown uncompressed output file type '%s'\n", eptr);
2101			return 1;
2102		}
2103	}
2104
2105	if (operation & ASTCENC_STAGE_ST_COMP)
2106	{
2107#if defined(_WIN32)
2108		bool is_null = output_filename == "NUL" || output_filename == "nul";
2109#else
2110		bool is_null = output_filename == "/dev/null";
2111#endif
2112
2113		if (!(is_null || ends_with(output_filename, ".astc") || ends_with(output_filename, ".ktx")))
2114		{
2115			const char *eptr = strrchr(output_filename.c_str(), '.');
2116			eptr = eptr ? eptr : "";
2117			print_error("ERROR: Unknown compressed output file type '%s'\n", eptr);
2118			return 1;
2119		}
2120	}
2121
2122	codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context);
2123	if (codec_status != ASTCENC_SUCCESS)
2124	{
2125		print_error("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status));
2126		return 1;
2127	}
2128
2129	// Load the uncompressed input file if needed
2130	if (operation & ASTCENC_STAGE_LD_NCOMP)
2131	{
2132		image_uncomp_in = load_uncomp_file(
2133		    input_filename.c_str(), cli_config.array_size, cli_config.y_flip,
2134		    image_uncomp_in_is_hdr, image_uncomp_in_component_count);
2135		if (!image_uncomp_in)
2136		{
2137			print_error("ERROR: Failed to load uncompressed image file\n");
2138			return 1;
2139		}
2140
2141
2142		if (preprocess != ASTCENC_PP_NONE)
2143		{
2144			// Allocate a float image so we can avoid additional quantization,
2145			// as e.g. premultiplication can result in fractional color values
2146			astcenc_image* image_pp = alloc_image(32,
2147			                                      image_uncomp_in->dim_x,
2148			                                      image_uncomp_in->dim_y,
2149			                                      image_uncomp_in->dim_z);
2150			if (!image_pp)
2151			{
2152				print_error("ERROR: Failed to allocate preprocessed image\n");
2153				return 1;
2154			}
2155
2156			if (preprocess == ASTCENC_PP_NORMALIZE)
2157			{
2158				image_preprocess_normalize(*image_uncomp_in, *image_pp);
2159			}
2160
2161			if (preprocess == ASTCENC_PP_PREMULTIPLY)
2162			{
2163				image_preprocess_premultiply(*image_uncomp_in, *image_pp,
2164				                             config.profile);
2165			}
2166
2167			// Delete the original as we no longer need it
2168			free_image(image_uncomp_in);
2169			image_uncomp_in = image_pp;
2170		}
2171
2172		if (!cli_config.silentmode)
2173		{
2174			printf("Source image\n");
2175			printf("============\n\n");
2176			printf("    Source:                     %s\n", input_filename.c_str());
2177			printf("    Color profile:              %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR");
2178			if (image_uncomp_in->dim_z > 1)
2179			{
2180				printf("    Dimensions:                 3D, %ux%ux%u\n",
2181				       image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z);
2182			}
2183			else
2184			{
2185				printf("    Dimensions:                 2D, %ux%u\n",
2186				       image_uncomp_in->dim_x, image_uncomp_in->dim_y);
2187			}
2188			printf("    Components:                 %d\n\n", image_uncomp_in_component_count);
2189		}
2190	}
2191
2192	double image_size = 0.0;
2193	if (image_uncomp_in)
2194	{
2195		image_size = static_cast<double>(image_uncomp_in->dim_x) *
2196		             static_cast<double>(image_uncomp_in->dim_y) *
2197		             static_cast<double>(image_uncomp_in->dim_z);
2198	}
2199	else
2200	{
2201		image_size = static_cast<double>(image_comp.dim_x) *
2202		             static_cast<double>(image_comp.dim_y) *
2203		             static_cast<double>(image_comp.dim_z);
2204	}
2205
2206	// Compress an image
2207	double best_compression_time = 100000.0;
2208	double total_compression_time = 0.0;
2209	if (operation & ASTCENC_STAGE_COMPRESS)
2210	{
2211		print_astcenc_config(cli_config, config);
2212
2213		unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x;
2214		unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y;
2215		unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z;
2216		size_t buffer_size = blocks_x * blocks_y * blocks_z * 16;
2217		uint8_t* buffer = new uint8_t[buffer_size];
2218
2219		compression_workload work;
2220		work.context = codec_context;
2221		image_uncomp_in->dim_stride = image_uncomp_in->dim_x;
2222		work.image = image_uncomp_in;
2223		work.swizzle = cli_config.swz_encode;
2224		work.data_out = buffer;
2225		work.data_len = buffer_size;
2226		work.error = ASTCENC_SUCCESS;
2227#if QUALITY_CONTROL
2228		work.calQualityEnable = true;
2229		work.mse[R_COM] = work.mse[G_COM] = work.mse[B_COM] = work.mse[A_COM] = nullptr;
2230		if (work.calQualityEnable) {
2231		for (int i = R_COM; i < RGBA_COM; i++) {
2232				work.mse[i] = (int32_t*)calloc(blocks_x * blocks_y, sizeof(int32_t));
2233				if (!work.mse[i]) {
2234					printf("quality control calloc failed");
2235					return -1;
2236				}
2237			}
2238		}
2239#endif
2240		// Only launch worker threads for multi-threaded use - it makes basic
2241		// single-threaded profiling and debugging a little less convoluted
2242		double start_compression_time = get_time();
2243		for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2244		{
2245			if (config.progress_callback)
2246			{
2247				printf("Compression\n");
2248				printf("===========\n");
2249				printf("\n");
2250			}
2251
2252			double start_iter_time = get_time();
2253			if (cli_config.thread_count > 1)
2254			{
2255				launch_threads("Compression", cli_config.thread_count, compression_workload_runner, &work);
2256			}
2257			else
2258			{
2259				work.error = astcenc_compress_image(
2260					work.context, work.image, &work.swizzle,
2261					work.data_out, work.data_len,
2262#if QUALITY_CONTROL
2263			    	work.calQualityEnable, work.mse,
2264#endif
2265			    	0);
2266			}
2267
2268			astcenc_compress_reset(codec_context);
2269
2270			if (config.progress_callback)
2271			{
2272				printf("\n\n");
2273			}
2274
2275			double iter_time = get_time() - start_iter_time;
2276			best_compression_time = astc::min(iter_time, best_compression_time);
2277		}
2278		total_compression_time = get_time() - start_compression_time;
2279
2280		if (work.error != ASTCENC_SUCCESS)
2281		{
2282			print_error("ERROR: Codec compress failed: %s\n", astcenc_get_error_string(work.error));
2283			return 1;
2284		}
2285#if QUALITY_CONTROL
2286		if (work.calQualityEnable && !CheckQuality(work.mse, blocks_x * blocks_y, config.block_x * config.block_y)) {
2287		    work.error = ASTCENC_ERR_BAD_QUALITY_CHECK;
2288		}
2289		if (work.calQualityEnable) {
2290			for (int i = R_COM; i < RGBA_COM; i++) {
2291				if (work.mse[i]) {
2292					free(work.mse[i]);
2293				}
2294			}
2295		}
2296#endif
2297		image_comp.block_x = config.block_x;
2298		image_comp.block_y = config.block_y;
2299		image_comp.block_z = config.block_z;
2300		image_comp.dim_x = image_uncomp_in->dim_x;
2301		image_comp.dim_y = image_uncomp_in->dim_y;
2302		image_comp.dim_z = image_uncomp_in->dim_z;
2303		image_comp.data = buffer;
2304		image_comp.data_len = buffer_size;
2305	}
2306
2307	// Decompress an image
2308	double best_decompression_time = 100000.0;
2309	double total_decompression_time = 0.0;
2310	if (operation & ASTCENC_STAGE_DECOMPRESS)
2311	{
2312		image_decomp_out = alloc_image(
2313		    out_bitness, image_comp.dim_x, image_comp.dim_y, image_comp.dim_z);
2314
2315		decompression_workload work;
2316		work.context = codec_context;
2317		work.data = image_comp.data;
2318		work.data_len = image_comp.data_len;
2319		work.image_out = image_decomp_out;
2320		work.swizzle = cli_config.swz_decode;
2321		work.error = ASTCENC_SUCCESS;
2322
2323		// Only launch worker threads for multi-threaded use - it makes basic
2324		// single-threaded profiling and debugging a little less convoluted
2325		double start_decompression_time = get_time();
2326		for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2327		{
2328			double start_iter_time = get_time();
2329			if (cli_config.thread_count > 1)
2330			{
2331				launch_threads("Decompression", cli_config.thread_count, decompression_workload_runner, &work);
2332			}
2333			else
2334			{
2335				work.error = astcenc_decompress_image(
2336				    work.context, work.data, work.data_len,
2337				    work.image_out, &work.swizzle, 0);
2338			}
2339
2340			astcenc_decompress_reset(codec_context);
2341
2342			double iter_time = get_time() - start_iter_time;
2343			best_decompression_time = astc::min(iter_time, best_decompression_time);
2344		}
2345		total_decompression_time = get_time() - start_decompression_time;
2346
2347		if (work.error != ASTCENC_SUCCESS)
2348		{
2349			print_error("ERROR: Codec decompress failed: %s\n", astcenc_get_error_string(codec_status));
2350			return 1;
2351		}
2352	}
2353
2354#if defined(_WIN32)
2355	bool is_null = output_filename == "NUL" || output_filename == "nul";
2356#else
2357	bool is_null = output_filename == "/dev/null";
2358#endif
2359
2360   // Print metrics in comparison mode
2361	if (operation & ASTCENC_STAGE_COMPARE)
2362	{
2363		bool is_normal_map = config.flags & ASTCENC_FLG_MAP_NORMAL;
2364
2365		compute_error_metrics(
2366		    image_uncomp_in_is_hdr, is_normal_map, image_uncomp_in_component_count,
2367		    image_uncomp_in, image_decomp_out, cli_config.low_fstop, cli_config.high_fstop);
2368	}
2369
2370	// Store compressed image
2371	if (operation & ASTCENC_STAGE_ST_COMP)
2372	{
2373		if (ends_with(output_filename, ".astc"))
2374		{
2375			error = store_cimage(image_comp, output_filename.c_str());
2376			if (error)
2377			{
2378				print_error("ERROR: Failed to store compressed image\n");
2379				return 1;
2380			}
2381		}
2382		else if (ends_with(output_filename, ".ktx"))
2383		{
2384			bool srgb = profile == ASTCENC_PRF_LDR_SRGB;
2385			error = store_ktx_compressed_image(image_comp, output_filename.c_str(), srgb);
2386			if (error)
2387			{
2388				print_error("ERROR: Failed to store compressed image\n");
2389				return 1;
2390			}
2391		}
2392		else
2393		{
2394			if (!is_null)
2395			{
2396				print_error("ERROR: Unknown compressed output file type\n");
2397				return 1;
2398			}
2399		}
2400	}
2401
2402	// Store decompressed image
2403	if (operation & ASTCENC_STAGE_ST_NCOMP)
2404	{
2405		if (!is_null)
2406		{
2407			bool store_result = store_ncimage(image_decomp_out, output_filename.c_str(),
2408			                                  cli_config.y_flip);
2409			if (!store_result)
2410			{
2411				print_error("ERROR: Failed to write output image %s\n", output_filename.c_str());
2412				return 1;
2413			}
2414		}
2415	}
2416
2417	// Store diagnostic images
2418	if (cli_config.diagnostic_images && !is_null)
2419	{
2420		print_diagnostic_images(codec_context, image_comp, output_filename);
2421	}
2422
2423	free_image(image_uncomp_in);
2424	free_image(image_decomp_out);
2425	astcenc_context_free(codec_context);
2426
2427	delete[] image_comp.data;
2428
2429	if ((operation & ASTCENC_STAGE_COMPARE) || (!cli_config.silentmode))
2430	{
2431		double end_time = get_time();
2432
2433		double repeats = static_cast<double>(cli_config.repeat_count);
2434		double avg_compression_time = total_compression_time / repeats;
2435		double avg_decompression_time = total_decompression_time / repeats;
2436		double total_time = (end_time - start_time) - ((repeats - 1.0) * avg_compression_time)  - ((repeats - 1.0) * avg_decompression_time);
2437
2438		printf("Performance metrics\n");
2439		printf("===================\n\n");
2440		printf("    Total time:                %8.4f s\n", total_time);
2441
2442		if (operation & ASTCENC_STAGE_COMPRESS)
2443		{
2444			double compression_rate = image_size / (best_compression_time * 1000000.0);
2445
2446			printf("    Coding time:               %8.4f s\n", best_compression_time);
2447			printf("    Coding rate:               %8.4f MT/s\n", compression_rate);
2448		}
2449
2450		if (operation & ASTCENC_STAGE_DECOMPRESS)
2451		{
2452			double decompression_rate = image_size / (best_decompression_time * 1000000.0);
2453			printf("    Decoding time:             %8.4f s\n", best_decompression_time);
2454			printf("    Decoding rate:             %8.4f MT/s\n", decompression_rate);
2455		}
2456	}
2457
2458	return 0;
2459}
2460