1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2023 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief Functions for loading/storing uncompressed and compressed images.
20 */
21
22#include <array>
23#include <cassert>
24#include <cstdio>
25#include <cstdlib>
26#include <cstring>
27#include <fstream>
28#include <iomanip>
29#include <sstream>
30
31#include "astcenccli_internal.h"
32
33#include "stb_image.h"
34#include "stb_image_write.h"
35#include "tinyexr.h"
36
37/**
38 * @brief Determine the output file name to use for a sliced image write.
39 *
40 * @param img        The source data for the image.
41 * @param filename   The base name of the file to save.
42 * @param index      The slice index to write.
43 *
44 * @return The file name to use when saving the file.
45 */
46static std::string get_output_filename(
47	const astcenc_image* img,
48	const char* filename,
49	unsigned int index
50) {
51	if (img->dim_z <= 1)
52	{
53		return filename;
54	}
55
56	std::string fnmod(filename);
57	std::string fnext = fnmod.substr(fnmod.find_last_of("."));
58
59	// Remove the extension
60	fnmod = fnmod.erase(fnmod.length() - fnext.size());
61
62	// Insert the file index into the base name, then append the extension
63	std::stringstream ss;
64	ss << fnmod << "_" << std::setw(3) << std::setfill('0') << index << fnext;
65	return ss.str();
66}
67
68/* ============================================================================
69  Image load and store through the stb_image and tinyexr libraries
70============================================================================ */
71
72/**
73 * @brief Load a .exr image using TinyExr to provide the loader.
74 *
75 * @param      filename          The name of the file to load.
76 * @param      y_flip            Should the image be vertically flipped?
77 * @param[out] is_hdr            Is this an HDR image load? Always @c true for this function.
78 * @param[out] component_count   The number of components in the data.
79 *
80 * @return The loaded image data in a canonical 4 channel format.
81 */
82static astcenc_image* load_image_with_tinyexr(
83	const char* filename,
84	bool y_flip,
85	bool& is_hdr,
86	unsigned int& component_count
87) {
88	int dim_x, dim_y;
89	float* image;
90	const char* err;
91
92	int load_res = LoadEXR(&image, &dim_x, &dim_y, filename, &err);
93	if (load_res != TINYEXR_SUCCESS)
94	{
95		print_error("ERROR: Failed to load image %s (%s)\n", filename, err);
96		free(reinterpret_cast<void*>(const_cast<char*>(err)));
97		return nullptr;
98	}
99
100	astcenc_image* res_img = astc_img_from_floatx4_array(image, dim_x, dim_y, y_flip);
101	free(image);
102
103	is_hdr = true;
104	component_count = 4;
105	return res_img;
106}
107
108/**
109 * @brief Load an image using STBImage to provide the loader.
110 *
111 * @param      filename          The name of the file to load.
112 * @param      y_flip            Should the image be vertically flipped?
113 * @param[out] is_hdr            Is this an HDR image load?
114 * @param[out] component_count   The number of components in the data.
115 *
116 * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
117 */
118static astcenc_image* load_image_with_stb(
119	const char* filename,
120	bool y_flip,
121	bool& is_hdr,
122	unsigned int& component_count
123) {
124	int dim_x, dim_y;
125
126	if (stbi_is_hdr(filename))
127	{
128		float* data = stbi_loadf(filename, &dim_x, &dim_y, nullptr, STBI_rgb_alpha);
129		if (data)
130		{
131			astcenc_image* img = astc_img_from_floatx4_array(data, dim_x, dim_y, y_flip);
132			stbi_image_free(data);
133			is_hdr = true;
134			component_count = 4;
135			return img;
136		}
137	}
138	else
139	{
140		uint8_t* data = stbi_load(filename, &dim_x, &dim_y, nullptr, STBI_rgb_alpha);
141		if (data)
142		{
143			astcenc_image* img = astc_img_from_unorm8x4_array(data, dim_x, dim_y, y_flip);
144			stbi_image_free(data);
145			is_hdr = false;
146			component_count = 4;
147			return img;
148		}
149	}
150
151	print_error("ERROR: Failed to load image %s (%s)\n", filename, stbi_failure_reason());
152	return nullptr;
153}
154
155/**
156 * @brief Save an EXR image using TinyExr to provide the store routine.
157 *
158 * @param img        The source data for the image.
159 * @param filename   The name of the file to save.
160 * @param y_flip     Should the image be vertically flipped?
161 *
162 * @return @c true if the image saved OK, @c false on error.
163 */
164static bool store_exr_image_with_tinyexr(
165	const astcenc_image* img,
166	const char* filename,
167	int y_flip
168) {
169	int res { 0 };
170
171	for (unsigned int i = 0; i < img->dim_z; i++)
172	{
173		std::string fnmod = get_output_filename(img, filename, i);
174		float* buf = floatx4_array_from_astc_img(img, y_flip, i);
175
176		res = SaveEXR(buf, img->dim_x, img->dim_y, 4, 1, fnmod.c_str(), nullptr);
177		delete[] buf;
178		if (res < 0)
179		{
180			break;
181		}
182	}
183
184	return res >= 0;
185}
186
187/**
188 * @brief Save a PNG image using STBImageWrite to provide the store routine.
189 *
190 * @param img        The source data for the image.
191 * @param filename   The name of the file to save.
192 * @param y_flip     Should the image be vertically flipped?
193 *
194 * @return @c true if the image saved OK, @c false on error.
195 */
196static bool store_png_image_with_stb(
197	const astcenc_image* img,
198	const char* filename,
199	int y_flip
200) {
201	int res { 0 };
202
203	assert(img->data_type == ASTCENC_TYPE_U8);
204
205	for (unsigned int i = 0; i < img->dim_z; i++)
206	{
207		std::string fnmod = get_output_filename(img, filename, i);
208		uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[i]);
209
210		stbi_flip_vertically_on_write(y_flip);
211		res = stbi_write_png(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf, img->dim_x * 4);
212		if (res == 0)
213		{
214			break;
215		}
216	}
217
218	return res != 0;
219}
220
221/**
222 * @brief Save a TGA image using STBImageWrite to provide the store routine.
223 *
224 * @param img        The source data for the image.
225 * @param filename   The name of the file to save.
226 * @param y_flip     Should the image be vertically flipped?
227 *
228 * @return @c true if the image saved OK, @c false on error.
229 */
230static bool store_tga_image_with_stb(
231	const astcenc_image* img,
232	const char* filename,
233	int y_flip
234) {
235	int res { 0 };
236
237	assert(img->data_type == ASTCENC_TYPE_U8);
238
239	for (unsigned int i = 0; i < img->dim_z; i++)
240	{
241		std::string fnmod = get_output_filename(img, filename, i);
242		uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[i]);
243
244		stbi_flip_vertically_on_write(y_flip);
245		res = stbi_write_tga(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf);
246		if (res == 0)
247		{
248			break;
249		}
250	}
251
252	return res != 0;
253}
254
255/**
256 * @brief Save a BMP image using STBImageWrite to provide the store routine.
257 *
258 * @param img        The source data for the image.
259 * @param filename   The name of the file to save.
260 * @param y_flip     Should the image be vertically flipped?
261 *
262 * @return @c true if the image saved OK, @c false on error.
263 */
264static bool store_bmp_image_with_stb(
265	const astcenc_image* img,
266	const char* filename,
267	int y_flip
268) {
269	int res { 0 };
270
271	assert(img->data_type == ASTCENC_TYPE_U8);
272
273	for (unsigned int i = 0; i < img->dim_z; i++)
274	{
275		std::string fnmod = get_output_filename(img, filename, i);
276		uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[i]);
277
278		stbi_flip_vertically_on_write(y_flip);
279		res = stbi_write_bmp(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf);
280		if (res == 0)
281		{
282			break;
283		}
284	}
285
286	return res != 0;
287}
288
289/**
290 * @brief Save a HDR image using STBImageWrite to provide the store routine.
291 *
292 * @param img        The source data for the image.
293 * @param filename   The name of the file to save.
294 * @param y_flip     Should the image be vertically flipped?
295 *
296 * @return @c true if the image saved OK, @c false on error.
297 */
298static bool store_hdr_image_with_stb(
299	const astcenc_image* img,
300	const char* filename,
301	int y_flip
302) {
303	int res { 0 };
304
305	for (unsigned int i = 0; i < img->dim_z; i++)
306	{
307		std::string fnmod = get_output_filename(img, filename, i);
308		float* buf = floatx4_array_from_astc_img(img, y_flip, i);
309
310		res = stbi_write_hdr(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf);
311		delete[] buf;
312		if (res == 0)
313		{
314			break;
315		}
316	}
317
318	return res != 0;
319}
320
321/* ============================================================================
322Native Load and store of KTX and DDS file formats.
323
324Unlike "regular" 2D image formats, which are mostly supported through stb_image
325and tinyexr, these formats are supported directly; this involves a relatively
326large number of pixel formats.
327
328The following restrictions apply to loading of these file formats:
329
330    * Only uncompressed data supported
331    * Only first mipmap in mipmap pyramid supported
332    * KTX: Cube-map arrays are not supported
333============================================================================ */
/**
 * @brief The scanline conversions understood by @c copy_scanline().
 *
 * Names are of the form SOURCE_TO_DESTINATION. Every destination is a four
 * component RGBA layout, either 8 bits per component or FP16 per component.
 */
enum scanline_transfer
{
	// 8-bit sources, copied through unchanged
	R8_TO_RGBA8 = 0,
	RG8_TO_RGBA8,
	RGB8_TO_RGBA8,
	RGBA8_TO_RGBA8,
	BGR8_TO_RGBA8,
	BGRA8_TO_RGBA8,
	L8_TO_RGBA8,
	LA8_TO_RGBA8,

	// 8-bit four component sources whose fourth component is ignored
	RGBX8_TO_RGBA8,
	BGRX8_TO_RGBA8,

	// 16-bit integer sources, rescaled and stored as FP16
	R16_TO_RGBA16F,
	RG16_TO_RGBA16F,
	RGB16_TO_RGBA16F,
	RGBA16_TO_RGBA16F,
	BGR16_TO_RGBA16F,
	BGRA16_TO_RGBA16F,
	L16_TO_RGBA16F,
	LA16_TO_RGBA16F,

	// FP16 sources, copied through unchanged
	R16F_TO_RGBA16F,
	RG16F_TO_RGBA16F,
	RGB16F_TO_RGBA16F,
	RGBA16F_TO_RGBA16F,
	BGR16F_TO_RGBA16F,
	BGRA16F_TO_RGBA16F,
	L16F_TO_RGBA16F,
	LA16F_TO_RGBA16F,

	// FP32 sources, converted to FP16
	R32F_TO_RGBA16F,
	RG32F_TO_RGBA16F,
	RGB32F_TO_RGBA16F,
	RGBA32F_TO_RGBA16F,
	BGR32F_TO_RGBA16F,
	BGRA32F_TO_RGBA16F,
	L32F_TO_RGBA16F,
	LA32F_TO_RGBA16F
};
375
376/**
377 * @brief Copy a scanline from a source file and expand to a canonical format.
378 *
379 * Outputs are always 4 component RGBA, stored as U8 (LDR) or FP16 (HDR).
380 *
381 * @param[out] dst           The start of the line to store to.
382 * @param      src           The start of the line to load.
383 * @param      pixel_count   The number of pixels in the scanline.
384 * @param      method        The conversion function.
385 */
386static void copy_scanline(
387	void* dst,
388	const void* src,
389	int pixel_count,
390	scanline_transfer method
391) {
392
393#define id(x) (x)
394#define u16_sf16(x) float_to_float16(x * (1.0f/65535.0f))
395#define f32_sf16(x) float_to_float16(x)
396
397#define COPY_R(dsttype, srctype, convfunc, oneval) \
398	do { \
399		const srctype* s = reinterpret_cast<const srctype*>(src); \
400		dsttype* d = reinterpret_cast<dsttype*>(dst); \
401		for (int i = 0; i < pixel_count; i++) \
402		{ \
403			d[4 * i    ] = convfunc(s[i]); \
404			d[4 * i + 1] = 0;              \
405			d[4 * i + 2] = 0;              \
406			d[4 * i + 3] = oneval;         \
407		} \
408	} while (0); \
409	break
410
411#define COPY_RG(dsttype, srctype, convfunc, oneval) \
412	do { \
413		const srctype* s = reinterpret_cast<const srctype*>(src); \
414		dsttype* d = reinterpret_cast<dsttype*>(dst); \
415		for (int i = 0; i < pixel_count; i++) \
416		{ \
417			d[4 * i    ] = convfunc(s[2 * i    ]); \
418			d[4 * i + 1] = convfunc(s[2 * i + 1]); \
419			d[4 * i + 2] = 0;                      \
420			d[4 * i + 3] = oneval;                 \
421		} \
422	} while (0); \
423	break
424
425#define COPY_RGB(dsttype, srctype, convfunc, oneval) \
426	do { \
427		const srctype* s = reinterpret_cast<const srctype*>(src); \
428		dsttype* d = reinterpret_cast<dsttype*>(dst); \
429		for (int i = 0; i < pixel_count; i++) \
430		{ \
431			d[4 * i    ] = convfunc(s[3 * i    ]); \
432			d[4 * i + 1] = convfunc(s[3 * i + 1]); \
433			d[4 * i + 2] = convfunc(s[3 * i + 2]); \
434			d[4 * i + 3] = oneval;                 \
435		} \
436	} while (0); \
437	break
438
439#define COPY_BGR(dsttype, srctype, convfunc, oneval) \
440	do { \
441		const srctype* s = reinterpret_cast<const srctype*>(src); \
442		dsttype* d = reinterpret_cast<dsttype*>(dst); \
443		for (int i = 0; i < pixel_count; i++)\
444		{ \
445			d[4 * i    ] = convfunc(s[3 * i + 2]); \
446			d[4 * i + 1] = convfunc(s[3 * i + 1]); \
447			d[4 * i + 2] = convfunc(s[3 * i    ]); \
448			d[4 * i + 3] = oneval;                 \
449		} \
450	} while (0); \
451	break
452
453#define COPY_RGBX(dsttype, srctype, convfunc, oneval) \
454	do { \
455		const srctype* s = reinterpret_cast<const srctype*>(src); \
456		dsttype* d = reinterpret_cast<dsttype*>(dst); \
457		for (int i = 0; i < pixel_count; i++)\
458		{ \
459			d[4 * i    ] = convfunc(s[4 * i    ]); \
460			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
461			d[4 * i + 2] = convfunc(s[4 * i + 2]); \
462			d[4 * i + 3] = oneval;                 \
463		} \
464	} while (0); \
465	break
466
467#define COPY_BGRX(dsttype, srctype, convfunc, oneval) \
468	do { \
469		const srctype* s = reinterpret_cast<const srctype*>(src); \
470		dsttype* d = reinterpret_cast<dsttype*>(dst); \
471		for (int i = 0; i < pixel_count; i++)\
472		{ \
473			d[4 * i    ] = convfunc(s[4 * i + 2]); \
474			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
475			d[4 * i + 2] = convfunc(s[4 * i    ]); \
476			d[4 * i + 3] = oneval;                 \
477		} \
478	} while (0); \
479	break
480
481#define COPY_RGBA(dsttype, srctype, convfunc, oneval) \
482	do { \
483		const srctype* s = reinterpret_cast<const srctype*>(src); \
484		dsttype* d = reinterpret_cast<dsttype*>(dst); \
485		for (int i = 0; i < pixel_count; i++) \
486		{ \
487			d[4 * i    ] = convfunc(s[4 * i    ]); \
488			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
489			d[4 * i + 2] = convfunc(s[4 * i + 2]); \
490			d[4 * i + 3] = convfunc(s[4 * i + 3]); \
491		} \
492	} while (0); \
493	break
494
495#define COPY_BGRA(dsttype, srctype, convfunc, oneval) \
496	do { \
497		const srctype* s = reinterpret_cast<const srctype*>(src); \
498		dsttype* d = reinterpret_cast<dsttype*>(dst); \
499		for (int i = 0; i < pixel_count; i++) \
500		{ \
501			d[4 * i    ] = convfunc(s[4 * i + 2]); \
502			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
503			d[4 * i + 2] = convfunc(s[4 * i    ]); \
504			d[4 * i + 3] = convfunc(s[4 * i + 3]); \
505		} \
506	} while (0); \
507	break
508
509#define COPY_L(dsttype, srctype, convfunc, oneval) \
510	do { \
511		const srctype* s = reinterpret_cast<const srctype*>(src); \
512		dsttype* d = reinterpret_cast<dsttype*>(dst); \
513		for (int i = 0; i < pixel_count; i++) \
514		{ \
515			d[4 * i    ] = convfunc(s[i]); \
516			d[4 * i + 1] = convfunc(s[i]); \
517			d[4 * i + 2] = convfunc(s[i]); \
518			d[4 * i + 3] = oneval;         \
519		} \
520	} while (0); \
521	break
522
523#define COPY_LA(dsttype, srctype, convfunc, oneval) \
524	do { \
525		const srctype* s = reinterpret_cast<const srctype*>(src); \
526		dsttype* d = reinterpret_cast<dsttype*>(dst); \
527		for (int i = 0; i < pixel_count; i++) \
528		{ \
529			d[4 * i    ] = convfunc(s[2 * i    ]); \
530			d[4 * i + 1] = convfunc(s[2 * i    ]); \
531			d[4 * i + 2] = convfunc(s[2 * i    ]); \
532			d[4 * i + 3] = convfunc(s[2 * i + 1]); \
533		} \
534	} while (0); \
535	break
536
537	switch (method)
538	{
539	case R8_TO_RGBA8:
540		COPY_R(uint8_t, uint8_t, id, 0xFF);
541	case RG8_TO_RGBA8:
542		COPY_RG(uint8_t, uint8_t, id, 0xFF);
543	case RGB8_TO_RGBA8:
544		COPY_RGB(uint8_t, uint8_t, id, 0xFF);
545	case RGBA8_TO_RGBA8:
546		COPY_RGBA(uint8_t, uint8_t, id, 0xFF);
547	case BGR8_TO_RGBA8:
548		COPY_BGR(uint8_t, uint8_t, id, 0xFF);
549	case BGRA8_TO_RGBA8:
550		COPY_BGRA(uint8_t, uint8_t, id, 0xFF);
551	case RGBX8_TO_RGBA8:
552		COPY_RGBX(uint8_t, uint8_t, id, 0xFF);
553	case BGRX8_TO_RGBA8:
554		COPY_BGRX(uint8_t, uint8_t, id, 0xFF);
555	case L8_TO_RGBA8:
556		COPY_L(uint8_t, uint8_t, id, 0xFF);
557	case LA8_TO_RGBA8:
558		COPY_LA(uint8_t, uint8_t, id, 0xFF);
559
560	case R16F_TO_RGBA16F:
561		COPY_R(uint16_t, uint16_t, id, 0x3C00);
562	case RG16F_TO_RGBA16F:
563		COPY_RG(uint16_t, uint16_t, id, 0x3C00);
564	case RGB16F_TO_RGBA16F:
565		COPY_RGB(uint16_t, uint16_t, id, 0x3C00);
566	case RGBA16F_TO_RGBA16F:
567		COPY_RGBA(uint16_t, uint16_t, id, 0x3C00);
568	case BGR16F_TO_RGBA16F:
569		COPY_BGR(uint16_t, uint16_t, id, 0x3C00);
570	case BGRA16F_TO_RGBA16F:
571		COPY_BGRA(uint16_t, uint16_t, id, 0x3C00);
572	case L16F_TO_RGBA16F:
573		COPY_L(uint16_t, uint16_t, id, 0x3C00);
574	case LA16F_TO_RGBA16F:
575		COPY_LA(uint16_t, uint16_t, id, 0x3C00);
576
577	case R16_TO_RGBA16F:
578		COPY_R(uint16_t, uint16_t, u16_sf16, 0x3C00);
579	case RG16_TO_RGBA16F:
580		COPY_RG(uint16_t, uint16_t, u16_sf16, 0x3C00);
581	case RGB16_TO_RGBA16F:
582		COPY_RGB(uint16_t, uint16_t, u16_sf16, 0x3C00);
583	case RGBA16_TO_RGBA16F:
584		COPY_RGBA(uint16_t, uint16_t, u16_sf16, 0x3C00);
585	case BGR16_TO_RGBA16F:
586		COPY_BGR(uint16_t, uint16_t, u16_sf16, 0x3C00);
587	case BGRA16_TO_RGBA16F:
588		COPY_BGRA(uint16_t, uint16_t, u16_sf16, 0x3C00);
589	case L16_TO_RGBA16F:
590		COPY_L(uint16_t, uint16_t, u16_sf16, 0x3C00);
591	case LA16_TO_RGBA16F:
592		COPY_LA(uint16_t, uint16_t, u16_sf16, 0x3C00);
593
594	case R32F_TO_RGBA16F:
595		COPY_R(uint16_t, float, f32_sf16, 0x3C00);
596	case RG32F_TO_RGBA16F:
597		COPY_RG(uint16_t, float, f32_sf16, 0x3C00);
598	case RGB32F_TO_RGBA16F:
599		COPY_RGB(uint16_t, float, f32_sf16, 0x3C00);
600	case RGBA32F_TO_RGBA16F:
601		COPY_RGBA(uint16_t, float, f32_sf16, 0x3C00);
602	case BGR32F_TO_RGBA16F:
603		COPY_BGR(uint16_t, float, f32_sf16, 0x3C00);
604	case BGRA32F_TO_RGBA16F:
605		COPY_BGRA(uint16_t, float, f32_sf16, 0x3C00);
606	case L32F_TO_RGBA16F:
607		COPY_L(uint16_t, float, f32_sf16, 0x3C00);
608	case LA32F_TO_RGBA16F:
609		COPY_LA(uint16_t, float, f32_sf16, 0x3C00);
610	}
611}
612
/**
 * @brief Swap endianness of N two byte values.
 *
 * Only whole two byte values are swapped; a trailing odd byte is left as-is.
 *
 * @param[in,out] dataptr      The data to convert.
 * @param         byte_count   The number of bytes to convert.
 */
static void switch_endianness2(
	void* dataptr,
	int byte_count
) {
	uint8_t* bytes = static_cast<uint8_t*>(dataptr);
	const int value_count = byte_count / 2;

	for (int v = 0; v < value_count; v++)
	{
		uint8_t* value = bytes + 2 * v;

		const uint8_t first = value[0];
		value[0] = value[1];
		value[1] = first;
	}
}
633
/**
 * @brief Swap endianness of N four byte values.
 *
 * Only whole four byte values are swapped; up to three trailing bytes are
 * left as-is.
 *
 * @param[in,out] dataptr      The data to convert.
 * @param         byte_count   The number of bytes to convert.
 */
static void switch_endianness4(
	void* dataptr,
	int byte_count
) {
	uint8_t* bytes = static_cast<uint8_t*>(dataptr);
	const int value_count = byte_count / 4;

	for (int v = 0; v < value_count; v++)
	{
		uint8_t* value = bytes + 4 * v;

		// Exchange the outer pair of bytes ...
		const uint8_t outer = value[0];
		value[0] = value[3];
		value[3] = outer;

		// ... then the inner pair
		const uint8_t inner = value[1];
		value[1] = value[2];
		value[2] = inner;
	}
}
658
/**
 * @brief Swap endianness of a u32 value.
 *
 * @param v   The data to convert.
 *
 * @return The converted value.
 */
static uint32_t u32_byterev(uint32_t v)
{
	// Isolate each byte, least significant first
	const uint32_t byte0 = v & 0xFF;
	const uint32_t byte1 = (v >> 8) & 0xFF;
	const uint32_t byte2 = (v >> 16) & 0xFF;
	const uint32_t byte3 = v >> 24;

	// Reassemble with the byte order reversed
	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
670
671/*
672 Notes about KTX:
673
674 After the header and the key/value data area, the actual image data follows.
 Each image starts with a 4-byte "imageSize" value indicating the number of bytes of image data that follow.
676 (For cube-maps, this value appears only after first image; the remaining 5 images are all of equal size.)
677 If the size of an image is not a multiple of 4, then it is padded to the next multiple of 4.
678 Note that this padding is NOT included in the "imageSize" field.
 In a cubemap, the padding appears after each face; note that in a 2D/3D texture, padding does
 NOT appear between the lines/planes of the texture!
681
682 In a KTX file, there may be multiple images; they are organized as follows:
683
684 For each mipmap_level in numberOfMipmapLevels
685 	UInt32 imageSize;
686 	For each array_element in numberOfArrayElements
687 	* for each face in numberOfFaces
688 		* for each z_slice in pixelDepth
689 			* for each row or row_of_blocks in pixelHeight
690 				* for each pixel or block_of_pixels in pixelWidth
691 					Byte data[format-specific-number-of-bytes]
692 				* end
693 			* end
694 		*end
695 		Byte cubePadding[0-3]
696 	*end
697 	Byte mipPadding[3 - ((imageSize+ 3) % 4)]
698 *end
699
700 In the ASTC codec, we will, for the time being only harvest the first image,
701 and we will support only a limited set of formats:
702
703 gl_type: UNSIGNED_BYTE UNSIGNED_SHORT HALF_FLOAT FLOAT UNSIGNED_INT_8_8_8_8 UNSIGNED_INT_8_8_8_8_REV
 gl_format: RED, RG, RGB, RGBA, BGR, BGRA
705 gl_internal_format: used for upload to OpenGL; we can ignore it on uncompressed-load, but
706 	need to provide a reasonable value on store: RGB8 RGBA8 RGB16F RGBA16F
707 gl_base_internal_format: same as gl_format unless texture is compressed (well, BGR is turned into RGB)
708 	RED, RG, RGB, RGBA
709*/
710
// Khronos enums

// Unsized pixel formats
#define GL_RED                                      0x1903
#define GL_RG                                       0x8227
#define GL_RGB                                      0x1907
#define GL_RGBA                                     0x1908
#define GL_BGR                                      0x80E0
#define GL_BGRA                                     0x80E1
#define GL_LUMINANCE                                0x1909
#define GL_LUMINANCE_ALPHA                          0x190A

// Sized 8-bit internal formats
#define GL_R8                                       0x8229
#define GL_RG8                                      0x822B
#define GL_RGB8                                     0x8051
#define GL_RGBA8                                    0x8058

// Sized FP16 internal formats
#define GL_R16F                                     0x822D
#define GL_RG16F                                    0x822F
#define GL_RGB16F                                   0x881B
#define GL_RGBA16F                                  0x881A

// Component data types
#define GL_UNSIGNED_BYTE                            0x1401
#define GL_UNSIGNED_SHORT                           0x1403
#define GL_HALF_FLOAT                               0x140B
#define GL_FLOAT                                    0x1406

// ASTC 2D block formats, linear color
#define GL_COMPRESSED_RGBA_ASTC_4x4                 0x93B0
#define GL_COMPRESSED_RGBA_ASTC_5x4                 0x93B1
#define GL_COMPRESSED_RGBA_ASTC_5x5                 0x93B2
#define GL_COMPRESSED_RGBA_ASTC_6x5                 0x93B3
#define GL_COMPRESSED_RGBA_ASTC_6x6                 0x93B4
#define GL_COMPRESSED_RGBA_ASTC_8x5                 0x93B5
#define GL_COMPRESSED_RGBA_ASTC_8x6                 0x93B6
#define GL_COMPRESSED_RGBA_ASTC_8x8                 0x93B7
#define GL_COMPRESSED_RGBA_ASTC_10x5                0x93B8
#define GL_COMPRESSED_RGBA_ASTC_10x6                0x93B9
#define GL_COMPRESSED_RGBA_ASTC_10x8                0x93BA
#define GL_COMPRESSED_RGBA_ASTC_10x10               0x93BB
#define GL_COMPRESSED_RGBA_ASTC_12x10               0x93BC
#define GL_COMPRESSED_RGBA_ASTC_12x12               0x93BD

// ASTC 2D block formats, sRGB color
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4         0x93D0
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4         0x93D1
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5         0x93D2
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5         0x93D3
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6         0x93D4
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5         0x93D5
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6         0x93D6
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8         0x93D7
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5        0x93D8
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6        0x93D9
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8        0x93DA
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10       0x93DB
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10       0x93DC
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12       0x93DD

// ASTC 3D block formats, linear color
#define GL_COMPRESSED_RGBA_ASTC_3x3x3_OES           0x93C0
#define GL_COMPRESSED_RGBA_ASTC_4x3x3_OES           0x93C1
#define GL_COMPRESSED_RGBA_ASTC_4x4x3_OES           0x93C2
#define GL_COMPRESSED_RGBA_ASTC_4x4x4_OES           0x93C3
#define GL_COMPRESSED_RGBA_ASTC_5x4x4_OES           0x93C4
#define GL_COMPRESSED_RGBA_ASTC_5x5x4_OES           0x93C5
#define GL_COMPRESSED_RGBA_ASTC_5x5x5_OES           0x93C6
#define GL_COMPRESSED_RGBA_ASTC_6x5x5_OES           0x93C7
#define GL_COMPRESSED_RGBA_ASTC_6x6x5_OES           0x93C8
#define GL_COMPRESSED_RGBA_ASTC_6x6x6_OES           0x93C9

// ASTC 3D block formats, sRGB color
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_3x3x3_OES   0x93E0
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x3x3_OES   0x93E1
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x3_OES   0x93E2
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x4_OES   0x93E3
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4x4_OES   0x93E4
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x4_OES   0x93E5
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x5_OES   0x93E6
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5x5_OES   0x93E7
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x5_OES   0x93E8
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x6_OES   0x93E9
787
/**
 * @brief One row of the table mapping ASTC block sizes to format enums.
 */
struct format_entry
{
	/** @brief The block size in the X dimension. */
	unsigned int x;

	/** @brief The block size in the Y dimension. */
	unsigned int y;

	/** @brief The block size in the Z dimension. */
	unsigned int z;

	/** @brief Is this the sRGB variant of the format? */
	bool is_srgb;

	/** @brief The format enum value for this block size and color space. */
	unsigned int format;
};
796
797static const std::array<format_entry, 48> ASTC_FORMATS =
798{{
799	// 2D Linear RGB
800	{ 4,  4,  1, false, GL_COMPRESSED_RGBA_ASTC_4x4},
801	{ 5,  4,  1, false, GL_COMPRESSED_RGBA_ASTC_5x4},
802	{ 5,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_5x5},
803	{ 6,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_6x5},
804	{ 6,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_6x6},
805	{ 8,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_8x5},
806	{ 8,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_8x6},
807	{ 8,  8,  1, false, GL_COMPRESSED_RGBA_ASTC_8x8},
808	{10,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_10x5},
809	{10,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_10x6},
810	{10,  8,  1, false, GL_COMPRESSED_RGBA_ASTC_10x8},
811	{10, 10,  1, false, GL_COMPRESSED_RGBA_ASTC_10x10},
812	{12, 10,  1, false, GL_COMPRESSED_RGBA_ASTC_12x10},
813	{12, 12,  1, false, GL_COMPRESSED_RGBA_ASTC_12x12},
814	// 2D SRGB
815	{ 4,  4,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4},
816	{ 5,  4,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4},
817	{ 5,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5},
818	{ 6,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5},
819	{ 6,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6},
820	{ 8,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5},
821	{ 8,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6},
822	{ 8,  8,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8},
823	{10,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5},
824	{10,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6},
825	{10,  8,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8},
826	{10, 10,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10},
827	{12, 10,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10},
828	{12, 12,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12},
829	// 3D Linear RGB
830	{ 3,  3,  3, false, GL_COMPRESSED_RGBA_ASTC_3x3x3_OES},
831	{ 4,  3,  3, false, GL_COMPRESSED_RGBA_ASTC_4x3x3_OES},
832	{ 4,  4,  3, false, GL_COMPRESSED_RGBA_ASTC_4x4x3_OES},
833	{ 4,  4,  4, false, GL_COMPRESSED_RGBA_ASTC_4x4x4_OES},
834	{ 5,  4,  4, false, GL_COMPRESSED_RGBA_ASTC_5x4x4_OES},
835	{ 5,  5,  4, false, GL_COMPRESSED_RGBA_ASTC_5x5x4_OES},
836	{ 5,  5,  5, false, GL_COMPRESSED_RGBA_ASTC_5x5x5_OES},
837	{ 6,  5,  5, false, GL_COMPRESSED_RGBA_ASTC_6x5x5_OES},
838	{ 6,  6,  5, false, GL_COMPRESSED_RGBA_ASTC_6x6x5_OES},
839	{ 6,  6,  6, false, GL_COMPRESSED_RGBA_ASTC_6x6x6_OES},
840	// 3D SRGB
841	{ 3,  3,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_3x3x3_OES},
842	{ 4,  3,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x3x3_OES},
843	{ 4,  4,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x3_OES},
844	{ 4,  4,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x4_OES},
845	{ 5,  4,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4x4_OES},
846	{ 5,  5,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x4_OES},
847	{ 5,  5,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x5_OES},
848	{ 6,  5,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5x5_OES},
849	{ 6,  6,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x5_OES},
850	{ 6,  6,  6,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x6_OES}
851}};
852
853static const format_entry* get_format(
854	unsigned int format
855) {
856	for (auto& it : ASTC_FORMATS)
857	{
858		if (it.format == format)
859		{
860			return &it;
861		}
862	}
863	return nullptr;
864}
865
866static unsigned int get_format(
867	unsigned int x,
868	unsigned int y,
869	unsigned int z,
870	bool is_srgb
871) {
872	for (auto& it : ASTC_FORMATS)
873	{
874		if ((it.x == x) && (it.y == y) && (it.z == z) && (it.is_srgb == is_srgb))
875		{
876			return it.format;
877		}
878	}
879	return 0;
880}
881
/**
 * @brief The fixed-size header at the start of a KTX file.
 *
 * The loader reads this structure directly from the file as a block of bytes,
 * so the field order and sizes must not change.
 */
struct ktx_header
{
	uint8_t magic[12];
	uint32_t endianness;				// should be 0x04030201; if it is instead 0x01020304, then the endianness of everything must be switched.
	uint32_t gl_type;					// 0 for compressed textures, otherwise value from table 3.2 (page 162) of OpenGL 4.0 spec
	uint32_t gl_type_size;				// size of data elements to do endianness swap on (1=endian-neutral data)
	uint32_t gl_format;					// 0 for compressed textures, otherwise value from table 3.3 (page 163) of OpenGL spec
	uint32_t gl_internal_format;		// sized-internal-format, corresponding to table 3.12 to 3.14 (pages 182-185) of OpenGL spec
	uint32_t gl_base_internal_format;	// unsized-internal-format: corresponding to table 3.11 (page 179) of OpenGL spec
	uint32_t pixel_width;				// texture dimensions; not rounded up to block size for compressed.
	uint32_t pixel_height;				// must be 0 for 1D textures.
	uint32_t pixel_depth;				// must be 0 for 1D, 2D and cubemap textures.
	uint32_t number_of_array_elements;	// 0 if not a texture array
	uint32_t number_of_faces;			// 6 for cubemaps, 1 for non-cubemaps
	uint32_t number_of_mipmap_levels;	// 0 or 1 for non-mipmapped textures; 0 indicates that auto-mipmap-gen should be done at load time.
	uint32_t bytes_of_key_value_data;	// size in bytes of the key-and-value area immediately following the header.
};

// 12 magic bytes plus 13 four byte fields; any compiler-inserted padding
// would silently corrupt the raw header read
static_assert(sizeof(ktx_header) == 64, "ktx_header must be exactly 64 bytes with no padding");
899
// Magic 12-byte sequence that must appear at the beginning of every KTX file.
// Declared const because it is reference data that must never be modified.
static const uint8_t ktx_magic[12] {
	0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A
};
904
905static void ktx_header_switch_endianness(ktx_header * kt)
906{
907	#define REV(x) kt->x = u32_byterev(kt->x)
908	REV(endianness);
909	REV(gl_type);
910	REV(gl_type_size);
911	REV(gl_format);
912	REV(gl_internal_format);
913	REV(gl_base_internal_format);
914	REV(pixel_width);
915	REV(pixel_height);
916	REV(pixel_depth);
917	REV(number_of_array_elements);
918	REV(number_of_faces);
919	REV(number_of_mipmap_levels);
920	REV(bytes_of_key_value_data);
921	#undef REV
922}
923
924/**
925 * @brief Load an uncompressed KTX image using the local custom loader.
926 *
927 * @param      filename          The name of the file to load.
928 * @param      y_flip            Should the image be vertically flipped?
929 * @param[out] is_hdr            Is this an HDR image load?
930 * @param[out] component_count   The number of components in the data.
931 *
932 * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
933 */
934static astcenc_image* load_ktx_uncompressed_image(
935	const char* filename,
936	bool y_flip,
937	bool& is_hdr,
938	unsigned int& component_count
939) {
940	FILE *f = fopen(filename, "rb");
941	if (!f)
942	{
943		printf("Failed to open file %s\n", filename);
944		return nullptr;
945	}
946
947	ktx_header hdr;
948	size_t header_bytes_read = fread(&hdr, 1, sizeof(hdr), f);
949
950	if (header_bytes_read != sizeof(hdr))
951	{
952		printf("Failed to read header of KTX file %s\n", filename);
953		fclose(f);
954		return nullptr;
955	}
956
957	if (memcmp(hdr.magic, ktx_magic, 12) != 0 || (hdr.endianness != 0x04030201 && hdr.endianness != 0x01020304))
958	{
959		printf("File %s does not have a valid KTX header\n", filename);
960		fclose(f);
961		return nullptr;
962	}
963
964	int switch_endianness = 0;
965	if (hdr.endianness == 0x01020304)
966	{
967		ktx_header_switch_endianness(&hdr);
968		switch_endianness = 1;
969	}
970
971	if (hdr.gl_type == 0 || hdr.gl_format == 0)
972	{
973		printf("File %s appears to be compressed, not supported as input\n", filename);
974		fclose(f);
975		return nullptr;
976	}
977
978	// the formats we support are:
979
980	// Cartesian product of gl_type=(UNSIGNED_BYTE, UNSIGNED_SHORT, HALF_FLOAT, FLOAT) x gl_format=(RED, RG, RGB, RGBA, BGR, BGRA)
981
982	int components;
983	switch (hdr.gl_format)
984	{
985	case GL_RED:
986		components = 1;
987		break;
988	case GL_RG:
989		components = 2;
990		break;
991	case GL_RGB:
992		components = 3;
993		break;
994	case GL_RGBA:
995		components = 4;
996		break;
997	case GL_BGR:
998		components = 3;
999		break;
1000	case GL_BGRA:
1001		components = 4;
1002		break;
1003	case GL_LUMINANCE:
1004		components = 1;
1005		break;
1006	case GL_LUMINANCE_ALPHA:
1007		components = 2;
1008		break;
1009	default:
1010		printf("KTX file %s has unsupported GL type\n", filename);
1011		fclose(f);
1012		return nullptr;
1013	}
1014
1015	// Although these are set up later, use default initializer to remove warnings
1016	int bitness = 8;              // Internal precision after conversion
1017	int bytes_per_component = 1;  // Bytes per component in the KTX file
1018	scanline_transfer copy_method = R8_TO_RGBA8;
1019
1020	switch (hdr.gl_type)
1021	{
1022	case GL_UNSIGNED_BYTE:
1023		{
1024			bitness = 8;
1025			bytes_per_component = 1;
1026			switch (hdr.gl_format)
1027			{
1028			case GL_RED:
1029				copy_method = R8_TO_RGBA8;
1030				break;
1031			case GL_RG:
1032				copy_method = RG8_TO_RGBA8;
1033				break;
1034			case GL_RGB:
1035				copy_method = RGB8_TO_RGBA8;
1036				break;
1037			case GL_RGBA:
1038				copy_method = RGBA8_TO_RGBA8;
1039				break;
1040			case GL_BGR:
1041				copy_method = BGR8_TO_RGBA8;
1042				break;
1043			case GL_BGRA:
1044				copy_method = BGRA8_TO_RGBA8;
1045				break;
1046			case GL_LUMINANCE:
1047				copy_method = L8_TO_RGBA8;
1048				break;
1049			case GL_LUMINANCE_ALPHA:
1050				copy_method = LA8_TO_RGBA8;
1051				break;
1052			}
1053			break;
1054		}
1055	case GL_UNSIGNED_SHORT:
1056		{
1057			bitness = 16;
1058			bytes_per_component = 2;
1059			switch (hdr.gl_format)
1060			{
1061			case GL_RED:
1062				copy_method = R16_TO_RGBA16F;
1063				break;
1064			case GL_RG:
1065				copy_method = RG16_TO_RGBA16F;
1066				break;
1067			case GL_RGB:
1068				copy_method = RGB16_TO_RGBA16F;
1069				break;
1070			case GL_RGBA:
1071				copy_method = RGBA16_TO_RGBA16F;
1072				break;
1073			case GL_BGR:
1074				copy_method = BGR16_TO_RGBA16F;
1075				break;
1076			case GL_BGRA:
1077				copy_method = BGRA16_TO_RGBA16F;
1078				break;
1079			case GL_LUMINANCE:
1080				copy_method = L16_TO_RGBA16F;
1081				break;
1082			case GL_LUMINANCE_ALPHA:
1083				copy_method = LA16_TO_RGBA16F;
1084				break;
1085			}
1086			break;
1087		}
1088	case GL_HALF_FLOAT:
1089		{
1090			bitness = 16;
1091			bytes_per_component = 2;
1092			switch (hdr.gl_format)
1093			{
1094			case GL_RED:
1095				copy_method = R16F_TO_RGBA16F;
1096				break;
1097			case GL_RG:
1098				copy_method = RG16F_TO_RGBA16F;
1099				break;
1100			case GL_RGB:
1101				copy_method = RGB16F_TO_RGBA16F;
1102				break;
1103			case GL_RGBA:
1104				copy_method = RGBA16F_TO_RGBA16F;
1105				break;
1106			case GL_BGR:
1107				copy_method = BGR16F_TO_RGBA16F;
1108				break;
1109			case GL_BGRA:
1110				copy_method = BGRA16F_TO_RGBA16F;
1111				break;
1112			case GL_LUMINANCE:
1113				copy_method = L16F_TO_RGBA16F;
1114				break;
1115			case GL_LUMINANCE_ALPHA:
1116				copy_method = LA16F_TO_RGBA16F;
1117				break;
1118			}
1119			break;
1120		}
1121	case GL_FLOAT:
1122		{
1123			bitness = 16;
1124			bytes_per_component = 4;
1125			switch (hdr.gl_format)
1126			{
1127			case GL_RED:
1128				copy_method = R32F_TO_RGBA16F;
1129				break;
1130			case GL_RG:
1131				copy_method = RG32F_TO_RGBA16F;
1132				break;
1133			case GL_RGB:
1134				copy_method = RGB32F_TO_RGBA16F;
1135				break;
1136			case GL_RGBA:
1137				copy_method = RGBA32F_TO_RGBA16F;
1138				break;
1139			case GL_BGR:
1140				copy_method = BGR32F_TO_RGBA16F;
1141				break;
1142			case GL_BGRA:
1143				copy_method = BGRA32F_TO_RGBA16F;
1144				break;
1145			case GL_LUMINANCE:
1146				copy_method = L32F_TO_RGBA16F;
1147				break;
1148			case GL_LUMINANCE_ALPHA:
1149				copy_method = LA32F_TO_RGBA16F;
1150				break;
1151			}
1152			break;
1153		}
1154	default:
1155		printf("KTX file %s has unsupported GL format\n", filename);
1156		fclose(f);
1157		return nullptr;
1158	}
1159
1160	if (hdr.number_of_mipmap_levels > 1)
1161	{
1162		printf("WARNING: KTX file %s has %d mipmap levels; only the first one will be encoded.\n", filename, hdr.number_of_mipmap_levels);
1163	}
1164
1165	if (hdr.number_of_array_elements > 1)
1166	{
1167		printf("WARNING: KTX file %s contains a texture array with %d layers; only the first one will be encoded.\n", filename, hdr.number_of_array_elements);
1168	}
1169
1170	if (hdr.number_of_faces > 1)
1171	{
1172		printf("WARNING: KTX file %s contains a cubemap with 6 faces; only the first one will be encoded.\n", filename);
1173	}
1174
1175
1176	unsigned int dim_x = hdr.pixel_width;
1177	unsigned int dim_y = astc::max(hdr.pixel_height, 1u);
1178	unsigned int dim_z = astc::max(hdr.pixel_depth, 1u);
1179
1180	// ignore the key/value data
1181	fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR);
1182
1183	uint32_t specified_bytes_of_surface = 0;
1184	size_t sb_read = fread(&specified_bytes_of_surface, 1, 4, f);
1185	if (sb_read != 4)
1186	{
1187		printf("Failed to read header of KTX file %s\n", filename);
1188		fclose(f);
1189		return nullptr;
1190	}
1191
1192	if (switch_endianness)
1193	{
1194		specified_bytes_of_surface = u32_byterev(specified_bytes_of_surface);
1195	}
1196
1197	// read the surface
1198	uint32_t xstride = bytes_per_component * components * dim_x;
1199	uint32_t ystride = xstride * dim_y;
1200	uint32_t computed_bytes_of_surface = dim_z * ystride;
1201	if (computed_bytes_of_surface != specified_bytes_of_surface)
1202	{
1203		fclose(f);
1204		printf("%s: KTX file inconsistency: computed surface size is %d bytes, but specified size is %d bytes\n", filename, computed_bytes_of_surface, specified_bytes_of_surface);
1205		return nullptr;
1206	}
1207
1208	uint8_t *buf = new uint8_t[specified_bytes_of_surface];
1209	size_t bytes_read = fread(buf, 1, specified_bytes_of_surface, f);
1210	fclose(f);
1211	if (bytes_read != specified_bytes_of_surface)
1212	{
1213		delete[] buf;
1214		printf("Failed to read file %s\n", filename);
1215		return nullptr;
1216	}
1217
1218	// perform an endianness swap on the surface if needed.
1219	if (switch_endianness)
1220	{
1221		if (hdr.gl_type_size == 2)
1222		{
1223			switch_endianness2(buf, specified_bytes_of_surface);
1224		}
1225
1226		if (hdr.gl_type_size == 4)
1227		{
1228			switch_endianness4(buf, specified_bytes_of_surface);
1229		}
1230	}
1231
1232	// Transfer data from the surface to our own image data structure
1233	astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z);
1234
1235	for (unsigned int z = 0; z < dim_z; z++)
1236	{
1237		for (unsigned int y = 0; y < dim_y; y++)
1238		{
1239			unsigned int ymod = y_flip ? dim_y - y - 1 : y;
1240			unsigned int ydst = ymod;
1241			void *dst;
1242
1243			if (astc_img->data_type == ASTCENC_TYPE_U8)
1244			{
1245				uint8_t* data8 = static_cast<uint8_t*>(astc_img->data[z]);
1246				dst = static_cast<void*>(&data8[4 * dim_x * ydst]);
1247			}
1248			else // if (astc_img->data_type == ASTCENC_TYPE_F16)
1249			{
1250				assert(astc_img->data_type == ASTCENC_TYPE_F16);
1251				uint16_t* data16 = static_cast<uint16_t*>(astc_img->data[z]);
1252				dst = static_cast<void*>(&data16[4 * dim_x * ydst]);
1253			}
1254
1255			uint8_t *src = buf + (z * ystride) + (y * xstride);
1256			copy_scanline(dst, src, dim_x, copy_method);
1257		}
1258	}
1259
1260	delete[] buf;
1261	is_hdr = bitness >= 16;
1262	component_count = components;
1263	return astc_img;
1264}
1265
1266/**
1267 * @brief Load a KTX compressed image using the local custom loader.
1268 *
1269 * @param      filename          The name of the file to load.
1270 * @param[out] is_srgb           @c true if this is an sRGB image, @c false otherwise.
1271 * @param[out] img               The output image to populate.
1272 *
1273 * @return @c true on error, @c false otherwise.
1274 */
1275bool load_ktx_compressed_image(
1276	const char* filename,
1277	bool& is_srgb,
1278	astc_compressed_image& img
1279) {
1280	FILE *f = fopen(filename, "rb");
1281	if (!f)
1282	{
1283		printf("Failed to open file %s\n", filename);
1284		return true;
1285	}
1286
1287	ktx_header hdr;
1288	size_t actual = fread(&hdr, 1, sizeof(hdr), f);
1289	if (actual != sizeof(hdr))
1290	{
1291		printf("Failed to read header from %s\n", filename);
1292		fclose(f);
1293		return true;
1294	}
1295
1296	if (memcmp(hdr.magic, ktx_magic, 12) != 0 ||
1297	    (hdr.endianness != 0x04030201 && hdr.endianness != 0x01020304))
1298	{
1299		printf("File %s does not have a valid KTX header\n", filename);
1300		fclose(f);
1301		return true;
1302	}
1303
1304	bool switch_endianness = false;
1305	if (hdr.endianness == 0x01020304)
1306	{
1307		switch_endianness = true;
1308		ktx_header_switch_endianness(&hdr);
1309	}
1310
1311	if (hdr.gl_type != 0 || hdr.gl_format != 0 || hdr.gl_type_size != 1 ||
1312	    hdr.gl_base_internal_format != GL_RGBA)
1313	{
1314		printf("File %s is not a compressed ASTC file\n", filename);
1315		fclose(f);
1316		return true;
1317	}
1318
1319	const format_entry* fmt = get_format(hdr.gl_internal_format);
1320	if (!fmt)
1321	{
1322		printf("File %s is not a compressed ASTC file\n", filename);
1323		fclose(f);
1324		return true;
1325	}
1326
1327	// Skip over any key-value pairs
1328	int seekerr;
1329	seekerr = fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR);
1330	if (seekerr)
1331	{
1332		printf("Failed to skip key-value pairs in %s\n", filename);
1333		fclose(f);
1334		return true;
1335	}
1336
1337	// Read the length of the data and endianess convert
1338	unsigned int data_len;
1339	actual = fread(&data_len, 1, sizeof(data_len), f);
1340	if (actual != sizeof(data_len))
1341	{
1342		printf("Failed to read mip 0 size from %s\n", filename);
1343		fclose(f);
1344		return true;
1345	}
1346
1347	if (switch_endianness)
1348	{
1349		data_len = u32_byterev(data_len);
1350	}
1351
1352	// Read the data
1353	unsigned char* data = new unsigned char[data_len];
1354	actual = fread(data, 1, data_len, f);
1355	if (actual != data_len)
1356	{
1357		printf("Failed to read mip 0 data from %s\n", filename);
1358		fclose(f);
1359		delete[] data;
1360		return true;
1361	}
1362
1363	img.block_x = fmt->x;
1364	img.block_y = fmt->y;
1365	img.block_z = fmt->z == 0 ? 1 : fmt->z;
1366
1367	img.dim_x = hdr.pixel_width;
1368	img.dim_y = hdr.pixel_height;
1369	img.dim_z = hdr.pixel_depth == 0 ? 1 : hdr.pixel_depth;
1370
1371	img.data_len = data_len;
1372	img.data = data;
1373
1374	is_srgb = fmt->is_srgb;
1375
1376	fclose(f);
1377	return false;
1378}
1379
1380/**
1381 * @brief Store a KTX compressed image using a local store routine.
1382 *
1383 * @param img        The image data to store.
1384 * @param filename   The name of the file to save.
1385 * @param is_srgb    @c true if this is an sRGB image, @c false if linear.
1386 *
1387 * @return @c true on error, @c false otherwise.
1388 */
1389bool store_ktx_compressed_image(
1390	const astc_compressed_image& img,
1391	const char* filename,
1392	bool is_srgb
1393) {
1394	unsigned int fmt = get_format(img.block_x, img.block_y, img.block_z, is_srgb);
1395
1396	ktx_header hdr;
1397	memcpy(hdr.magic, ktx_magic, 12);
1398	hdr.endianness = 0x04030201;
1399	hdr.gl_type = 0;
1400	hdr.gl_type_size = 1;
1401	hdr.gl_format = 0;
1402	hdr.gl_internal_format = fmt;
1403	hdr.gl_base_internal_format = GL_RGBA;
1404	hdr.pixel_width = img.dim_x;
1405	hdr.pixel_height = img.dim_y;
1406	hdr.pixel_depth = (img.dim_z == 1) ? 0 : img.dim_z;
1407	hdr.number_of_array_elements = 0;
1408	hdr.number_of_faces = 1;
1409	hdr.number_of_mipmap_levels = 1;
1410	hdr.bytes_of_key_value_data = 0;
1411
1412	size_t expected = sizeof(ktx_header) + 4 + img.data_len;
1413	size_t actual = 0;
1414
1415	FILE *wf = fopen(filename, "wb");
1416	if (!wf)
1417	{
1418		return true;
1419	}
1420
1421	actual += fwrite(&hdr, 1, sizeof(ktx_header), wf);
1422	actual += fwrite(&img.data_len, 1, 4, wf);
1423	actual += fwrite(img.data, 1, img.data_len, wf);
1424	fclose(wf);
1425
1426	if (actual != expected)
1427	{
1428		return true;
1429	}
1430
1431	return false;
1432}
1433
1434/**
1435 * @brief Save a KTX uncompressed image using a local store routine.
1436 *
1437 * @param img        The source data for the image.
1438 * @param filename   The name of the file to save.
1439 * @param y_flip     Should the image be vertically flipped?
1440 *
1441 * @return @c true if the image saved OK, @c false on error.
1442 */
1443static bool store_ktx_uncompressed_image(
1444	const astcenc_image* img,
1445	const char* filename,
1446	int y_flip
1447) {
1448	unsigned int dim_x = img->dim_x;
1449	unsigned int dim_y = img->dim_y;
1450	unsigned int dim_z = img->dim_z;
1451
1452	int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16;
1453	int image_components = determine_image_components(img);
1454
1455	ktx_header hdr;
1456
1457	static const int gl_format_of_components[4] {
1458		GL_RED, GL_RG, GL_RGB, GL_RGBA
1459	};
1460
1461	static const int gl_sized_format_of_components_ldr[4] {
1462		GL_R8, GL_RG8, GL_RGB8, GL_RGBA8
1463	};
1464
1465	static const int gl_sized_format_of_components_hdr[4] {
1466		GL_R16F, GL_RG16F, GL_RGB16F, GL_RGBA16F
1467	};
1468
1469	memcpy(hdr.magic, ktx_magic, 12);
1470	hdr.endianness = 0x04030201;
1471	hdr.gl_type = (bitness == 16) ? GL_HALF_FLOAT : GL_UNSIGNED_BYTE;
1472	hdr.gl_type_size = bitness / 8;
1473	hdr.gl_format = gl_format_of_components[image_components - 1];
1474	if (bitness == 16)
1475	{
1476		hdr.gl_internal_format = gl_sized_format_of_components_hdr[image_components - 1];
1477	}
1478	else
1479	{
1480		hdr.gl_internal_format = gl_sized_format_of_components_ldr[image_components - 1];
1481	}
1482	hdr.gl_base_internal_format = hdr.gl_format;
1483	hdr.pixel_width = dim_x;
1484	hdr.pixel_height = dim_y;
1485	hdr.pixel_depth = (dim_z == 1) ? 0 : dim_z;
1486	hdr.number_of_array_elements = 0;
1487	hdr.number_of_faces = 1;
1488	hdr.number_of_mipmap_levels = 1;
1489	hdr.bytes_of_key_value_data = 0;
1490
1491	// Collect image data to write
1492	uint8_t ***row_pointers8 = nullptr;
1493	uint16_t ***row_pointers16 = nullptr;
1494	if (bitness == 8)
1495	{
1496		row_pointers8 = new uint8_t **[dim_z];
1497		row_pointers8[0] = new uint8_t *[dim_y * dim_z];
1498		row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components + 3];
1499
1500		for (unsigned int z = 1; z < dim_z; z++)
1501		{
1502			row_pointers8[z] = row_pointers8[0] + dim_y * z;
1503			row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_x * image_components * z;
1504		}
1505
1506		for (unsigned int z = 0; z < dim_z; z++)
1507		{
1508			for (unsigned int y = 1; y < dim_y; y++)
1509			{
1510				row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y;
1511			}
1512		}
1513
1514		for (unsigned int z = 0; z < dim_z; z++)
1515		{
1516			uint8_t* data8 = static_cast<uint8_t*>(img->data[z]);
1517			for (unsigned int y = 0; y < dim_y; y++)
1518			{
1519				int ym = y_flip ? dim_y - y - 1 : y;
1520				switch (image_components)
1521				{
1522				case 1:		// single-component, treated as Luminance
1523					for (unsigned int x = 0; x < dim_x; x++)
1524					{
1525						row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x    )];
1526					}
1527					break;
1528				case 2:		// two-component, treated as Luminance-Alpha
1529					for (unsigned int x = 0; x < dim_x; x++)
1530					{
1531						row_pointers8[z][y][2 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1532						row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)];
1533					}
1534					break;
1535				case 3:		// three-component, treated a
1536					for (unsigned int x = 0; x < dim_x; x++)
1537					{
1538						row_pointers8[z][y][3 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1539						row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
1540						row_pointers8[z][y][3 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
1541					}
1542					break;
1543				case 4:		// four-component, treated as RGBA
1544					for (unsigned int x = 0; x < dim_x; x++)
1545					{
1546						row_pointers8[z][y][4 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1547						row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
1548						row_pointers8[z][y][4 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
1549						row_pointers8[z][y][4 * x + 3] = data8[(4 * dim_x * ym) + (4 * x + 3)];
1550					}
1551					break;
1552				}
1553			}
1554		}
1555	}
1556	else						// if bitness == 16
1557	{
1558		row_pointers16 = new uint16_t **[dim_z];
1559		row_pointers16[0] = new uint16_t *[dim_y * dim_z];
1560		row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components + 1];
1561
1562		for (unsigned int z = 1; z < dim_z; z++)
1563		{
1564			row_pointers16[z] = row_pointers16[0] + dim_y * z;
1565			row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z;
1566		}
1567
1568		for (unsigned int z = 0; z < dim_z; z++)
1569		{
1570			for (unsigned int y = 1; y < dim_y; y++)
1571			{
1572				row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y;
1573			}
1574		}
1575
1576		for (unsigned int z = 0; z < dim_z; z++)
1577		{
1578			uint16_t* data16 = static_cast<uint16_t*>(img->data[z]);
1579			for (unsigned int y = 0; y < dim_y; y++)
1580			{
1581				int ym = y_flip ? dim_y - y - 1 : y;
1582				switch (image_components)
1583				{
1584				case 1:		// single-component, treated as Luminance
1585					for (unsigned int x = 0; x < dim_x; x++)
1586					{
1587						row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x    )];
1588					}
1589					break;
1590				case 2:		// two-component, treated as Luminance-Alpha
1591					for (unsigned int x = 0; x < dim_x; x++)
1592					{
1593						row_pointers16[z][y][2 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1594						row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)];
1595					}
1596					break;
1597				case 3:		// three-component, treated as RGB
1598					for (unsigned int x = 0; x < dim_x; x++)
1599					{
1600						row_pointers16[z][y][3 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1601						row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
1602						row_pointers16[z][y][3 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
1603					}
1604					break;
1605				case 4:		// four-component, treated as RGBA
1606					for (unsigned int x = 0; x < dim_x; x++)
1607					{
1608						row_pointers16[z][y][4 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1609						row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
1610						row_pointers16[z][y][4 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
1611						row_pointers16[z][y][4 * x + 3] = data16[(4 * dim_x * ym) + (4 * x + 3)];
1612					}
1613					break;
1614				}
1615			}
1616		}
1617	}
1618
1619	bool retval { true };
1620	uint32_t image_bytes = dim_x * dim_y * dim_z * image_components * (bitness / 8);
1621	uint32_t image_write_bytes = (image_bytes + 3) & ~3;
1622
1623	FILE *wf = fopen(filename, "wb");
1624	if (wf)
1625	{
1626		void* dataptr = (bitness == 16) ?
1627			reinterpret_cast<void*>(row_pointers16[0][0]) :
1628			reinterpret_cast<void*>(row_pointers8[0][0]);
1629
1630		size_t expected_bytes_written = sizeof(ktx_header) + image_write_bytes + 4;
1631		size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(ktx_header), wf);
1632		size_t bytecount_bytes_written = fwrite(&image_bytes, 1, 4, wf);
1633		size_t data_bytes_written = fwrite(dataptr, 1, image_write_bytes, wf);
1634		fclose(wf);
1635		if (hdr_bytes_written + bytecount_bytes_written + data_bytes_written != expected_bytes_written)
1636		{
1637			retval = false;
1638		}
1639	}
1640	else
1641	{
1642		retval = false;
1643	}
1644
1645	if (row_pointers8)
1646	{
1647		delete[] row_pointers8[0][0];
1648		delete[] row_pointers8[0];
1649		delete[] row_pointers8;
1650	}
1651
1652	if (row_pointers16)
1653	{
1654		delete[] row_pointers16[0][0];
1655		delete[] row_pointers16[0];
1656		delete[] row_pointers16;
1657	}
1658
1659	return retval;
1660}
1661
1662/*
1663	Loader for DDS files.
1664
1665	Note that after the header, data are densely packed with no padding;
1666	in the case of multiple surfaces, they appear one after another in
1667	the file, again with no padding.
1668
1669	This code is NOT endian-neutral.
1670*/
struct dds_pixelformat
{
	uint32_t size;				// structure size, set to 32.
	/*
	   flags bits are a combination of the following:
		0x1 : Texture contains alpha data
		0x2 : ---- (older files: texture contains alpha data, for Alpha-only texture)
		0x4 : The fourcc field is valid, indicating a compressed or DX10 texture format
		0x40 : texture contains uncompressed RGB data
		0x200 : ---- (YUV in older files)
		0x20000 : Texture contains Luminance data (can be combined with 0x1 for Lum-Alpha)
	*/
	uint32_t flags;
	uint32_t fourcc;			// "DX10" to indicate a DX10 format, "DXTn" for the DXT formats
	uint32_t rgbbitcount;		// number of bits per texel; up to 32 for non-DX10 formats.
	uint32_t rbitmask;			// bitmap indicating position of red/luminance color component
	uint32_t gbitmask;			// bitmap indicating position of green color component
	uint32_t bbitmask;			// bitmap indicating position of blue color component
	uint32_t abitmask;			// bitmap indicating position of alpha color component
};

// This structure is read from the file as raw bytes (as part of dds_header), so its in-memory
// layout must match the 32 byte on-disk layout exactly.
static_assert(sizeof(dds_pixelformat) == 32, "dds_pixelformat must be exactly 32 bytes");
1686
1687struct dds_header
1688{
1689	uint32_t size;				// header size; must be exactly 124.
1690	/*
1691	   flag field is an OR or the following bits, that indicate fields containing valid data:
1692		1: caps/caps2/caps3/caps4 (set in all DDS files, ignore on read)
1693		2: height (set in all DDS files, ignore on read)
1694		4: width (set in all DDS files, ignore on read)
1695		8: pitch (for uncompressed texture)
1696		0x1000: the pixel format field (set in all DDS files, ignore on read)
1697		0x20000: mipmap count (for mipmapped textures with >1 level)
1698		0x80000: pitch (for compressed texture)
1699		0x800000: depth (for 3d textures)
1700	*/
1701	uint32_t flags;
1702	uint32_t height;
1703	uint32_t width;
1704	uint32_t pitch_or_linear_size;	// scanline pitch for uncompressed; total size in bytes for compressed
1705	uint32_t depth;
1706	uint32_t mipmapcount;
1707	// unused, set to 0
1708	uint32_t reserved1[11];
1709	dds_pixelformat ddspf;
1710	/*
1711	   caps field is an OR of the following values:
1712		8 : should be set for a file that contains more than 1 surface (ignore on read)
1713		0x400000 : should be set for a mipmapped texture
1714		0x1000 : should be set if the surface is a texture at all (all DDS files, ignore on read)
1715	*/
1716	uint32_t caps;
1717	/*
1718	   caps2 field is an OR of the following values:
1719		0x200 : texture is cubemap
1720		0x400 : +X face of cubemap is present
1721		0x800 : -X face of cubemap is present
1722		0x1000 : +Y face of cubemap is present
1723		0x2000 : -Y face of cubemap is present
1724		0x4000 : +Z face of cubemap is present
1725		0x8000 : -Z face of cubemap is present
1726		0x200000 : texture is a 3d texture.
1727	*/
1728	uint32_t caps2;
1729	// unused, set to 0
1730	uint32_t caps3;
1731	// unused, set to 0
1732	uint32_t caps4;
1733	// unused, set to 0
1734	uint32_t reserved2;
1735};
1736
struct dds_header_dx10
{
	uint32_t dxgi_format;
	uint32_t resource_dimension;	// 2=1d-texture, 3=2d-texture or cubemap, 4=3d-texture
	uint32_t misc_flag;			// 4 if cubemap, else 0
	uint32_t array_size;		// size of array in case of a texture array; set to 1 for a non-array
	uint32_t reserved;			// set to 0.
};

// This structure is read from the file as raw bytes, so its in-memory layout must match the
// 20 byte on-disk layout exactly.
static_assert(sizeof(dds_header_dx10) == 20, "dds_header_dx10 must be exactly 20 bytes");
1745
// The four file-identifier bytes 'D' 'D' 'S' ' ' (0x44 0x44 0x53 0x20), combined into a 32-bit
// value with the first byte in the least significant position.
#define DDS_MAGIC 0x20534444
// The fourcc bytes 'D' 'X' '1' '0' (0x44 0x58 0x31 0x30), combined the same way; this fourcc
// indicates that a DX10 header is present.
#define DX10_MAGIC 0x30315844
1748
1749/**
1750 * @brief Load an uncompressed DDS image using the local custom loader.
1751 *
1752 * @param      filename          The name of the file to load.
1753 * @param      y_flip            Should the image be vertically flipped?
1754 * @param[out] is_hdr            Is this an HDR image load?
1755 * @param[out] component_count   The number of components in the data.
1756 *
1757 * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
1758 */
1759static astcenc_image* load_dds_uncompressed_image(
1760	const char* filename,
1761	bool y_flip,
1762	bool& is_hdr,
1763	unsigned int& component_count
1764) {
1765	FILE *f = fopen(filename, "rb");
1766	if (!f)
1767	{
1768		printf("Failed to open file %s\n", filename);
1769		return nullptr;
1770	}
1771
1772	uint8_t magic[4];
1773
1774	dds_header hdr;
1775	size_t magic_bytes_read = fread(magic, 1, 4, f);
1776	size_t header_bytes_read = fread(&hdr, 1, sizeof(hdr), f);
1777	if (magic_bytes_read != 4 || header_bytes_read != sizeof(hdr))
1778	{
1779		printf("Failed to read header of DDS file %s\n", filename);
1780		fclose(f);
1781		return nullptr;
1782	}
1783
1784	uint32_t magicx = magic[0] | (magic[1] << 8) | (magic[2] << 16) | (magic[3] << 24);
1785
1786	if (magicx != DDS_MAGIC || hdr.size != 124)
1787	{
1788		printf("File %s does not have a valid DDS header\n", filename);
1789		fclose(f);
1790		return nullptr;
1791	}
1792
1793	int use_dx10_header = 0;
1794	if (hdr.ddspf.flags & 4)
1795	{
1796		if (hdr.ddspf.fourcc == DX10_MAGIC)
1797		{
1798			use_dx10_header = 1;
1799		}
1800		else
1801		{
1802			printf("DDS file %s is compressed, not supported\n", filename);
1803			fclose(f);
1804			return nullptr;
1805		}
1806	}
1807
1808	dds_header_dx10 dx10_header;
1809	if (use_dx10_header)
1810	{
1811		size_t dx10_header_bytes_read = fread(&dx10_header, 1, sizeof(dx10_header), f);
1812		if (dx10_header_bytes_read != sizeof(dx10_header))
1813		{
1814			printf("Failed to read header of DDS file %s\n", filename);
1815			fclose(f);
1816			return nullptr;
1817		}
1818	}
1819
1820	unsigned int dim_x = hdr.width;
1821	unsigned int dim_y = hdr.height;
1822	unsigned int dim_z = (hdr.flags & 0x800000) ? hdr.depth : 1;
1823
1824	// The bitcount that we will use internally in the codec
1825	int bitness = 0;
1826
1827	// The bytes per component in the DDS file itself
1828	int bytes_per_component = 0;
1829	int components = 0;
1830	scanline_transfer copy_method = R8_TO_RGBA8;
1831
1832	// figure out the format actually used in the DDS file.
1833	if (use_dx10_header)
1834	{
1835		// DX10 header present; use the DXGI format.
1836		#define DXGI_FORMAT_R32G32B32A32_FLOAT   2
1837		#define DXGI_FORMAT_R32G32B32_FLOAT      6
1838		#define DXGI_FORMAT_R16G16B16A16_FLOAT  10
1839		#define DXGI_FORMAT_R16G16B16A16_UNORM  11
1840		#define DXGI_FORMAT_R32G32_FLOAT        16
1841		#define DXGI_FORMAT_R8G8B8A8_UNORM      28
1842		#define DXGI_FORMAT_R16G16_FLOAT    34
1843		#define DXGI_FORMAT_R16G16_UNORM    35
1844		#define DXGI_FORMAT_R32_FLOAT       41
1845		#define DXGI_FORMAT_R8G8_UNORM      49
1846		#define DXGI_FORMAT_R16_FLOAT       54
1847		#define DXGI_FORMAT_R16_UNORM       56
1848		#define DXGI_FORMAT_R8_UNORM        61
1849		#define DXGI_FORMAT_B8G8R8A8_UNORM  86
1850		#define DXGI_FORMAT_B8G8R8X8_UNORM  87
1851
1852		struct dxgi_params
1853		{
1854			int bitness;
1855			int bytes_per_component;
1856			int components;
1857			scanline_transfer copy_method;
1858			uint32_t dxgi_format_number;
1859		};
1860
1861		static const dxgi_params format_params[] {
1862			{16, 4, 4, RGBA32F_TO_RGBA16F, DXGI_FORMAT_R32G32B32A32_FLOAT},
1863			{16, 4, 3, RGB32F_TO_RGBA16F, DXGI_FORMAT_R32G32B32_FLOAT},
1864			{16, 2, 4, RGBA16F_TO_RGBA16F, DXGI_FORMAT_R16G16B16A16_FLOAT},
1865			{16, 2, 4, RGBA16_TO_RGBA16F, DXGI_FORMAT_R16G16B16A16_UNORM},
1866			{16, 4, 2, RG32F_TO_RGBA16F, DXGI_FORMAT_R32G32_FLOAT},
1867			{8, 1, 4, RGBA8_TO_RGBA8, DXGI_FORMAT_R8G8B8A8_UNORM},
1868			{16, 2, 2, RG16F_TO_RGBA16F, DXGI_FORMAT_R16G16_FLOAT},
1869			{16, 2, 2, RG16_TO_RGBA16F, DXGI_FORMAT_R16G16_UNORM},
1870			{16, 4, 1, R32F_TO_RGBA16F, DXGI_FORMAT_R32_FLOAT},
1871			{8, 1, 2, RG8_TO_RGBA8, DXGI_FORMAT_R8G8_UNORM},
1872			{16, 2, 1, R16F_TO_RGBA16F, DXGI_FORMAT_R16_FLOAT},
1873			{16, 2, 1, R16_TO_RGBA16F, DXGI_FORMAT_R16_UNORM},
1874			{8, 1, 1, R8_TO_RGBA8, DXGI_FORMAT_R8_UNORM},
1875			{8, 1, 4, BGRA8_TO_RGBA8, DXGI_FORMAT_B8G8R8A8_UNORM},
1876			{8, 1, 4, BGRX8_TO_RGBA8, DXGI_FORMAT_B8G8R8X8_UNORM},
1877		};
1878
1879		int dxgi_modes_supported = sizeof(format_params) / sizeof(format_params[0]);
1880		int did_select_format = 0;
1881		for (int i = 0; i < dxgi_modes_supported; i++)
1882		{
1883			if (dx10_header.dxgi_format == format_params[i].dxgi_format_number)
1884			{
1885				bitness = format_params[i].bitness;
1886				bytes_per_component = format_params[i].bytes_per_component;
1887				components = format_params[i].components;
1888				copy_method = format_params[i].copy_method;
1889				did_select_format = 1;
1890				break;
1891			}
1892		}
1893
1894		if (!did_select_format)
1895		{
1896			printf("DDS file %s: DXGI format not supported by codec\n", filename);
1897			fclose(f);
1898			return nullptr;
1899		}
1900	}
1901	else
1902	{
1903		// No DX10 header present. Then try to match the bitcount and bitmask against
1904		// a set of prepared patterns.
1905		uint32_t flags = hdr.ddspf.flags;
1906		uint32_t bitcount = hdr.ddspf.rgbbitcount;
1907		uint32_t rmask = hdr.ddspf.rbitmask;
1908		uint32_t gmask = hdr.ddspf.gbitmask;
1909		uint32_t bmask = hdr.ddspf.bbitmask;
1910		uint32_t amask = hdr.ddspf.abitmask;
1911
1912		// RGBA-unorm8
1913		if ((flags & 0x41) == 0x41 && bitcount == 32 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000 && amask == 0xFF000000)
1914		{
1915			bytes_per_component = 1;
1916			components = 4;
1917			copy_method = RGBA8_TO_RGBA8;
1918		}
1919		// BGRA-unorm8
1920		else if ((flags & 0x41) == 0x41 && bitcount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0xFF000000)
1921		{
1922			bytes_per_component = 1;
1923			components = 4;
1924			copy_method = BGRA8_TO_RGBA8;
1925		}
1926		// RGBX-unorm8
1927		else if ((flags & 0x40) && bitcount == 32 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000)
1928		{
1929			bytes_per_component = 1;
1930			components = 4;
1931			copy_method = RGBX8_TO_RGBA8;
1932		}
1933		// BGRX-unorm8
1934		else if ((flags & 0x40) && bitcount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF)
1935		{
1936			bytes_per_component = 1;
1937			components = 4;
1938			copy_method = BGRX8_TO_RGBA8;
1939		}
1940		// RGB-unorm8
1941		else if ((flags & 0x40) && bitcount == 24 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000)
1942		{
1943			bytes_per_component = 1;
1944			components = 3;
1945			copy_method = RGB8_TO_RGBA8;
1946		}
1947		// BGR-unorm8
1948		else if ((flags & 0x40) && bitcount == 24 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF)
1949		{
1950			bytes_per_component = 1;
1951			components = 3;
1952			copy_method = BGR8_TO_RGBA8;
1953		}
1954		// RG-unorm16
1955		else if ((flags & 0x40) && bitcount == 16 && rmask == 0xFFFF && gmask == 0xFFFF0000)
1956		{
1957			bytes_per_component = 2;
1958			components = 2;
1959			copy_method = RG16_TO_RGBA16F;
1960		}
1961		// A8L8
1962		else if ((flags & 0x20001) == 0x20001 && bitcount == 16 && rmask == 0xFF && amask == 0xFF00)
1963		{
1964			bytes_per_component = 1;
1965			components = 2;
1966			copy_method = LA8_TO_RGBA8;
1967		}
1968		// L8
1969		else if ((flags & 0x20000) && bitcount == 8 && rmask == 0xFF)
1970		{
1971			bytes_per_component = 1;
1972			components = 1;
1973			copy_method = L8_TO_RGBA8;
1974		}
1975		// L16
1976		else if ((flags & 0x20000) && bitcount == 16 && rmask == 0xFFFF)
1977		{
1978			bytes_per_component = 2;
1979			components = 1;
1980			copy_method = L16_TO_RGBA16F;
1981		}
1982		else
1983		{
1984			printf("DDS file %s: Non-DXGI format not supported by codec\n", filename);
1985			fclose(f);
1986			return nullptr;
1987		}
1988
1989		bitness = bytes_per_component * 8;
1990	}
1991
1992	// then, load the actual file.
1993	uint32_t xstride = bytes_per_component * components * dim_x;
1994	uint32_t ystride = xstride * dim_y;
1995	uint32_t bytes_of_surface = ystride * dim_z;
1996
1997	uint8_t *buf = new uint8_t[bytes_of_surface];
1998	size_t bytes_read = fread(buf, 1, bytes_of_surface, f);
1999	fclose(f);
2000	if (bytes_read != bytes_of_surface)
2001	{
2002		delete[] buf;
2003		printf("Failed to read file %s\n", filename);
2004		return nullptr;
2005	}
2006
2007	// then transfer data from the surface to our own image-data-structure.
2008	astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z);
2009
2010	for (unsigned int z = 0; z < dim_z; z++)
2011	{
2012		for (unsigned int y = 0; y < dim_y; y++)
2013		{
2014			unsigned int ymod = y_flip ? dim_y - y - 1 : y;
2015			unsigned int ydst = ymod;
2016			void* dst;
2017
2018			if (astc_img->data_type == ASTCENC_TYPE_U8)
2019			{
2020				uint8_t* data8 = static_cast<uint8_t*>(astc_img->data[z]);
2021				dst = static_cast<void*>(&data8[4 * dim_x * ydst]);
2022			}
2023			else // if (astc_img->data_type == ASTCENC_TYPE_F16)
2024			{
2025				assert(astc_img->data_type == ASTCENC_TYPE_F16);
2026				uint16_t* data16 = static_cast<uint16_t*>(astc_img->data[z]);
2027				dst = static_cast<void*>(&data16[4 * dim_x * ydst]);
2028			}
2029
2030			uint8_t *src = buf + (z * ystride) + (y * xstride);
2031			copy_scanline(dst, src, dim_x, copy_method);
2032		}
2033	}
2034
2035	delete[] buf;
2036	is_hdr = bitness >= 16;
2037	component_count = components;
2038	return astc_img;
2039}
2040
2041/**
2042 * @brief Save a DDS uncompressed image using a local store routine.
2043 *
2044 * @param img        The source data for the image.
2045 * @param filename   The name of the file to save.
2046 * @param y_flip     Should the image be vertically flipped?
2047 *
2048 * @return @c true if the image saved OK, @c false on error.
2049 */
2050static bool store_dds_uncompressed_image(
2051	const astcenc_image* img,
2052	const char* filename,
2053	int y_flip
2054) {
2055	unsigned int dim_x = img->dim_x;
2056	unsigned int dim_y = img->dim_y;
2057	unsigned int dim_z = img->dim_z;
2058
2059	int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16;
2060	int image_components = (bitness == 16) ? 4 : determine_image_components(img);
2061
2062	// DDS-pixel-format structures to use when storing LDR image with 1,2,3 or 4 components.
2063	static const dds_pixelformat format_of_image_components[4] =
2064	{
2065		{32, 0x20000, 0, 8, 0xFF, 0, 0, 0},	// luminance
2066		{32, 0x20001, 0, 16, 0xFF, 0, 0, 0xFF00},	// L8A8
2067		{32, 0x40, 0, 24, 0xFF, 0xFF00, 0xFF0000, 0},	// RGB8
2068		{32, 0x41, 0, 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000}	// RGBA8
2069	};
2070
2071	// DDS-pixel-format structures to use when storing HDR image.
2072	static const dds_pixelformat dxt10_diverter =
2073	{
2074		32, 4, DX10_MAGIC, 0, 0, 0, 0, 0
2075	};
2076
2077	// Header handling; will write:
2078	// * DDS magic value
2079	// * DDS header
2080	// * DDS DX10 header, if the file is floating-point
2081	// * pixel data
2082
2083	// Main header data
2084	dds_header hdr;
2085	hdr.size = 124;
2086	hdr.flags = 0x100F | (dim_z > 1 ? 0x800000 : 0);
2087	hdr.height = dim_y;
2088	hdr.width = dim_x;
2089	hdr.pitch_or_linear_size = image_components * (bitness / 8) * dim_x;
2090	hdr.depth = dim_z;
2091	hdr.mipmapcount = 1;
2092	for (unsigned int i = 0; i < 11; i++)
2093	{
2094		hdr.reserved1[i] = 0;
2095	}
2096	hdr.caps = 0x1000;
2097	hdr.caps2 = (dim_z > 1) ? 0x200000 : 0;
2098	hdr.caps3 = 0;
2099	hdr.caps4 = 0;
2100
2101	// Pixel-format data
2102	if (bitness == 8)
2103	{
2104		hdr.ddspf = format_of_image_components[image_components - 1];
2105	}
2106	else
2107	{
2108		hdr.ddspf = dxt10_diverter;
2109	}
2110
2111	// DX10 data
2112	dds_header_dx10 dx10;
2113	dx10.dxgi_format = DXGI_FORMAT_R16G16B16A16_FLOAT;
2114	dx10.resource_dimension = (dim_z > 1) ? 4 : 3;
2115	dx10.misc_flag = 0;
2116	dx10.array_size = 1;
2117	dx10.reserved = 0;
2118
2119	// Collect image data to write
2120	uint8_t ***row_pointers8 = nullptr;
2121	uint16_t ***row_pointers16 = nullptr;
2122
2123	if (bitness == 8)
2124	{
2125		row_pointers8 = new uint8_t **[dim_z];
2126		row_pointers8[0] = new uint8_t *[dim_y * dim_z];
2127		row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components];
2128
2129		for (unsigned int z = 1; z < dim_z; z++)
2130		{
2131			row_pointers8[z] = row_pointers8[0] + dim_y * z;
2132			row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_z * image_components * z;
2133		}
2134
2135		for (unsigned int z = 0; z < dim_z; z++)
2136		{
2137			for (unsigned int y = 1; y < dim_y; y++)
2138			{
2139				row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y;
2140			}
2141		}
2142
2143		for (unsigned int z = 0; z < dim_z; z++)
2144		{
2145			uint8_t* data8 = static_cast<uint8_t*>(img->data[z]);
2146
2147			for (unsigned int y = 0; y < dim_y; y++)
2148			{
2149				int ym = y_flip ? dim_y - y - 1 : y;
2150				switch (image_components)
2151				{
2152				case 1:		// single-component, treated as Luminance
2153					for (unsigned int x = 0; x < dim_x; x++)
2154					{
2155						row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x    )];
2156					}
2157					break;
2158				case 2:		// two-component, treated as Luminance-Alpha
2159					for (unsigned int x = 0; x < dim_x; x++)
2160					{
2161						row_pointers8[z][y][2 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2162						row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)];
2163					}
2164					break;
2165				case 3:		// three-component, treated as RGB
2166					for (unsigned int x = 0; x < dim_x; x++)
2167					{
2168						row_pointers8[z][y][3 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2169						row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
2170						row_pointers8[z][y][3 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
2171					}
2172					break;
2173				case 4:		// four-component, treated as RGBA
2174					for (unsigned int x = 0; x < dim_x; x++)
2175					{
2176						row_pointers8[z][y][4 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2177						row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
2178						row_pointers8[z][y][4 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
2179						row_pointers8[z][y][4 * x + 3] = data8[(4 * dim_x * ym) + (4 * x + 3)];
2180					}
2181					break;
2182				}
2183			}
2184		}
2185	}
2186	else						// if bitness == 16
2187	{
2188		row_pointers16 = new uint16_t **[dim_z];
2189		row_pointers16[0] = new uint16_t *[dim_y * dim_z];
2190		row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components];
2191
2192		for (unsigned int z = 1; z < dim_z; z++)
2193		{
2194			row_pointers16[z] = row_pointers16[0] + dim_y * z;
2195			row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z;
2196		}
2197
2198		for (unsigned int z = 0; z < dim_z; z++)
2199		{
2200			for (unsigned int y = 1; y < dim_y; y++)
2201			{
2202				row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y;
2203			}
2204		}
2205
2206		for (unsigned int z = 0; z < dim_z; z++)
2207		{
2208			uint16_t* data16 = static_cast<uint16_t*>(img->data[z]);
2209
2210			for (unsigned int y = 0; y < dim_y; y++)
2211			{
2212				int ym = y_flip ? dim_y - y - 1: y;
2213				switch (image_components)
2214				{
2215				case 1:		// single-component, treated as Luminance
2216					for (unsigned int x = 0; x < dim_x; x++)
2217					{
2218						row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x    )];
2219					}
2220					break;
2221				case 2:		// two-component, treated as Luminance-Alpha
2222					for (unsigned int x = 0; x < dim_x; x++)
2223					{
2224						row_pointers16[z][y][2 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2225						row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)];
2226					}
2227					break;
2228				case 3:		// three-component, treated as RGB
2229					for (unsigned int x = 0; x < dim_x; x++)
2230					{
2231						row_pointers16[z][y][3 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2232						row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
2233						row_pointers16[z][y][3 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
2234					}
2235					break;
2236				case 4:		// four-component, treated as RGBA
2237					for (unsigned int x = 0; x < dim_x; x++)
2238					{
2239						row_pointers16[z][y][4 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2240						row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
2241						row_pointers16[z][y][4 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
2242						row_pointers16[z][y][4 * x + 3] = data16[(4 * dim_x * ym) + (4 * x + 3)];
2243					}
2244					break;
2245				}
2246			}
2247		}
2248	}
2249
2250	bool retval { true };
2251	uint32_t image_bytes = dim_x * dim_y * dim_z * image_components * (bitness / 8);
2252
2253	uint32_t dds_magic = DDS_MAGIC;
2254
2255	FILE *wf = fopen(filename, "wb");
2256	if (wf)
2257	{
2258		void *dataptr = (bitness == 16) ?
2259			reinterpret_cast<void*>(row_pointers16[0][0]) :
2260			reinterpret_cast<void*>(row_pointers8[0][0]);
2261
2262		size_t expected_bytes_written = 4 + sizeof(dds_header) + (bitness > 8 ? sizeof(dds_header_dx10) : 0) + image_bytes;
2263
2264		size_t magic_bytes_written = fwrite(&dds_magic, 1, 4, wf);
2265		size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(dds_header), wf);
2266
2267		size_t dx10_bytes_written;
2268		if (bitness > 8)
2269		{
2270			dx10_bytes_written = fwrite(&dx10, 1, sizeof(dx10), wf);
2271		}
2272		else
2273		{
2274			dx10_bytes_written = 0;
2275		}
2276
2277		size_t data_bytes_written = fwrite(dataptr, 1, image_bytes, wf);
2278
2279		fclose(wf);
2280		if (magic_bytes_written + hdr_bytes_written + dx10_bytes_written + data_bytes_written != expected_bytes_written)
2281		{
2282			retval = false;
2283		}
2284	}
2285	else
2286	{
2287		retval = false;
2288	}
2289
2290	if (row_pointers8)
2291	{
2292		delete[] row_pointers8[0][0];
2293		delete[] row_pointers8[0];
2294		delete[] row_pointers8;
2295	}
2296
2297	if (row_pointers16)
2298	{
2299		delete[] row_pointers16[0][0];
2300		delete[] row_pointers16[0];
2301		delete[] row_pointers16;
2302	}
2303
2304	return retval;
2305}
2306
/**
 * @brief Supported uncompressed image load functions, and their associated file extensions.
 *
 * Entries are tested in order by load_ncimage(), which uses the first one
 * whose extension matches. An entry with a null @c ending1 matches any file.
 */
static const struct
{
	// Lower-case file extension, including the leading dot
	const char* ending1;
	// Upper-case spelling of the same extension
	const char* ending2;
	// Loader called as loader_func(filename, y_flip, is_hdr, component_count)
	astcenc_image* (*loader_func)(const char*, bool, bool&, unsigned int&);
} loader_descs[] {
	// LDR formats
	{".png",   ".PNG",  load_png_with_wuffs},
	// HDR formats
	{".exr",   ".EXR",  load_image_with_tinyexr },
	// Container formats
	{".ktx",   ".KTX",  load_ktx_uncompressed_image },
	{".dds",   ".DDS",  load_dds_uncompressed_image },
	// Generic catch all; this one must be last in the list
	{ nullptr, nullptr, load_image_with_stb }
};

// Number of entries in loader_descs, including the catch-all entry
static const int loader_descr_count = sizeof(loader_descs) / sizeof(loader_descs[0]);
2328
/**
 * @brief Supported uncompressed image store functions, and their associated file extensions.
 *
 * Entries are tested in order by store_ncimage() and
 * get_output_filename_enforced_bitness(); the first matching extension wins.
 */
static const struct
{
	// Lower-case file extension, including the leading dot
	const char *ending1;
	// Upper-case spelling of the same extension
	const char *ending2;
	// Value returned by get_output_filename_enforced_bitness() for this
	// extension; NOTE(review): 0 presumably means no bitness is enforced -
	// confirm against the header documentation
	int enforced_bitness;
	// Store function to call for files with this extension
	bool (*storer_func)(const astcenc_image *output_image, const char *filename, int y_flip);
} storer_descs[] {
	// LDR formats
	{".bmp", ".BMP",  8, store_bmp_image_with_stb},
	{".png", ".PNG",  8, store_png_image_with_stb},
	{".tga", ".TGA",  8, store_tga_image_with_stb},
	// HDR formats
	{".exr", ".EXR", 16, store_exr_image_with_tinyexr},
	{".hdr", ".HDR", 16, store_hdr_image_with_stb},
	// Container formats
	{".dds", ".DDS",  0, store_dds_uncompressed_image},
	{".ktx", ".KTX",  0, store_ktx_uncompressed_image}
};

// Number of entries in storer_descs
static const int storer_descr_count = sizeof(storer_descs) / sizeof(storer_descs[0]);
2352
2353/* See header for documentation. */
2354int get_output_filename_enforced_bitness(
2355	const char* filename
2356) {
2357	const char *eptr = strrchr(filename, '.');
2358	if (!eptr)
2359	{
2360		return 0;
2361	}
2362
2363	for (int i = 0; i < storer_descr_count; i++)
2364	{
2365		if (strcmp(eptr, storer_descs[i].ending1) == 0
2366		 || strcmp(eptr, storer_descs[i].ending2) == 0)
2367		{
2368			return storer_descs[i].enforced_bitness;
2369		}
2370	}
2371
2372	return -1;
2373}
2374
2375/* See header for documentation. */
2376astcenc_image* load_ncimage(
2377	const char* filename,
2378	bool y_flip,
2379	bool& is_hdr,
2380	unsigned int& component_count
2381) {
2382	// Get the file extension
2383	const char* eptr = strrchr(filename, '.');
2384	if (!eptr)
2385	{
2386		eptr = filename;
2387	}
2388
2389	// Scan through descriptors until a matching loader is found
2390	for (unsigned int i = 0; i < loader_descr_count; i++)
2391	{
2392		if (loader_descs[i].ending1 == nullptr
2393			|| strcmp(eptr, loader_descs[i].ending1) == 0
2394			|| strcmp(eptr, loader_descs[i].ending2) == 0)
2395		{
2396			return loader_descs[i].loader_func(filename, y_flip, is_hdr, component_count);
2397		}
2398	}
2399
2400	// Should never reach here - stb_image provides a generic handler
2401	return nullptr;
2402}
2403
2404/* See header for documentation. */
2405bool store_ncimage(
2406	const astcenc_image* output_image,
2407	const char* filename,
2408	int y_flip
2409) {
2410	const char* eptr = strrchr(filename, '.');
2411	if (!eptr)
2412	{
2413		eptr = ".ktx"; // use KTX file format if we don't have an ending.
2414	}
2415
2416	for (int i = 0; i < storer_descr_count; i++)
2417	{
2418		if (strcmp(eptr, storer_descs[i].ending1) == 0
2419		 || strcmp(eptr, storer_descs[i].ending2) == 0)
2420		{
2421			return storer_descs[i].storer_func(output_image, filename, y_flip);
2422		}
2423	}
2424
2425	// Should never reach here - get_output_filename_enforced_bitness should
2426	// have acted as a preflight check
2427	return false;
2428}
2429
2430/* ============================================================================
2431	ASTC compressed file loading
2432============================================================================ */
/**
 * @brief The file header of a compressed image file.
 *
 * load_cimage() and store_cimage() read and write this structure directly as
 * raw bytes, so the member order and sizes define the layout in the file.
 * Multi-byte values are stored least significant byte first.
 */
struct astc_header
{
	uint8_t magic[4];			// Bytes of ASTC_MAGIC_ID
	uint8_t block_x;			// Block dimensions
	uint8_t block_y;
	uint8_t block_z;
	uint8_t dim_x[3];			// dims = dim[0] + (dim[1] << 8) + (dim[2] << 16)
	uint8_t dim_y[3];			// Sizes are given in texels;
	uint8_t dim_z[3];			// block count is inferred
};

// Value that the magic field of a valid header must unpack to
static const uint32_t ASTC_MAGIC_ID = 0x5CA1AB13;
2445
/**
 * @brief Assemble four bytes into a single value, least significant byte first.
 *
 * @param a   The byte for bits 0-7.
 * @param b   The byte for bits 8-15.
 * @param c   The byte for bits 16-23.
 * @param d   The byte for bits 24-31.
 *
 * @return The assembled value.
 */
static unsigned int unpack_bytes(
	uint8_t a,
	uint8_t b,
	uint8_t c,
	uint8_t d
) {
	// Start from the most significant byte and shift each lower byte in
	unsigned int value = d;
	value = (value << 8) | c;
	value = (value << 8) | b;
	value = (value << 8) | a;
	return value;
}
2457
2458/* See header for documentation. */
2459int load_cimage(
2460	const char* filename,
2461	astc_compressed_image& img
2462) {
2463	std::ifstream file(filename, std::ios::in | std::ios::binary);
2464	if (!file)
2465	{
2466		print_error("ERROR: File open failed '%s'\n", filename);
2467		return 1;
2468	}
2469
2470	astc_header hdr;
2471	file.read(reinterpret_cast<char*>(&hdr), sizeof(astc_header));
2472	if (file.fail())
2473	{
2474		print_error("ERROR: File read failed '%s'\n", filename);
2475		return 1;
2476	}
2477
2478	unsigned int magicval = unpack_bytes(hdr.magic[0], hdr.magic[1], hdr.magic[2], hdr.magic[3]);
2479	if (magicval != ASTC_MAGIC_ID)
2480	{
2481		print_error("ERROR: File not recognized '%s'\n", filename);
2482		return 1;
2483	}
2484
2485	// Ensure these are not zero to avoid div by zero
2486	unsigned int block_x = astc::max(static_cast<unsigned int>(hdr.block_x), 1u);
2487	unsigned int block_y = astc::max(static_cast<unsigned int>(hdr.block_y), 1u);
2488	unsigned int block_z = astc::max(static_cast<unsigned int>(hdr.block_z), 1u);
2489
2490	unsigned int dim_x = unpack_bytes(hdr.dim_x[0], hdr.dim_x[1], hdr.dim_x[2], 0);
2491	unsigned int dim_y = unpack_bytes(hdr.dim_y[0], hdr.dim_y[1], hdr.dim_y[2], 0);
2492	unsigned int dim_z = unpack_bytes(hdr.dim_z[0], hdr.dim_z[1], hdr.dim_z[2], 0);
2493
2494	if (dim_x == 0 || dim_y == 0 || dim_z == 0)
2495	{
2496		print_error("ERROR: Image header corrupt '%s'\n", filename);
2497		return 1;
2498	}
2499
2500	unsigned int xblocks = (dim_x + block_x - 1) / block_x;
2501	unsigned int yblocks = (dim_y + block_y - 1) / block_y;
2502	unsigned int zblocks = (dim_z + block_z - 1) / block_z;
2503
2504	size_t data_size = xblocks * yblocks * zblocks * 16;
2505	uint8_t *buffer = new uint8_t[data_size];
2506
2507	file.read(reinterpret_cast<char*>(buffer), data_size);
2508	if (file.fail())
2509	{
2510		print_error("ERROR: Image data size exceeded file size '%s'\n", filename);
2511		delete[] buffer;
2512		return 1;
2513	}
2514
2515	img.data = buffer;
2516	img.data_len = data_size;
2517	img.block_x = block_x;
2518	img.block_y = block_y;
2519	img.block_z = block_z;
2520	img.dim_x = dim_x;
2521	img.dim_y = dim_y;
2522	img.dim_z = dim_z;
2523	return 0;
2524}
2525
2526/* See header for documentation. */
2527int store_cimage(
2528	const astc_compressed_image& img,
2529	const char* filename
2530) {
2531	astc_header hdr;
2532	hdr.magic[0] =  ASTC_MAGIC_ID        & 0xFF;
2533	hdr.magic[1] = (ASTC_MAGIC_ID >>  8) & 0xFF;
2534	hdr.magic[2] = (ASTC_MAGIC_ID >> 16) & 0xFF;
2535	hdr.magic[3] = (ASTC_MAGIC_ID >> 24) & 0xFF;
2536
2537	hdr.block_x = static_cast<uint8_t>(img.block_x);
2538	hdr.block_y = static_cast<uint8_t>(img.block_y);
2539	hdr.block_z = static_cast<uint8_t>(img.block_z);
2540
2541	hdr.dim_x[0] =  img.dim_x        & 0xFF;
2542	hdr.dim_x[1] = (img.dim_x >>  8) & 0xFF;
2543	hdr.dim_x[2] = (img.dim_x >> 16) & 0xFF;
2544
2545	hdr.dim_y[0] =  img.dim_y       & 0xFF;
2546	hdr.dim_y[1] = (img.dim_y >>  8) & 0xFF;
2547	hdr.dim_y[2] = (img.dim_y >> 16) & 0xFF;
2548
2549	hdr.dim_z[0] =  img.dim_z        & 0xFF;
2550	hdr.dim_z[1] = (img.dim_z >>  8) & 0xFF;
2551	hdr.dim_z[2] = (img.dim_z >> 16) & 0xFF;
2552
2553	std::ofstream file(filename, std::ios::out | std::ios::binary);
2554	if (!file)
2555	{
2556		print_error("ERROR: File open failed '%s'\n", filename);
2557		return 1;
2558	}
2559
2560	file.write(reinterpret_cast<char*>(&hdr), sizeof(astc_header));
2561	file.write(reinterpret_cast<char*>(img.data), img.data_len);
2562	return 0;
2563}
2564