1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2024 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/*
19 * This module implements a variety of mathematical data types and library
20 * functions used by the codec.
21 */
22
23#ifndef ASTC_MATHLIB_H_INCLUDED
24#define ASTC_MATHLIB_H_INCLUDED
25
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29
// Population-count instruction support. A value supplied by the build system
// wins; otherwise it is enabled (1) when the compiler defines __POPCNT__ and
// disabled (0) when it does not.
#if !defined(ASTCENC_POPCNT)
  #ifdef __POPCNT__
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif // !defined(ASTCENC_POPCNT)
37
// Half-float conversion instruction support. A value supplied by the build
// system wins; otherwise it is enabled (1) when the compiler defines __F16C__
// and disabled (0) when it does not.
#if !defined(ASTCENC_F16C)
  #ifdef __F16C__
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif // !defined(ASTCENC_F16C)
45
// SSE level selection. A value supplied by the build system wins; otherwise
// the highest level advertised by the compiler is encoded as:
//   42 = SSE4.2, 41 = SSE4.1, 20 = SSE2, 0 = no SSE.
#if !defined(ASTCENC_SSE)
  #ifdef __SSE4_2__
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE2__)
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif // !defined(ASTCENC_SSE)
57
// AVX level selection. A value supplied by the build system wins; otherwise
// the level advertised by the compiler is encoded as:
//   2 = AVX2, 1 = AVX, 0 = no AVX.
#if !defined(ASTCENC_AVX)
  #ifdef __AVX2__
    #define ASTCENC_AVX 2
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif // !defined(ASTCENC_AVX)
67
// NEON support. A value supplied by the build system wins; otherwise it is
// enabled (1) for AArch64 targets (__aarch64__ defined) and disabled (0) for
// everything else.
#if !defined(ASTCENC_NEON)
  #ifdef __aarch64__
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif // !defined(ASTCENC_NEON)
75
// Alignment, in bytes, matching the widest SIMD vector in use:
//   32 for AVX builds, 16 for SSE or NEON builds, and 0 (meaning "use the
//   default alignment") for builds without SIMD.
#if ASTCENC_AVX != 0
  #define ASTCENC_VECALIGN 32
#elif (ASTCENC_SSE != 0) || (ASTCENC_NEON != 0)
  #define ASTCENC_VECALIGN 16
#else
  #define ASTCENC_VECALIGN 0
#endif
85
// Alignment specifier for SIMD-friendly data. C++11 says alignas(0) must be
// ignored, but some GCC versions do not honour that, so the specifier is
// omitted entirely when no vector alignment is requested.
#if ASTCENC_VECALIGN <= 0
	#define ASTCENC_ALIGNAS
#else
	#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
#endif
93
94#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
95	#include <immintrin.h>
96#endif
97
98/* ============================================================================
99  Fast math library; note that many of the higher-order functions in this set
100  use approximations which are less accurate, but faster, than <cmath> standard
101  library equivalents.
102
103  Note: Many of these are not necessarily faster than simple C versions when
104  used on a single scalar value, but are included for testing purposes as most
105  have an option based on SSE intrinsics and therefore provide an obvious route
106  to future vectorization.
107============================================================================ */
108
// Overlay of a 32-bit unsigned integer, a 32-bit signed integer and a float
// in the same storage, used to manipulate float bit patterns.
union if32
{
	uint32_t u;
	int32_t s;
	float f;
};
116
117// These are namespaced to avoid colliding with C standard library functions.
118namespace astc
119{
120
/** @brief Single-precision value of pi; usable in constant expressions. */
static constexpr float PI          = 3.14159265358979323846f;

/** @brief Single-precision value of pi / 2; usable in constant expressions. */
static constexpr float PI_OVER_TWO = 1.57079632679489661923f;
123
/**
 * @brief Compute the absolute value of a single-precision float.
 *
 * Thin wrapper that forwards to @c std::fabs.
 *
 * @param v   The input value.
 *
 * @return The magnitude of @c v.
 */
static inline float fabs(float v)
{
	const float magnitude = std::fabs(v);
	return magnitude;
}
135
/**
 * @brief Test whether a float value is a NaN.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	// A NaN is the only float value that does not compare equal to itself
	const bool equals_itself = (v == v);
	return !equals_itself;
}
147
/**
 * @brief Return the smaller of two values.
 *
 * @c p is returned only when <tt>p < q</tt> holds. For floats any comparison
 * involving a NaN is false, so the result is @c q whenever either input is a
 * NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q)
{
	if (p < q)
	{
		return p;
	}

	return q;
}
163
/**
 * @brief Return the smallest of three values.
 *
 * Evaluated as two pairwise steps using the two-argument @c min: first
 * @c p against @c q, then that result against @c r. For floats, a step that
 * involves a NaN yields its second operand.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	T smallest_pq = min(p, q);
	return min(smallest_pq, r);
}
180
/**
 * @brief Return the smallest of four values.
 *
 * Evaluated as three pairwise steps using the two-argument @c min: @c p
 * against @c q, @c r against @c s, then the two partial results against each
 * other. For floats, a step that involves a NaN yields its second operand.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	T smallest_pq = min(p, q);
	T smallest_rs = min(r, s);
	return min(smallest_pq, smallest_rs);
}
198
/**
 * @brief Return the larger of two values.
 *
 * @c p is returned only when <tt>p > q</tt> holds. For floats any comparison
 * involving a NaN is false, so the result is @c q whenever either input is a
 * NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q)
{
	if (p > q)
	{
		return p;
	}

	return q;
}
214
/**
 * @brief Return the largest of three values.
 *
 * Evaluated as two pairwise steps using the two-argument @c max: first
 * @c p against @c q, then that result against @c r. For floats, a step that
 * involves a NaN yields its second operand.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	T largest_pq = max(p, q);
	return max(largest_pq, r);
}
231
/**
 * @brief Return the largest of four values.
 *
 * Evaluated as three pairwise steps using the two-argument @c max: @c p
 * against @c q, @c r against @c s, then the two partial results against each
 * other. For floats, a step that involves a NaN yields its second operand.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	T largest_pq = max(p, q);
	T largest_rs = max(r, s);
	return max(largest_pq, largest_rs);
}
249
/**
 * @brief Clamp a value to the inclusive range [@c mn, @c mx].
 *
 * For floats, a NaN input is turned into @c mn.
 *
 * @param v      The value to clamp.
 * @param mn     The min value (inclusive).
 * @param mx     The max value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// The order of these tests must be preserved. Any comparison against a
	// NaN is false, so a NaN input fails both tests and lands on mn.
	return (v > mx) ? mx
	     : (v > mn) ? v
	     :            mn;
}
270
271/**
272 * @brief Clamp a float value between 0.0f and 1.0f.
273 *
274 * NaNs are turned into 0.0f.
275 *
276 * @param v   The value to clamp.
277 *
278 * @return The clamped value.
279 */
280static inline float clamp1f(float v)
281{
282	return astc::clamp(v, 0.0f, 1.0f);
283}
284
285/**
286 * @brief Clamp a float value between 0.0f and 255.0f.
287 *
288 * NaNs are turned into 0.0f.
289 *
290 * @param v  The value to clamp.
291 *
292 * @return The clamped value.
293 */
294static inline float clamp255f(float v)
295{
296	return astc::clamp(v, 0.0f, 255.0f);
297}
298
/**
 * @brief Round a single-precision float down to a whole number.
 *
 * Forwards to @c std::floor, so the result is the largest whole value that
 * is not greater than @c v, returned as a float.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline float flt_rd(float v)
{
	const float floored = std::floor(v);
	return floored;
}
310
/**
 * @brief Round a single-precision float to the nearest integer.
 *
 * Implemented as "add one half, then truncate". Because the float-to-int
 * conversion truncates toward zero, the result is only a true
 * round-to-nearest for non-negative inputs; negative inputs are rounded
 * incorrectly (e.g. -0.6f produces 0).
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
	// Bias by one half and let the integer conversion discard the fraction
	int rounded = static_cast<int>(0.5f + v);
	return rounded;
}
323
/**
 * @brief Convert a single-precision float to an integer, dropping the
 *        fractional part.
 *
 * The conversion truncates toward zero. This equals rounding down for
 * non-negative inputs, but negative inputs with a fractional part move
 * toward zero instead (e.g. -1.5f produces -1).
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rd(float v)
{
	int truncated = static_cast<int>(v);
	return truncated;
}
335
/**
 * @brief Reinterpret the bits of a single-precision float as an integer.
 *
 * No numeric conversion takes place; the object representation of @c v is
 * copied unchanged into an @c int. The copy uses @c std::memcpy because
 * reading a union member other than the one last written is undefined
 * behavior in C++.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline int float_as_int(float v)
{
	static_assert(sizeof(int) == sizeof(float),
	              "float_as_int requires int and float to be the same size");

	int bits;
	std::memcpy(&bits, &v, sizeof(bits));
	return bits;
}
349
/**
 * @brief Reinterpret the bits of an integer as a single-precision float.
 *
 * No numeric conversion takes place; the object representation of @c v is
 * copied unchanged into a @c float. The copy uses @c std::memcpy because
 * reading a union member other than the one last written is undefined
 * behavior in C++.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline float int_as_float(int v)
{
	static_assert(sizeof(int) == sizeof(float),
	              "int_as_float requires int and float to be the same size");

	float value;
	std::memcpy(&value, &v, sizeof(value));
	return value;
}
363
/**
 * @brief Compute 1.0 / sqrt(val).
 *
 * Despite living in the fast math library, this implementation applies no
 * approximation: it divides 1.0f by the result of @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The reciprocal square root of @c v.
 */
static inline float rsqrt(float v)
{
	const float root = std::sqrt(v);
	return 1.0f / root;
}
375
/**
 * @brief Compute sqrt(val).
 *
 * Despite living in the fast math library, this implementation applies no
 * approximation: it forwards directly to @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
	const float root = std::sqrt(v);
	return root;
}
387
/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * Works directly on the bit pattern of @c v: the 8-bit exponent field is read
 * and rebased, and the returned mantissa is @c v with its exponent field
 * forced so that its magnitude lies in [0.5, 1.0). The sign of @c v is kept.
 *
 * There is no special handling of zero, denormal, infinite or NaN inputs;
 * the same bit manipulation is applied to them, so the results only match
 * @c std::frexp for normal values.
 *
 * The bit pattern is accessed with @c std::memcpy because reading a union
 * member other than the one last written is undefined behavior in C++.
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	static_assert(sizeof(uint32_t) == sizeof(float),
	              "frexp requires a 32-bit float");

	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Exponent field lives in bits 23..30; subtracting 126 (rather than the
	// 127 bias) accounts for the mantissa being scaled into [0.5, 1.0)
	*expo = static_cast<int>((bits >> 23) & 0xFFu) - 126;

	// Keep the sign and fraction bits and force the exponent field to 126,
	// which is the encoding of 2^-1
	bits = (bits & 0x807FFFFFu) | 0x3F000000u;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}
404
/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers for
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
 *
 * @param state The state structure to initialize.
 */
416void rand_init(uint64_t state[2]);
417
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The state structure to use/update.
 *
 * @return The next random number in the sequence.
 */
427uint64_t rand(uint64_t state[2]);
428
429}
430
431/* ============================================================================
432  Softfloat library with fp32 and fp16 conversion functionality.
433============================================================================ */
// The software fp32 <-> fp16 conversion routines are only declared when
// neither F16C nor NEON is enabled.
#if !((ASTCENC_F16C != 0) || (ASTCENC_NEON != 0))
	/* Convert a 32-bit float to a 16-bit value held in a uint16_t. */
	uint16_t float_to_sf16(float val);

	/* Convert a 16-bit value held in a uint16_t to a 32-bit float. */
	float sf16_to_float(uint16_t val);
#endif
439
440/*********************************
441  Vector library
442*********************************/
443#include "astcenc_vecmathlib.h"
444
445/*********************************
446  Declaration of line types
447*********************************/
// Parametric line, 2D: every point on the line is given by a + b * t for a
// scalar parameter t, so a is the point at t = 0 and b is the term scaled by t.

struct line2
{
	vfloat4 a;   // Constant term of the line equation
	vfloat4 b;   // Term multiplied by the parameter t
};
455
// Parametric line, 3D.
// NOTE(review): presumably the same a + b * t form as line2 - confirm.
struct line3
{
	vfloat4 a;   // Presumably the constant term of the line equation
	vfloat4 b;   // Presumably the term multiplied by the parameter t
};
462
// NOTE(review): undocumented in the original; presumably a parametric 4D line
// of the form a + b * t, by analogy with line2 and line3 - confirm.
struct line4
{
	vfloat4 a;   // Presumably the constant term of the line equation
	vfloat4 b;   // Presumably the term multiplied by the parameter t
};
468
469
// NOTE(review): undocumented in the original. Presumably holds values
// precomputed from a line2; the meaning of amod and bs is not defined in this
// header - confirm against the code that fills this structure.
struct processed_line2
{
	vfloat4 amod;
	vfloat4 bs;
};
475
// NOTE(review): undocumented in the original. Presumably holds values
// precomputed from a line3; the meaning of amod and bs is not defined in this
// header - confirm against the code that fills this structure.
struct processed_line3
{
	vfloat4 amod;
	vfloat4 bs;
};
481
// NOTE(review): undocumented in the original. Presumably holds values
// precomputed from a line4; the meaning of amod and bs is not defined in this
// header - confirm against the code that fills this structure.
struct processed_line4
{
	vfloat4 amod;
	vfloat4 bs;
};
487
488#endif
489