1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0
2cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
3cc1dc7a3Sopenharmony_ci// Copyright 2011-2024 Arm Limited
4cc1dc7a3Sopenharmony_ci//
5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy
7cc1dc7a3Sopenharmony_ci// of the License at:
8cc1dc7a3Sopenharmony_ci//
9cc1dc7a3Sopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
10cc1dc7a3Sopenharmony_ci//
11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software
12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations
15cc1dc7a3Sopenharmony_ci// under the License.
16cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
17cc1dc7a3Sopenharmony_ci
18cc1dc7a3Sopenharmony_ci/*
19cc1dc7a3Sopenharmony_ci * This module implements a variety of mathematical data types and library
20cc1dc7a3Sopenharmony_ci * functions used by the codec.
21cc1dc7a3Sopenharmony_ci */
22cc1dc7a3Sopenharmony_ci
23cc1dc7a3Sopenharmony_ci#ifndef ASTC_MATHLIB_H_INCLUDED
24cc1dc7a3Sopenharmony_ci#define ASTC_MATHLIB_H_INCLUDED
25cc1dc7a3Sopenharmony_ci
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29cc1dc7a3Sopenharmony_ci
// ASTCENC_POPCNT: 1 when the compiler predefines __POPCNT__, otherwise 0.
// A value already defined before this header is included is left untouched.
#ifndef ASTCENC_POPCNT
  #if defined(__POPCNT__)
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif
37cc1dc7a3Sopenharmony_ci
// ASTCENC_F16C: 1 when the compiler predefines __F16C__, otherwise 0.
// A value already defined before this header is included is left untouched.
// When this and ASTCENC_NEON are both 0 the softfloat fp16 conversion
// functions are declared later in this header.
#ifndef ASTCENC_F16C
  #if defined(__F16C__)
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif
45cc1dc7a3Sopenharmony_ci
// ASTCENC_SSE: highest SSE level reported by the compiler, encoded as
// 42 (__SSE4_2__), 41 (__SSE4_1__), 20 (__SSE2__), or 0 when none is defined.
// A value already defined before this header is included is left untouched.
#ifndef ASTCENC_SSE
  #if defined(__SSE4_2__)
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE2__)
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif
57cc1dc7a3Sopenharmony_ci
// ASTCENC_AVX: 2 when the compiler predefines __AVX2__, 1 when it predefines
// only __AVX__, otherwise 0.
// A value already defined before this header is included is left untouched.
#ifndef ASTCENC_AVX
  #if defined(__AVX2__)
    #define ASTCENC_AVX 2
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif
67cc1dc7a3Sopenharmony_ci
// ASTCENC_NEON: 1 when compiling for AArch64 (__aarch64__ defined), else 0.
// A value already defined before this header is included is left untouched.
#ifndef ASTCENC_NEON
  #if defined(__aarch64__)
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif
75cc1dc7a3Sopenharmony_ci
// Force vector-sized SIMD alignment: ASTCENC_VECALIGN is the alignment in
// bytes, 32 for any AVX level and 16 for SSE or NEON builds.
#if ASTCENC_AVX
  #define ASTCENC_VECALIGN 32
#elif ASTCENC_SSE || ASTCENC_NEON
  #define ASTCENC_VECALIGN 16
// Use default alignment for non-SIMD builds; 0 means "no explicit alignment"
#else
  #define ASTCENC_VECALIGN 0
#endif
85cc1dc7a3Sopenharmony_ci
// ASTCENC_ALIGNAS expands to alignas(ASTCENC_VECALIGN) for SIMD builds and to
// nothing otherwise.
//
// C++11 states that alignas(0) should be ignored but GCC doesn't do
// this on some versions, so work around it and avoid emitting alignas(0).
#if ASTCENC_VECALIGN > 0
	#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
#else
	#define ASTCENC_ALIGNAS
#endif
93cc1dc7a3Sopenharmony_ci
94cc1dc7a3Sopenharmony_ci#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
95cc1dc7a3Sopenharmony_ci	#include <immintrin.h>
96cc1dc7a3Sopenharmony_ci#endif
97cc1dc7a3Sopenharmony_ci
98cc1dc7a3Sopenharmony_ci/* ============================================================================
99cc1dc7a3Sopenharmony_ci  Fast math library; note that many of the higher-order functions in this set
100cc1dc7a3Sopenharmony_ci  use approximations which are less accurate, but faster, than <cmath> standard
101cc1dc7a3Sopenharmony_ci  library equivalents.
102cc1dc7a3Sopenharmony_ci
103cc1dc7a3Sopenharmony_ci  Note: Many of these are not necessarily faster than simple C versions when
104cc1dc7a3Sopenharmony_ci  used on a single scalar value, but are included for testing purposes as most
105cc1dc7a3Sopenharmony_ci  have an option based on SSE intrinsics and therefore provide an obvious route
106cc1dc7a3Sopenharmony_ci  to future vectorization.
107cc1dc7a3Sopenharmony_ci============================================================================ */
108cc1dc7a3Sopenharmony_ci
// Union for manipulation of float bit patterns: the same 32 bits of storage
// viewed as an unsigned integer, a signed integer, or a float.
typedef union
{
	uint32_t u;  // Bit pattern as an unsigned 32-bit integer
	int32_t s;   // Bit pattern as a signed 32-bit integer
	float f;     // Bit pattern as a single-precision float
} if32;
116cc1dc7a3Sopenharmony_ci
117cc1dc7a3Sopenharmony_ci// These are namespaced to avoid colliding with C standard library functions.
118cc1dc7a3Sopenharmony_cinamespace astc
119cc1dc7a3Sopenharmony_ci{
120cc1dc7a3Sopenharmony_ci
// Single-precision constants for pi and pi / 2. The literals carry more digits
// than a float can hold, so they are rounded to float by the compiler.
static const float PI          = 3.14159265358979323846f;
static const float PI_OVER_TWO = 1.57079632679489661923f;
123cc1dc7a3Sopenharmony_ci
/**
 * @brief Compute the magnitude of a single-precision float.
 *
 * This is a thin wrapper which forwards to @c std::fabs.
 *
 * @param v   The input value.
 *
 * @return The absolute value of @c v.
 */
static inline float fabs(float v)
{
	const float magnitude = std::fabs(v);
	return magnitude;
}
135cc1dc7a3Sopenharmony_ci
/**
 * @brief Test whether a float value is a NaN.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	// A NaN is the only float value that does not compare equal to itself
	const bool equals_self = (v == v);
	return !equals_self;
}
147cc1dc7a3Sopenharmony_ci
/**
 * @brief Select the smaller of two values.
 *
 * For floats, a NaN in @c p is replaced by @c q, because any comparison with
 * a NaN is false. A NaN in @c q is returned unchanged.
 *
 * @param p   The first candidate.
 * @param q   The second candidate; also the result when the test fails.
 *
 * @return @c p if it is strictly less than @c q, otherwise @c q.
 */
template<typename T>
static inline T min(T p, T q)
{
	if (p < q)
	{
		return p;
	}

	return q;
}
163cc1dc7a3Sopenharmony_ci
/**
 * @brief Select the smallest of three values.
 *
 * Built from two applications of the two-value minimum. For floats, a NaN in
 * @c p or @c q is discarded in favour of a later argument; a NaN in @c r is
 * returned unchanged.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	T smallest_pq = min(p, q);
	return min(smallest_pq, r);
}
180cc1dc7a3Sopenharmony_ci
/**
 * @brief Select the smallest of four values.
 *
 * The pairs (@c p, @c q) and (@c r, @c s) are reduced independently and the
 * two partial results are then reduced again. For floats, a NaN in @c p,
 * @c q or @c r is discarded in favour of a later argument; a NaN in @c s is
 * returned unchanged.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 * @param s   The fourth candidate.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	T smallest_pq = min(p, q);
	T smallest_rs = min(r, s);
	return min(smallest_pq, smallest_rs);
}
198cc1dc7a3Sopenharmony_ci
/**
 * @brief Select the larger of two values.
 *
 * For floats, a NaN in @c p is replaced by @c q, because any comparison with
 * a NaN is false. A NaN in @c q is returned unchanged.
 *
 * @param p   The first candidate.
 * @param q   The second candidate; also the result when the test fails.
 *
 * @return @c p if it is strictly greater than @c q, otherwise @c q.
 */
template<typename T>
static inline T max(T p, T q)
{
	if (p > q)
	{
		return p;
	}

	return q;
}
214cc1dc7a3Sopenharmony_ci
/**
 * @brief Select the largest of three values.
 *
 * Built from two applications of the two-value maximum. For floats, a NaN in
 * @c p or @c q is discarded in favour of a later argument; a NaN in @c r is
 * returned unchanged.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	T largest_pq = max(p, q);
	return max(largest_pq, r);
}
231cc1dc7a3Sopenharmony_ci
/**
 * @brief Select the largest of four values.
 *
 * The pairs (@c p, @c q) and (@c r, @c s) are reduced independently and the
 * two partial results are then reduced again. For floats, a NaN in @c p,
 * @c q or @c r is discarded in favour of a later argument; a NaN in @c s is
 * returned unchanged.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 * @param s   The fourth candidate.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	T largest_pq = max(p, q);
	T largest_rs = max(r, s);
	return max(largest_pq, largest_rs);
}
249cc1dc7a3Sopenharmony_ci
/**
 * @brief Clamp a value to the range @c mn to @c mx.
 *
 * For floats, a NaN in @c v is turned into @c mn.
 *
 * @param v      The value to clamp.
 * @param mn     The lower bound (inclusive).
 * @param mx     The upper bound (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// The order of the two tests is significant and must not change. Any
	// comparison with a NaN is false, so a NaN input fails both tests and
	// falls through to the lower bound.
	return (v > mx) ? mx
	     : (v > mn) ? v
	     : mn;
}
270cc1dc7a3Sopenharmony_ci
271cc1dc7a3Sopenharmony_ci/**
272cc1dc7a3Sopenharmony_ci * @brief Clamp a float value between 0.0f and 1.0f.
273cc1dc7a3Sopenharmony_ci *
274cc1dc7a3Sopenharmony_ci * NaNs are turned into 0.0f.
275cc1dc7a3Sopenharmony_ci *
276cc1dc7a3Sopenharmony_ci * @param v   The value to clamp.
277cc1dc7a3Sopenharmony_ci *
278cc1dc7a3Sopenharmony_ci * @return The clamped value.
279cc1dc7a3Sopenharmony_ci */
280cc1dc7a3Sopenharmony_cistatic inline float clamp1f(float v)
281cc1dc7a3Sopenharmony_ci{
282cc1dc7a3Sopenharmony_ci	return astc::clamp(v, 0.0f, 1.0f);
283cc1dc7a3Sopenharmony_ci}
284cc1dc7a3Sopenharmony_ci
285cc1dc7a3Sopenharmony_ci/**
286cc1dc7a3Sopenharmony_ci * @brief Clamp a float value between 0.0f and 255.0f.
287cc1dc7a3Sopenharmony_ci *
288cc1dc7a3Sopenharmony_ci * NaNs are turned into 0.0f.
289cc1dc7a3Sopenharmony_ci *
290cc1dc7a3Sopenharmony_ci * @param v  The value to clamp.
291cc1dc7a3Sopenharmony_ci *
292cc1dc7a3Sopenharmony_ci * @return The clamped value.
293cc1dc7a3Sopenharmony_ci */
294cc1dc7a3Sopenharmony_cistatic inline float clamp255f(float v)
295cc1dc7a3Sopenharmony_ci{
296cc1dc7a3Sopenharmony_ci	return astc::clamp(v, 0.0f, 255.0f);
297cc1dc7a3Sopenharmony_ci}
298cc1dc7a3Sopenharmony_ci
/**
 * @brief Round a single-precision float down to an integral value.
 *
 * This is a thin wrapper which forwards to @c std::floor; the result is still
 * a float.
 *
 * @param v   The value to round.
 *
 * @return The largest integral value that is not greater than @c v.
 */
static inline float flt_rd(float v)
{
	const float floored = std::floor(v);
	return floored;
}
310cc1dc7a3Sopenharmony_ci
/**
 * @brief Convert a single-precision float to an integer, rounding to nearest.
 *
 * The conversion adds 0.5f and then truncates toward zero. This rounds to the
 * nearest integer for non-negative inputs only; negative inputs are not
 * rounded to nearest (for example -1.7f becomes -1).
 *
 * @param v   The value to round.
 *
 * @return The converted value.
 */
static inline int flt2int_rtn(float v)
{
	// Bias by half a unit, then let the integer conversion drop the fraction
	return static_cast<int>(0.5f + v);
}
323cc1dc7a3Sopenharmony_ci
/**
 * @brief Convert a single-precision float to an integer, discarding the
 * fractional part.
 *
 * The conversion truncates toward zero, so it is a true round-down only for
 * non-negative inputs (for example -1.5f becomes -1).
 *
 * @param v   The value to convert.
 *
 * @return The converted value.
 */
static inline int flt2int_rd(float v)
{
	const int truncated = static_cast<int>(v);
	return truncated;
}
335cc1dc7a3Sopenharmony_ci
/**
 * @brief Reinterpret the bits of a single-precision float as an integer.
 *
 * No numeric conversion takes place; the object representation of @c v is
 * copied unchanged into the result.
 *
 * @param v   The value to bitcast.
 *
 * @return The integer holding the same bit pattern as @c v.
 */
static inline int float_as_int(float v)
{
	static_assert(sizeof(int) == sizeof(float),
	              "float_as_int requires int and float to be the same size");

	// Copy the bytes instead of reading an inactive union member, which is
	// undefined behavior in C++; compilers reduce this to a register move
	int bits;
	std::memcpy(&bits, &v, sizeof(bits));
	return bits;
}
349cc1dc7a3Sopenharmony_ci
/**
 * @brief Reinterpret the bits of an integer as a single-precision float.
 *
 * No numeric conversion takes place; the object representation of @c v is
 * copied unchanged into the result.
 *
 * @param v   The value to bitcast.
 *
 * @return The float holding the same bit pattern as @c v.
 */
static inline float int_as_float(int v)
{
	static_assert(sizeof(int) == sizeof(float),
	              "int_as_float requires int and float to be the same size");

	// Copy the bytes instead of reading an inactive union member, which is
	// undefined behavior in C++; compilers reduce this to a register move
	float value;
	std::memcpy(&value, &v, sizeof(value));
	return value;
}
363cc1dc7a3Sopenharmony_ci
/**
 * @brief Compute 1.0 / sqrt(val).
 *
 * This implementation is not an approximation; it divides 1.0f by the result
 * of @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The reciprocal square root of @c v.
 */
static inline float rsqrt(float v)
{
	const float root = std::sqrt(v);
	return 1.0f / root;
}
375cc1dc7a3Sopenharmony_ci
/**
 * @brief Compute sqrt(val).
 *
 * This implementation is not an approximation; it is a thin wrapper which
 * forwards to @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
	const float root = std::sqrt(v);
	return root;
}
387cc1dc7a3Sopenharmony_ci
/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * The value is split directly from its bit pattern such that
 * @c v == mantissa * 2^expo, with the mantissa magnitude in [0.5, 1.0) and
 * the sign of @c v preserved.
 *
 * Unlike @c std::frexp no input is special-cased: the exponent field is read
 * and rewritten unconditionally, so zero, denormal, infinite and NaN inputs
 * do not give the standard library results (for example an input of 0.0f
 * yields a mantissa of 0.5f and an exponent of -126).
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent; must not be null.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	static_assert(sizeof(uint32_t) == sizeof(float),
	              "frexp requires a 32-bit float");

	// Copy the bytes rather than punning through a union, which is undefined
	// behavior in C++
	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Convert the 8-bit biased exponent to int before removing the bias, so
	// that the subtraction is signed and cannot wrap around
	*expo = static_cast<int>((bits >> 23) & 0xFFu) - 126;

	// Keep the sign and mantissa bits, and force the exponent field to 126 so
	// that the magnitude lands in [0.5, 1.0)
	bits = (bits & 0x807FFFFFu) | 0x3F000000u;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}
404cc1dc7a3Sopenharmony_ci
/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers for
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
 *
 * @param state The two-word state structure to initialize.
 */
void rand_init(uint64_t state[2]);
417cc1dc7a3Sopenharmony_ci
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The two-word state structure to use/update.
 *
 * @return The next value in the sequence.
 */
uint64_t rand(uint64_t state[2]);
428cc1dc7a3Sopenharmony_ci
429cc1dc7a3Sopenharmony_ci}
430cc1dc7a3Sopenharmony_ci
/* ============================================================================
  Softfloat library with fp32 and fp16 conversion functionality.

  These functions are only declared when neither ASTCENC_F16C nor ASTCENC_NEON
  is enabled.
  NOTE(review): presumably builds with F16C or NEON use hardware conversions
  instead; confirm against the vector library.
============================================================================ */
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
	/* Narrowing fp32 -> fp16 conversion; the fp16 value is returned as raw bits. */
	uint16_t float_to_sf16(float val);
	/* Widening fp16 -> fp32 conversion; the fp16 value is passed as raw bits. */
	float sf16_to_float(uint16_t val);
#endif
439cc1dc7a3Sopenharmony_ci
440cc1dc7a3Sopenharmony_ci/*********************************
441cc1dc7a3Sopenharmony_ci  Vector library
442cc1dc7a3Sopenharmony_ci*********************************/
443cc1dc7a3Sopenharmony_ci#include "astcenc_vecmathlib.h"
444cc1dc7a3Sopenharmony_ci
445cc1dc7a3Sopenharmony_ci/*********************************
446cc1dc7a3Sopenharmony_ci  Declaration of line types
447cc1dc7a3Sopenharmony_ci*********************************/
// Parametric line, 2D: The line is given by line = a + b * t.
// NOTE(review): both fields are 4-wide vectors; presumably only two lanes are
// meaningful for the 2D case - confirm against the users of this type.
struct line2
{
	vfloat4 a;  // The point on the line at t = 0
	vfloat4 b;  // The direction, scaled by the parameter t
};
455cc1dc7a3Sopenharmony_ci
// Parametric line, 3D.
// NOTE(review): presumably line = a + b * t as for line2, with three
// meaningful lanes - confirm against the users of this type.
struct line3
{
	vfloat4 a;
	vfloat4 b;
};
462cc1dc7a3Sopenharmony_ci
// Same layout as line2 and line3.
// NOTE(review): presumably the 4D parametric line, line = a + b * t - confirm
// against the users of this type.
struct line4
{
	vfloat4 a;
	vfloat4 b;
};
468cc1dc7a3Sopenharmony_ci
469cc1dc7a3Sopenharmony_ci
// NOTE(review): presumably a preprocessed form of line2; how amod and bs are
// derived is not visible in this header - confirm where instances are built.
struct processed_line2
{
	vfloat4 amod;
	vfloat4 bs;
};
475cc1dc7a3Sopenharmony_ci
// NOTE(review): presumably a preprocessed form of line3; how amod and bs are
// derived is not visible in this header - confirm where instances are built.
struct processed_line3
{
	vfloat4 amod;
	vfloat4 bs;
};
481cc1dc7a3Sopenharmony_ci
// NOTE(review): presumably a preprocessed form of line4; how amod and bs are
// derived is not visible in this header - confirm where instances are built.
struct processed_line4
{
	vfloat4 amod;
	vfloat4 bs;
};
487cc1dc7a3Sopenharmony_ci
488cc1dc7a3Sopenharmony_ci#endif
489