1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0 2cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 3cc1dc7a3Sopenharmony_ci// Copyright 2011-2024 Arm Limited 4cc1dc7a3Sopenharmony_ci// 5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy 7cc1dc7a3Sopenharmony_ci// of the License at: 8cc1dc7a3Sopenharmony_ci// 9cc1dc7a3Sopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 10cc1dc7a3Sopenharmony_ci// 11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software 12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations 15cc1dc7a3Sopenharmony_ci// under the License. 16cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 17cc1dc7a3Sopenharmony_ci 18cc1dc7a3Sopenharmony_ci/* 19cc1dc7a3Sopenharmony_ci * This module implements a variety of mathematical data types and library 20cc1dc7a3Sopenharmony_ci * functions used by the codec. 21cc1dc7a3Sopenharmony_ci */ 22cc1dc7a3Sopenharmony_ci 23cc1dc7a3Sopenharmony_ci#ifndef ASTC_MATHLIB_H_INCLUDED 24cc1dc7a3Sopenharmony_ci#define ASTC_MATHLIB_H_INCLUDED 25cc1dc7a3Sopenharmony_ci 26cc1dc7a3Sopenharmony_ci#include <cassert> 27cc1dc7a3Sopenharmony_ci#include <cstdint> 28cc1dc7a3Sopenharmony_ci#include <cmath> 29cc1dc7a3Sopenharmony_ci 30cc1dc7a3Sopenharmony_ci#ifndef ASTCENC_POPCNT 31cc1dc7a3Sopenharmony_ci #if defined(__POPCNT__) 32cc1dc7a3Sopenharmony_ci #define ASTCENC_POPCNT 1 33cc1dc7a3Sopenharmony_ci #else 34cc1dc7a3Sopenharmony_ci #define ASTCENC_POPCNT 0 35cc1dc7a3Sopenharmony_ci #endif 36cc1dc7a3Sopenharmony_ci#endif 37cc1dc7a3Sopenharmony_ci 38cc1dc7a3Sopenharmony_ci#ifndef ASTCENC_F16C 39cc1dc7a3Sopenharmony_ci #if defined(__F16C__) 40cc1dc7a3Sopenharmony_ci #define ASTCENC_F16C 1 41cc1dc7a3Sopenharmony_ci #else 42cc1dc7a3Sopenharmony_ci #define ASTCENC_F16C 0 43cc1dc7a3Sopenharmony_ci #endif 44cc1dc7a3Sopenharmony_ci#endif 45cc1dc7a3Sopenharmony_ci 46cc1dc7a3Sopenharmony_ci#ifndef ASTCENC_SSE 47cc1dc7a3Sopenharmony_ci #if defined(__SSE4_2__) 48cc1dc7a3Sopenharmony_ci #define ASTCENC_SSE 42 49cc1dc7a3Sopenharmony_ci #elif defined(__SSE4_1__) 50cc1dc7a3Sopenharmony_ci #define ASTCENC_SSE 41 51cc1dc7a3Sopenharmony_ci #elif defined(__SSE2__) 52cc1dc7a3Sopenharmony_ci #define ASTCENC_SSE 20 53cc1dc7a3Sopenharmony_ci #else 54cc1dc7a3Sopenharmony_ci #define ASTCENC_SSE 0 55cc1dc7a3Sopenharmony_ci #endif 56cc1dc7a3Sopenharmony_ci#endif 57cc1dc7a3Sopenharmony_ci 58cc1dc7a3Sopenharmony_ci#ifndef ASTCENC_AVX 59cc1dc7a3Sopenharmony_ci #if defined(__AVX2__) 60cc1dc7a3Sopenharmony_ci #define ASTCENC_AVX 2 61cc1dc7a3Sopenharmony_ci #elif defined(__AVX__) 62cc1dc7a3Sopenharmony_ci #define ASTCENC_AVX 1 63cc1dc7a3Sopenharmony_ci #else 64cc1dc7a3Sopenharmony_ci #define ASTCENC_AVX 0 65cc1dc7a3Sopenharmony_ci #endif 66cc1dc7a3Sopenharmony_ci#endif 67cc1dc7a3Sopenharmony_ci 68cc1dc7a3Sopenharmony_ci#ifndef ASTCENC_NEON 69cc1dc7a3Sopenharmony_ci #if defined(__aarch64__) 70cc1dc7a3Sopenharmony_ci #define ASTCENC_NEON 1 71cc1dc7a3Sopenharmony_ci #else 72cc1dc7a3Sopenharmony_ci #define ASTCENC_NEON 0 73cc1dc7a3Sopenharmony_ci #endif 74cc1dc7a3Sopenharmony_ci#endif 75cc1dc7a3Sopenharmony_ci 76cc1dc7a3Sopenharmony_ci// Force vector-sized SIMD alignment 77cc1dc7a3Sopenharmony_ci#if ASTCENC_AVX 78cc1dc7a3Sopenharmony_ci #define ASTCENC_VECALIGN 32 79cc1dc7a3Sopenharmony_ci#elif ASTCENC_SSE || ASTCENC_NEON 80cc1dc7a3Sopenharmony_ci #define ASTCENC_VECALIGN 16 81cc1dc7a3Sopenharmony_ci// Use default alignment for non-SIMD builds 82cc1dc7a3Sopenharmony_ci#else 83cc1dc7a3Sopenharmony_ci #define ASTCENC_VECALIGN 0 84cc1dc7a3Sopenharmony_ci#endif 85cc1dc7a3Sopenharmony_ci 86cc1dc7a3Sopenharmony_ci// C++11 states that alignas(0) should be ignored but GCC doesn't do 87cc1dc7a3Sopenharmony_ci// this on some versions, so workaround and avoid emitting alignas(0) 88cc1dc7a3Sopenharmony_ci#if ASTCENC_VECALIGN > 0 89cc1dc7a3Sopenharmony_ci #define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN) 90cc1dc7a3Sopenharmony_ci#else 91cc1dc7a3Sopenharmony_ci #define ASTCENC_ALIGNAS 92cc1dc7a3Sopenharmony_ci#endif 93cc1dc7a3Sopenharmony_ci 94cc1dc7a3Sopenharmony_ci#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0 95cc1dc7a3Sopenharmony_ci #include <immintrin.h> 96cc1dc7a3Sopenharmony_ci#endif 97cc1dc7a3Sopenharmony_ci 98cc1dc7a3Sopenharmony_ci/* ============================================================================ 99cc1dc7a3Sopenharmony_ci Fast math library; note that many of the higher-order functions in this set 100cc1dc7a3Sopenharmony_ci use approximations which are less accurate, but faster, than <cmath> standard 101cc1dc7a3Sopenharmony_ci library equivalents. 102cc1dc7a3Sopenharmony_ci 103cc1dc7a3Sopenharmony_ci Note: Many of these are not necessarily faster than simple C versions when 104cc1dc7a3Sopenharmony_ci used on a single scalar value, but are included for testing purposes as most 105cc1dc7a3Sopenharmony_ci have an option based on SSE intrinsics and therefore provide an obvious route 106cc1dc7a3Sopenharmony_ci to future vectorization. 107cc1dc7a3Sopenharmony_ci============================================================================ */ 108cc1dc7a3Sopenharmony_ci 109cc1dc7a3Sopenharmony_ci// Union for manipulation of float bit patterns 110cc1dc7a3Sopenharmony_citypedef union 111cc1dc7a3Sopenharmony_ci{ 112cc1dc7a3Sopenharmony_ci uint32_t u; 113cc1dc7a3Sopenharmony_ci int32_t s; 114cc1dc7a3Sopenharmony_ci float f; 115cc1dc7a3Sopenharmony_ci} if32; 116cc1dc7a3Sopenharmony_ci 117cc1dc7a3Sopenharmony_ci// These are namespaced to avoid colliding with C standard library functions. 118cc1dc7a3Sopenharmony_cinamespace astc 119cc1dc7a3Sopenharmony_ci{ 120cc1dc7a3Sopenharmony_ci 121cc1dc7a3Sopenharmony_cistatic const float PI = 3.14159265358979323846f; 122cc1dc7a3Sopenharmony_cistatic const float PI_OVER_TWO = 1.57079632679489661923f; 123cc1dc7a3Sopenharmony_ci 124cc1dc7a3Sopenharmony_ci/** 125cc1dc7a3Sopenharmony_ci * @brief SP float absolute value. 126cc1dc7a3Sopenharmony_ci * 127cc1dc7a3Sopenharmony_ci * @param v The value to make absolute. 128cc1dc7a3Sopenharmony_ci * 129cc1dc7a3Sopenharmony_ci * @return The absolute value. 130cc1dc7a3Sopenharmony_ci */ 131cc1dc7a3Sopenharmony_cistatic inline float fabs(float v) 132cc1dc7a3Sopenharmony_ci{ 133cc1dc7a3Sopenharmony_ci return std::fabs(v); 134cc1dc7a3Sopenharmony_ci} 135cc1dc7a3Sopenharmony_ci 136cc1dc7a3Sopenharmony_ci/** 137cc1dc7a3Sopenharmony_ci * @brief Test if a float value is a nan. 138cc1dc7a3Sopenharmony_ci * 139cc1dc7a3Sopenharmony_ci * @param v The value test. 140cc1dc7a3Sopenharmony_ci * 141cc1dc7a3Sopenharmony_ci * @return Zero is not a NaN, non-zero otherwise. 142cc1dc7a3Sopenharmony_ci */ 143cc1dc7a3Sopenharmony_cistatic inline bool isnan(float v) 144cc1dc7a3Sopenharmony_ci{ 145cc1dc7a3Sopenharmony_ci return v != v; 146cc1dc7a3Sopenharmony_ci} 147cc1dc7a3Sopenharmony_ci 148cc1dc7a3Sopenharmony_ci/** 149cc1dc7a3Sopenharmony_ci * @brief Return the minimum of two values. 150cc1dc7a3Sopenharmony_ci * 151cc1dc7a3Sopenharmony_ci * For floats, NaNs are turned into @c q. 152cc1dc7a3Sopenharmony_ci * 153cc1dc7a3Sopenharmony_ci * @param p The first value to compare. 154cc1dc7a3Sopenharmony_ci * @param q The second value to compare. 155cc1dc7a3Sopenharmony_ci * 156cc1dc7a3Sopenharmony_ci * @return The smallest value. 157cc1dc7a3Sopenharmony_ci */ 158cc1dc7a3Sopenharmony_citemplate<typename T> 159cc1dc7a3Sopenharmony_cistatic inline T min(T p, T q) 160cc1dc7a3Sopenharmony_ci{ 161cc1dc7a3Sopenharmony_ci return p < q ? p : q; 162cc1dc7a3Sopenharmony_ci} 163cc1dc7a3Sopenharmony_ci 164cc1dc7a3Sopenharmony_ci/** 165cc1dc7a3Sopenharmony_ci * @brief Return the minimum of three values. 166cc1dc7a3Sopenharmony_ci * 167cc1dc7a3Sopenharmony_ci * For floats, NaNs are turned into @c r. 168cc1dc7a3Sopenharmony_ci * 169cc1dc7a3Sopenharmony_ci * @param p The first value to compare. 170cc1dc7a3Sopenharmony_ci * @param q The second value to compare. 171cc1dc7a3Sopenharmony_ci * @param r The third value to compare. 172cc1dc7a3Sopenharmony_ci * 173cc1dc7a3Sopenharmony_ci * @return The smallest value. 174cc1dc7a3Sopenharmony_ci */ 175cc1dc7a3Sopenharmony_citemplate<typename T> 176cc1dc7a3Sopenharmony_cistatic inline T min(T p, T q, T r) 177cc1dc7a3Sopenharmony_ci{ 178cc1dc7a3Sopenharmony_ci return min(min(p, q), r); 179cc1dc7a3Sopenharmony_ci} 180cc1dc7a3Sopenharmony_ci 181cc1dc7a3Sopenharmony_ci/** 182cc1dc7a3Sopenharmony_ci * @brief Return the minimum of four values. 183cc1dc7a3Sopenharmony_ci * 184cc1dc7a3Sopenharmony_ci * For floats, NaNs are turned into @c s. 185cc1dc7a3Sopenharmony_ci * 186cc1dc7a3Sopenharmony_ci * @param p The first value to compare. 187cc1dc7a3Sopenharmony_ci * @param q The second value to compare. 188cc1dc7a3Sopenharmony_ci * @param r The third value to compare. 189cc1dc7a3Sopenharmony_ci * @param s The fourth value to compare. 190cc1dc7a3Sopenharmony_ci * 191cc1dc7a3Sopenharmony_ci * @return The smallest value. 192cc1dc7a3Sopenharmony_ci */ 193cc1dc7a3Sopenharmony_citemplate<typename T> 194cc1dc7a3Sopenharmony_cistatic inline T min(T p, T q, T r, T s) 195cc1dc7a3Sopenharmony_ci{ 196cc1dc7a3Sopenharmony_ci return min(min(p, q), min(r, s)); 197cc1dc7a3Sopenharmony_ci} 198cc1dc7a3Sopenharmony_ci 199cc1dc7a3Sopenharmony_ci/** 200cc1dc7a3Sopenharmony_ci * @brief Return the maximum of two values. 201cc1dc7a3Sopenharmony_ci * 202cc1dc7a3Sopenharmony_ci * For floats, NaNs are turned into @c q. 203cc1dc7a3Sopenharmony_ci * 204cc1dc7a3Sopenharmony_ci * @param p The first value to compare. 205cc1dc7a3Sopenharmony_ci * @param q The second value to compare. 206cc1dc7a3Sopenharmony_ci * 207cc1dc7a3Sopenharmony_ci * @return The largest value. 208cc1dc7a3Sopenharmony_ci */ 209cc1dc7a3Sopenharmony_citemplate<typename T> 210cc1dc7a3Sopenharmony_cistatic inline T max(T p, T q) 211cc1dc7a3Sopenharmony_ci{ 212cc1dc7a3Sopenharmony_ci return p > q ? p : q; 213cc1dc7a3Sopenharmony_ci} 214cc1dc7a3Sopenharmony_ci 215cc1dc7a3Sopenharmony_ci/** 216cc1dc7a3Sopenharmony_ci * @brief Return the maximum of three values. 217cc1dc7a3Sopenharmony_ci * 218cc1dc7a3Sopenharmony_ci * For floats, NaNs are turned into @c r. 219cc1dc7a3Sopenharmony_ci * 220cc1dc7a3Sopenharmony_ci * @param p The first value to compare. 221cc1dc7a3Sopenharmony_ci * @param q The second value to compare. 222cc1dc7a3Sopenharmony_ci * @param r The third value to compare. 223cc1dc7a3Sopenharmony_ci * 224cc1dc7a3Sopenharmony_ci * @return The largest value. 225cc1dc7a3Sopenharmony_ci */ 226cc1dc7a3Sopenharmony_citemplate<typename T> 227cc1dc7a3Sopenharmony_cistatic inline T max(T p, T q, T r) 228cc1dc7a3Sopenharmony_ci{ 229cc1dc7a3Sopenharmony_ci return max(max(p, q), r); 230cc1dc7a3Sopenharmony_ci} 231cc1dc7a3Sopenharmony_ci 232cc1dc7a3Sopenharmony_ci/** 233cc1dc7a3Sopenharmony_ci * @brief Return the maximum of four values. 234cc1dc7a3Sopenharmony_ci * 235cc1dc7a3Sopenharmony_ci * For floats, NaNs are turned into @c s. 236cc1dc7a3Sopenharmony_ci * 237cc1dc7a3Sopenharmony_ci * @param p The first value to compare. 238cc1dc7a3Sopenharmony_ci * @param q The second value to compare. 239cc1dc7a3Sopenharmony_ci * @param r The third value to compare. 240cc1dc7a3Sopenharmony_ci * @param s The fourth value to compare. 241cc1dc7a3Sopenharmony_ci * 242cc1dc7a3Sopenharmony_ci * @return The largest value. 243cc1dc7a3Sopenharmony_ci */ 244cc1dc7a3Sopenharmony_citemplate<typename T> 245cc1dc7a3Sopenharmony_cistatic inline T max(T p, T q, T r, T s) 246cc1dc7a3Sopenharmony_ci{ 247cc1dc7a3Sopenharmony_ci return max(max(p, q), max(r, s)); 248cc1dc7a3Sopenharmony_ci} 249cc1dc7a3Sopenharmony_ci 250cc1dc7a3Sopenharmony_ci/** 251cc1dc7a3Sopenharmony_ci * @brief Clamp a value value between @c mn and @c mx. 252cc1dc7a3Sopenharmony_ci * 253cc1dc7a3Sopenharmony_ci * For floats, NaNs are turned into @c mn. 254cc1dc7a3Sopenharmony_ci * 255cc1dc7a3Sopenharmony_ci * @param v The value to clamp. 256cc1dc7a3Sopenharmony_ci * @param mn The min value (inclusive). 257cc1dc7a3Sopenharmony_ci * @param mx The max value (inclusive). 258cc1dc7a3Sopenharmony_ci * 259cc1dc7a3Sopenharmony_ci * @return The clamped value. 260cc1dc7a3Sopenharmony_ci */ 261cc1dc7a3Sopenharmony_citemplate<typename T> 262cc1dc7a3Sopenharmony_ciinline T clamp(T v, T mn, T mx) 263cc1dc7a3Sopenharmony_ci{ 264cc1dc7a3Sopenharmony_ci // Do not reorder; correct NaN handling relies on the fact that comparison 265cc1dc7a3Sopenharmony_ci // with NaN returns false and will fall-though to the "min" value. 266cc1dc7a3Sopenharmony_ci if (v > mx) return mx; 267cc1dc7a3Sopenharmony_ci if (v > mn) return v; 268cc1dc7a3Sopenharmony_ci return mn; 269cc1dc7a3Sopenharmony_ci} 270cc1dc7a3Sopenharmony_ci 271cc1dc7a3Sopenharmony_ci/** 272cc1dc7a3Sopenharmony_ci * @brief Clamp a float value between 0.0f and 1.0f. 273cc1dc7a3Sopenharmony_ci * 274cc1dc7a3Sopenharmony_ci * NaNs are turned into 0.0f. 275cc1dc7a3Sopenharmony_ci * 276cc1dc7a3Sopenharmony_ci * @param v The value to clamp. 277cc1dc7a3Sopenharmony_ci * 278cc1dc7a3Sopenharmony_ci * @return The clamped value. 279cc1dc7a3Sopenharmony_ci */ 280cc1dc7a3Sopenharmony_cistatic inline float clamp1f(float v) 281cc1dc7a3Sopenharmony_ci{ 282cc1dc7a3Sopenharmony_ci return astc::clamp(v, 0.0f, 1.0f); 283cc1dc7a3Sopenharmony_ci} 284cc1dc7a3Sopenharmony_ci 285cc1dc7a3Sopenharmony_ci/** 286cc1dc7a3Sopenharmony_ci * @brief Clamp a float value between 0.0f and 255.0f. 287cc1dc7a3Sopenharmony_ci * 288cc1dc7a3Sopenharmony_ci * NaNs are turned into 0.0f. 289cc1dc7a3Sopenharmony_ci * 290cc1dc7a3Sopenharmony_ci * @param v The value to clamp. 291cc1dc7a3Sopenharmony_ci * 292cc1dc7a3Sopenharmony_ci * @return The clamped value. 293cc1dc7a3Sopenharmony_ci */ 294cc1dc7a3Sopenharmony_cistatic inline float clamp255f(float v) 295cc1dc7a3Sopenharmony_ci{ 296cc1dc7a3Sopenharmony_ci return astc::clamp(v, 0.0f, 255.0f); 297cc1dc7a3Sopenharmony_ci} 298cc1dc7a3Sopenharmony_ci 299cc1dc7a3Sopenharmony_ci/** 300cc1dc7a3Sopenharmony_ci * @brief SP float round-down. 301cc1dc7a3Sopenharmony_ci * 302cc1dc7a3Sopenharmony_ci * @param v The value to round. 303cc1dc7a3Sopenharmony_ci * 304cc1dc7a3Sopenharmony_ci * @return The rounded value. 305cc1dc7a3Sopenharmony_ci */ 306cc1dc7a3Sopenharmony_cistatic inline float flt_rd(float v) 307cc1dc7a3Sopenharmony_ci{ 308cc1dc7a3Sopenharmony_ci return std::floor(v); 309cc1dc7a3Sopenharmony_ci} 310cc1dc7a3Sopenharmony_ci 311cc1dc7a3Sopenharmony_ci/** 312cc1dc7a3Sopenharmony_ci * @brief SP float round-to-nearest and convert to integer. 313cc1dc7a3Sopenharmony_ci * 314cc1dc7a3Sopenharmony_ci * @param v The value to round. 315cc1dc7a3Sopenharmony_ci * 316cc1dc7a3Sopenharmony_ci * @return The rounded value. 317cc1dc7a3Sopenharmony_ci */ 318cc1dc7a3Sopenharmony_cistatic inline int flt2int_rtn(float v) 319cc1dc7a3Sopenharmony_ci{ 320cc1dc7a3Sopenharmony_ci 321cc1dc7a3Sopenharmony_ci return static_cast<int>(v + 0.5f); 322cc1dc7a3Sopenharmony_ci} 323cc1dc7a3Sopenharmony_ci 324cc1dc7a3Sopenharmony_ci/** 325cc1dc7a3Sopenharmony_ci * @brief SP float round down and convert to integer. 326cc1dc7a3Sopenharmony_ci * 327cc1dc7a3Sopenharmony_ci * @param v The value to round. 328cc1dc7a3Sopenharmony_ci * 329cc1dc7a3Sopenharmony_ci * @return The rounded value. 330cc1dc7a3Sopenharmony_ci */ 331cc1dc7a3Sopenharmony_cistatic inline int flt2int_rd(float v) 332cc1dc7a3Sopenharmony_ci{ 333cc1dc7a3Sopenharmony_ci return static_cast<int>(v); 334cc1dc7a3Sopenharmony_ci} 335cc1dc7a3Sopenharmony_ci 336cc1dc7a3Sopenharmony_ci/** 337cc1dc7a3Sopenharmony_ci * @brief SP float bit-interpreted as an integer. 338cc1dc7a3Sopenharmony_ci * 339cc1dc7a3Sopenharmony_ci * @param v The value to bitcast. 340cc1dc7a3Sopenharmony_ci * 341cc1dc7a3Sopenharmony_ci * @return The converted value. 342cc1dc7a3Sopenharmony_ci */ 343cc1dc7a3Sopenharmony_cistatic inline int float_as_int(float v) 344cc1dc7a3Sopenharmony_ci{ 345cc1dc7a3Sopenharmony_ci union { int a; float b; } u; 346cc1dc7a3Sopenharmony_ci u.b = v; 347cc1dc7a3Sopenharmony_ci return u.a; 348cc1dc7a3Sopenharmony_ci} 349cc1dc7a3Sopenharmony_ci 350cc1dc7a3Sopenharmony_ci/** 351cc1dc7a3Sopenharmony_ci * @brief Integer bit-interpreted as an SP float. 352cc1dc7a3Sopenharmony_ci * 353cc1dc7a3Sopenharmony_ci * @param v The value to bitcast. 354cc1dc7a3Sopenharmony_ci * 355cc1dc7a3Sopenharmony_ci * @return The converted value. 356cc1dc7a3Sopenharmony_ci */ 357cc1dc7a3Sopenharmony_cistatic inline float int_as_float(int v) 358cc1dc7a3Sopenharmony_ci{ 359cc1dc7a3Sopenharmony_ci union { int a; float b; } u; 360cc1dc7a3Sopenharmony_ci u.a = v; 361cc1dc7a3Sopenharmony_ci return u.b; 362cc1dc7a3Sopenharmony_ci} 363cc1dc7a3Sopenharmony_ci 364cc1dc7a3Sopenharmony_ci/** 365cc1dc7a3Sopenharmony_ci * @brief Fast approximation of 1.0 / sqrt(val). 366cc1dc7a3Sopenharmony_ci * 367cc1dc7a3Sopenharmony_ci * @param v The input value. 368cc1dc7a3Sopenharmony_ci * 369cc1dc7a3Sopenharmony_ci * @return The approximated result. 370cc1dc7a3Sopenharmony_ci */ 371cc1dc7a3Sopenharmony_cistatic inline float rsqrt(float v) 372cc1dc7a3Sopenharmony_ci{ 373cc1dc7a3Sopenharmony_ci return 1.0f / std::sqrt(v); 374cc1dc7a3Sopenharmony_ci} 375cc1dc7a3Sopenharmony_ci 376cc1dc7a3Sopenharmony_ci/** 377cc1dc7a3Sopenharmony_ci * @brief Fast approximation of sqrt(val). 378cc1dc7a3Sopenharmony_ci * 379cc1dc7a3Sopenharmony_ci * @param v The input value. 380cc1dc7a3Sopenharmony_ci * 381cc1dc7a3Sopenharmony_ci * @return The approximated result. 382cc1dc7a3Sopenharmony_ci */ 383cc1dc7a3Sopenharmony_cistatic inline float sqrt(float v) 384cc1dc7a3Sopenharmony_ci{ 385cc1dc7a3Sopenharmony_ci return std::sqrt(v); 386cc1dc7a3Sopenharmony_ci} 387cc1dc7a3Sopenharmony_ci 388cc1dc7a3Sopenharmony_ci/** 389cc1dc7a3Sopenharmony_ci * @brief Extract mantissa and exponent of a float value. 390cc1dc7a3Sopenharmony_ci * 391cc1dc7a3Sopenharmony_ci * @param v The input value. 392cc1dc7a3Sopenharmony_ci * @param[out] expo The output exponent. 393cc1dc7a3Sopenharmony_ci * 394cc1dc7a3Sopenharmony_ci * @return The mantissa. 395cc1dc7a3Sopenharmony_ci */ 396cc1dc7a3Sopenharmony_cistatic inline float frexp(float v, int* expo) 397cc1dc7a3Sopenharmony_ci{ 398cc1dc7a3Sopenharmony_ci if32 p; 399cc1dc7a3Sopenharmony_ci p.f = v; 400cc1dc7a3Sopenharmony_ci *expo = ((p.u >> 23) & 0xFF) - 126; 401cc1dc7a3Sopenharmony_ci p.u = (p.u & 0x807fffff) | 0x3f000000; 402cc1dc7a3Sopenharmony_ci return p.f; 403cc1dc7a3Sopenharmony_ci} 404cc1dc7a3Sopenharmony_ci 405cc1dc7a3Sopenharmony_ci/** 406cc1dc7a3Sopenharmony_ci * @brief Initialize the seed structure for a random number generator. 407cc1dc7a3Sopenharmony_ci * 408cc1dc7a3Sopenharmony_ci * Important note: For the purposes of ASTC we want sets of random numbers to 409cc1dc7a3Sopenharmony_ci * use the codec, but we want the same seed value across instances and threads 410cc1dc7a3Sopenharmony_ci * to ensure that image output is stable across compressor runs and across 411cc1dc7a3Sopenharmony_ci * platforms. Every PRNG created by this call will therefore return the same 412cc1dc7a3Sopenharmony_ci * sequence of values ... 413cc1dc7a3Sopenharmony_ci * 414cc1dc7a3Sopenharmony_ci * @param state The state structure to initialize. 415cc1dc7a3Sopenharmony_ci */ 416cc1dc7a3Sopenharmony_civoid rand_init(uint64_t state[2]); 417cc1dc7a3Sopenharmony_ci 418cc1dc7a3Sopenharmony_ci/** 419cc1dc7a3Sopenharmony_ci * @brief Return the next random number from the generator. 420cc1dc7a3Sopenharmony_ci * 421cc1dc7a3Sopenharmony_ci * This RNG is an implementation of the "xoroshoro-128+ 1.0" PRNG, based on the 422cc1dc7a3Sopenharmony_ci * public-domain implementation given by David Blackman & Sebastiano Vigna at 423cc1dc7a3Sopenharmony_ci * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c 424cc1dc7a3Sopenharmony_ci * 425cc1dc7a3Sopenharmony_ci * @param state The state structure to use/update. 426cc1dc7a3Sopenharmony_ci */ 427cc1dc7a3Sopenharmony_ciuint64_t rand(uint64_t state[2]); 428cc1dc7a3Sopenharmony_ci 429cc1dc7a3Sopenharmony_ci} 430cc1dc7a3Sopenharmony_ci 431cc1dc7a3Sopenharmony_ci/* ============================================================================ 432cc1dc7a3Sopenharmony_ci Softfloat library with fp32 and fp16 conversion functionality. 433cc1dc7a3Sopenharmony_ci============================================================================ */ 434cc1dc7a3Sopenharmony_ci#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0) 435cc1dc7a3Sopenharmony_ci /* narrowing float->float conversions */ 436cc1dc7a3Sopenharmony_ci uint16_t float_to_sf16(float val); 437cc1dc7a3Sopenharmony_ci float sf16_to_float(uint16_t val); 438cc1dc7a3Sopenharmony_ci#endif 439cc1dc7a3Sopenharmony_ci 440cc1dc7a3Sopenharmony_ci/********************************* 441cc1dc7a3Sopenharmony_ci Vector library 442cc1dc7a3Sopenharmony_ci*********************************/ 443cc1dc7a3Sopenharmony_ci#include "astcenc_vecmathlib.h" 444cc1dc7a3Sopenharmony_ci 445cc1dc7a3Sopenharmony_ci/********************************* 446cc1dc7a3Sopenharmony_ci Declaration of line types 447cc1dc7a3Sopenharmony_ci*********************************/ 448cc1dc7a3Sopenharmony_ci// parametric line, 2D: The line is given by line = a + b * t. 449cc1dc7a3Sopenharmony_ci 450cc1dc7a3Sopenharmony_cistruct line2 451cc1dc7a3Sopenharmony_ci{ 452cc1dc7a3Sopenharmony_ci vfloat4 a; 453cc1dc7a3Sopenharmony_ci vfloat4 b; 454cc1dc7a3Sopenharmony_ci}; 455cc1dc7a3Sopenharmony_ci 456cc1dc7a3Sopenharmony_ci// parametric line, 3D 457cc1dc7a3Sopenharmony_cistruct line3 458cc1dc7a3Sopenharmony_ci{ 459cc1dc7a3Sopenharmony_ci vfloat4 a; 460cc1dc7a3Sopenharmony_ci vfloat4 b; 461cc1dc7a3Sopenharmony_ci}; 462cc1dc7a3Sopenharmony_ci 463cc1dc7a3Sopenharmony_cistruct line4 464cc1dc7a3Sopenharmony_ci{ 465cc1dc7a3Sopenharmony_ci vfloat4 a; 466cc1dc7a3Sopenharmony_ci vfloat4 b; 467cc1dc7a3Sopenharmony_ci}; 468cc1dc7a3Sopenharmony_ci 469cc1dc7a3Sopenharmony_ci 470cc1dc7a3Sopenharmony_cistruct processed_line2 471cc1dc7a3Sopenharmony_ci{ 472cc1dc7a3Sopenharmony_ci vfloat4 amod; 473cc1dc7a3Sopenharmony_ci vfloat4 bs; 474cc1dc7a3Sopenharmony_ci}; 475cc1dc7a3Sopenharmony_ci 476cc1dc7a3Sopenharmony_cistruct processed_line3 477cc1dc7a3Sopenharmony_ci{ 478cc1dc7a3Sopenharmony_ci vfloat4 amod; 479cc1dc7a3Sopenharmony_ci vfloat4 bs; 480cc1dc7a3Sopenharmony_ci}; 481cc1dc7a3Sopenharmony_ci 482cc1dc7a3Sopenharmony_cistruct processed_line4 483cc1dc7a3Sopenharmony_ci{ 484cc1dc7a3Sopenharmony_ci vfloat4 amod; 485cc1dc7a3Sopenharmony_ci vfloat4 bs; 486cc1dc7a3Sopenharmony_ci}; 487cc1dc7a3Sopenharmony_ci 488cc1dc7a3Sopenharmony_ci#endif 489