1// SPDX-License-Identifier: Apache-2.0 2// ---------------------------------------------------------------------------- 3// Copyright 2011-2024 Arm Limited 4// 5// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6// use this file except in compliance with the License. You may obtain a copy 7// of the License at: 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14// License for the specific language governing permissions and limitations 15// under the License. 16// ---------------------------------------------------------------------------- 17 18/* 19 * This module implements a variety of mathematical data types and library 20 * functions used by the codec. 21 */ 22 23#ifndef ASTC_MATHLIB_H_INCLUDED 24#define ASTC_MATHLIB_H_INCLUDED 25 26#include <cassert> 27#include <cstdint> 28#include <cmath> 29 30#ifndef ASTCENC_POPCNT 31 #if defined(__POPCNT__) 32 #define ASTCENC_POPCNT 1 33 #else 34 #define ASTCENC_POPCNT 0 35 #endif 36#endif 37 38#ifndef ASTCENC_F16C 39 #if defined(__F16C__) 40 #define ASTCENC_F16C 1 41 #else 42 #define ASTCENC_F16C 0 43 #endif 44#endif 45 46#ifndef ASTCENC_SSE 47 #if defined(__SSE4_2__) 48 #define ASTCENC_SSE 42 49 #elif defined(__SSE4_1__) 50 #define ASTCENC_SSE 41 51 #elif defined(__SSE2__) 52 #define ASTCENC_SSE 20 53 #else 54 #define ASTCENC_SSE 0 55 #endif 56#endif 57 58#ifndef ASTCENC_AVX 59 #if defined(__AVX2__) 60 #define ASTCENC_AVX 2 61 #elif defined(__AVX__) 62 #define ASTCENC_AVX 1 63 #else 64 #define ASTCENC_AVX 0 65 #endif 66#endif 67 68#ifndef ASTCENC_NEON 69 #if defined(__aarch64__) 70 #define ASTCENC_NEON 1 71 #else 72 #define ASTCENC_NEON 0 73 #endif 74#endif 75 76// Force vector-sized SIMD alignment 77#if ASTCENC_AVX 78 #define ASTCENC_VECALIGN 32 79#elif ASTCENC_SSE || ASTCENC_NEON 80 #define ASTCENC_VECALIGN 16 81// Use default alignment for non-SIMD builds 82#else 83 #define ASTCENC_VECALIGN 0 84#endif 85 86// C++11 states that alignas(0) should be ignored but GCC doesn't do 87// this on some versions, so workaround and avoid emitting alignas(0) 88#if ASTCENC_VECALIGN > 0 89 #define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN) 90#else 91 #define ASTCENC_ALIGNAS 92#endif 93 94#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0 95 #include <immintrin.h> 96#endif 97 98/* ============================================================================ 99 Fast math library; note that many of the higher-order functions in this set 100 use approximations which are less accurate, but faster, than <cmath> standard 101 library equivalents. 102 103 Note: Many of these are not necessarily faster than simple C versions when 104 used on a single scalar value, but are included for testing purposes as most 105 have an option based on SSE intrinsics and therefore provide an obvious route 106 to future vectorization. 107============================================================================ */ 108 109// Union for manipulation of float bit patterns 110typedef union 111{ 112 uint32_t u; 113 int32_t s; 114 float f; 115} if32; 116 117// These are namespaced to avoid colliding with C standard library functions. 118namespace astc 119{ 120 121static const float PI = 3.14159265358979323846f; 122static const float PI_OVER_TWO = 1.57079632679489661923f; 123 124/** 125 * @brief SP float absolute value. 126 * 127 * @param v The value to make absolute. 128 * 129 * @return The absolute value. 130 */ 131static inline float fabs(float v) 132{ 133 return std::fabs(v); 134} 135 136/** 137 * @brief Test if a float value is a nan. 138 * 139 * @param v The value test. 140 * 141 * @return Zero is not a NaN, non-zero otherwise. 142 */ 143static inline bool isnan(float v) 144{ 145 return v != v; 146} 147 148/** 149 * @brief Return the minimum of two values. 150 * 151 * For floats, NaNs are turned into @c q. 152 * 153 * @param p The first value to compare. 154 * @param q The second value to compare. 155 * 156 * @return The smallest value. 157 */ 158template<typename T> 159static inline T min(T p, T q) 160{ 161 return p < q ? p : q; 162} 163 164/** 165 * @brief Return the minimum of three values. 166 * 167 * For floats, NaNs are turned into @c r. 168 * 169 * @param p The first value to compare. 170 * @param q The second value to compare. 171 * @param r The third value to compare. 172 * 173 * @return The smallest value. 174 */ 175template<typename T> 176static inline T min(T p, T q, T r) 177{ 178 return min(min(p, q), r); 179} 180 181/** 182 * @brief Return the minimum of four values. 183 * 184 * For floats, NaNs are turned into @c s. 185 * 186 * @param p The first value to compare. 187 * @param q The second value to compare. 188 * @param r The third value to compare. 189 * @param s The fourth value to compare. 190 * 191 * @return The smallest value. 192 */ 193template<typename T> 194static inline T min(T p, T q, T r, T s) 195{ 196 return min(min(p, q), min(r, s)); 197} 198 199/** 200 * @brief Return the maximum of two values. 201 * 202 * For floats, NaNs are turned into @c q. 203 * 204 * @param p The first value to compare. 205 * @param q The second value to compare. 206 * 207 * @return The largest value. 208 */ 209template<typename T> 210static inline T max(T p, T q) 211{ 212 return p > q ? p : q; 213} 214 215/** 216 * @brief Return the maximum of three values. 217 * 218 * For floats, NaNs are turned into @c r. 219 * 220 * @param p The first value to compare. 221 * @param q The second value to compare. 222 * @param r The third value to compare. 223 * 224 * @return The largest value. 225 */ 226template<typename T> 227static inline T max(T p, T q, T r) 228{ 229 return max(max(p, q), r); 230} 231 232/** 233 * @brief Return the maximum of four values. 234 * 235 * For floats, NaNs are turned into @c s. 236 * 237 * @param p The first value to compare. 238 * @param q The second value to compare. 239 * @param r The third value to compare. 240 * @param s The fourth value to compare. 241 * 242 * @return The largest value. 243 */ 244template<typename T> 245static inline T max(T p, T q, T r, T s) 246{ 247 return max(max(p, q), max(r, s)); 248} 249 250/** 251 * @brief Clamp a value value between @c mn and @c mx. 252 * 253 * For floats, NaNs are turned into @c mn. 254 * 255 * @param v The value to clamp. 256 * @param mn The min value (inclusive). 257 * @param mx The max value (inclusive). 258 * 259 * @return The clamped value. 260 */ 261template<typename T> 262inline T clamp(T v, T mn, T mx) 263{ 264 // Do not reorder; correct NaN handling relies on the fact that comparison 265 // with NaN returns false and will fall-though to the "min" value. 266 if (v > mx) return mx; 267 if (v > mn) return v; 268 return mn; 269} 270 271/** 272 * @brief Clamp a float value between 0.0f and 1.0f. 273 * 274 * NaNs are turned into 0.0f. 275 * 276 * @param v The value to clamp. 277 * 278 * @return The clamped value. 279 */ 280static inline float clamp1f(float v) 281{ 282 return astc::clamp(v, 0.0f, 1.0f); 283} 284 285/** 286 * @brief Clamp a float value between 0.0f and 255.0f. 287 * 288 * NaNs are turned into 0.0f. 289 * 290 * @param v The value to clamp. 291 * 292 * @return The clamped value. 293 */ 294static inline float clamp255f(float v) 295{ 296 return astc::clamp(v, 0.0f, 255.0f); 297} 298 299/** 300 * @brief SP float round-down. 301 * 302 * @param v The value to round. 303 * 304 * @return The rounded value. 305 */ 306static inline float flt_rd(float v) 307{ 308 return std::floor(v); 309} 310 311/** 312 * @brief SP float round-to-nearest and convert to integer. 313 * 314 * @param v The value to round. 315 * 316 * @return The rounded value. 317 */ 318static inline int flt2int_rtn(float v) 319{ 320 321 return static_cast<int>(v + 0.5f); 322} 323 324/** 325 * @brief SP float round down and convert to integer. 326 * 327 * @param v The value to round. 328 * 329 * @return The rounded value. 330 */ 331static inline int flt2int_rd(float v) 332{ 333 return static_cast<int>(v); 334} 335 336/** 337 * @brief SP float bit-interpreted as an integer. 338 * 339 * @param v The value to bitcast. 340 * 341 * @return The converted value. 342 */ 343static inline int float_as_int(float v) 344{ 345 union { int a; float b; } u; 346 u.b = v; 347 return u.a; 348} 349 350/** 351 * @brief Integer bit-interpreted as an SP float. 352 * 353 * @param v The value to bitcast. 354 * 355 * @return The converted value. 356 */ 357static inline float int_as_float(int v) 358{ 359 union { int a; float b; } u; 360 u.a = v; 361 return u.b; 362} 363 364/** 365 * @brief Fast approximation of 1.0 / sqrt(val). 366 * 367 * @param v The input value. 368 * 369 * @return The approximated result. 370 */ 371static inline float rsqrt(float v) 372{ 373 return 1.0f / std::sqrt(v); 374} 375 376/** 377 * @brief Fast approximation of sqrt(val). 378 * 379 * @param v The input value. 380 * 381 * @return The approximated result. 382 */ 383static inline float sqrt(float v) 384{ 385 return std::sqrt(v); 386} 387 388/** 389 * @brief Extract mantissa and exponent of a float value. 390 * 391 * @param v The input value. 392 * @param[out] expo The output exponent. 393 * 394 * @return The mantissa. 395 */ 396static inline float frexp(float v, int* expo) 397{ 398 if32 p; 399 p.f = v; 400 *expo = ((p.u >> 23) & 0xFF) - 126; 401 p.u = (p.u & 0x807fffff) | 0x3f000000; 402 return p.f; 403} 404 405/** 406 * @brief Initialize the seed structure for a random number generator. 407 * 408 * Important note: For the purposes of ASTC we want sets of random numbers to 409 * use the codec, but we want the same seed value across instances and threads 410 * to ensure that image output is stable across compressor runs and across 411 * platforms. Every PRNG created by this call will therefore return the same 412 * sequence of values ... 413 * 414 * @param state The state structure to initialize. 415 */ 416void rand_init(uint64_t state[2]); 417 418/** 419 * @brief Return the next random number from the generator. 420 * 421 * This RNG is an implementation of the "xoroshoro-128+ 1.0" PRNG, based on the 422 * public-domain implementation given by David Blackman & Sebastiano Vigna at 423 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c 424 * 425 * @param state The state structure to use/update. 426 */ 427uint64_t rand(uint64_t state[2]); 428 429} 430 431/* ============================================================================ 432 Softfloat library with fp32 and fp16 conversion functionality. 433============================================================================ */ 434#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0) 435 /* narrowing float->float conversions */ 436 uint16_t float_to_sf16(float val); 437 float sf16_to_float(uint16_t val); 438#endif 439 440/********************************* 441 Vector library 442*********************************/ 443#include "astcenc_vecmathlib.h" 444 445/********************************* 446 Declaration of line types 447*********************************/ 448// parametric line, 2D: The line is given by line = a + b * t. 449 450struct line2 451{ 452 vfloat4 a; 453 vfloat4 b; 454}; 455 456// parametric line, 3D 457struct line3 458{ 459 vfloat4 a; 460 vfloat4 b; 461}; 462 463struct line4 464{ 465 vfloat4 a; 466 vfloat4 b; 467}; 468 469 470struct processed_line2 471{ 472 vfloat4 amod; 473 vfloat4 bs; 474}; 475 476struct processed_line3 477{ 478 vfloat4 amod; 479 vfloat4 bs; 480}; 481 482struct processed_line4 483{ 484 vfloat4 amod; 485 vfloat4 bs; 486}; 487 488#endif 489