/*
 * Configuration for math routines.
 *
 * Copyright (c) 2017-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

#ifndef _MATH_CONFIG_H
#define _MATH_CONFIG_H

#include <math.h>
#include <stdint.h>

#ifndef WANT_ROUNDING
/* If defined to 1, return correct results for special cases in non-nearest
   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
   This may be set to 0 if there is no fenv support or if math functions only
   get called in round to nearest mode.  */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
/* If defined to 1, set errno in math functions according to ISO C.  Many math
   libraries do not set errno, so this is 0 by default.  It may need to be
   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
# define WANT_ERRNO 0
#endif
#ifndef WANT_ERRNO_UFLOW
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).  */
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
#endif
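
/* Usage note (sketch): a port can override the defaults above before this
   header is included, e.g. by building with -DWANT_ERRNO=1 when math.h
   reports (math_errhandling & MATH_ERRNO) != 0.  */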

/* Compiler can inline round as a single instruction.  */
#ifndef HAVE_FAST_ROUND
# if __aarch64__
#   define HAVE_FAST_ROUND 1
# else
#   define HAVE_FAST_ROUND 0
# endif
#endif

/* Compiler can inline lround, but not (long)round(x).  */
#ifndef HAVE_FAST_LROUND
# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
#   define HAVE_FAST_LROUND 1
# else
#   define HAVE_FAST_LROUND 0
# endif
#endif

/* Compiler can inline fma as a single instruction.  */
#ifndef HAVE_FAST_FMA
# if defined FP_FAST_FMA || __aarch64__
#   define HAVE_FAST_FMA 1
# else
#   define HAVE_FAST_FMA 0
# endif
#endif

/* Provide *_finite symbols and some of the glibc hidden symbols
   so libmathlib can be used with binaries compiled against glibc
   to interpose math functions with both static and dynamic linking.  */
#ifndef USE_GLIBC_ABI
# if __GNUC__
#   define USE_GLIBC_ABI 1
# else
#   define USE_GLIBC_ABI 0
# endif
#endif

/* Optionally used extensions.  */
#ifdef __GNUC__
# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
# define NOINLINE __attribute__ ((noinline))
# define UNUSED __attribute__ ((unused))
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (x, 0)
# if __GNUC__ >= 9
#   define attribute_copy(f) __attribute__ ((copy (f)))
# else
#   define attribute_copy(f)
# endif
# define strong_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
# define hidden_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
  attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
# define UNUSED
# define likely(x) (x)
# define unlikely(x) (x)
#endif
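
/* Illustrative sketch (guarded out, hypothetical names): typical use of the
   branch-hint and alias macros above.  strong_alias is only defined for
   GNU-compatible compilers.  */
#if 0
float example_mul (float x);

float
example_mul (float x)
{
  if (unlikely (x != x)) /* NaN inputs are expected to be rare.  */
    return x + x;
  return x * x;
}

/* Export a second name with the same type and attributes.  */
strong_alias (example_mul, __example_mul_finite)
#endif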

#if HAVE_FAST_ROUND
/* When set, the roundtoint and converttoint functions are provided with
   the semantics documented below.  */
# define TOINT_INTRINSICS 1

/* Round x to nearest int in all rounding modes, ties have to be rounded
   consistently with converttoint so the results match.  If the result
   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
static inline double_t
roundtoint (double_t x)
{
  return round (x);
}

/* Convert x to nearest int in all rounding modes, ties have to be rounded
   consistently with roundtoint.  If the result is not representable in an
   int32_t then the semantics is unspecified.  */
static inline int32_t
converttoint (double_t x)
{
# if HAVE_FAST_LROUND
  return lround (x);
# else
  return (long) round (x);
# endif
}
#endif
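
/* Illustrative sketch (guarded out, hypothetical constants): roundtoint and
   converttoint (only available when TOINT_INTRINSICS is set) support the
   usual argument-reduction pattern, computing k = round (x * N/ln2) both as
   a double and as an int32_t table index.  */
#if 0
static inline double
example_reduce (double x, double InvLn2N, double NegLn2N, int32_t *ki)
{
  double_t z = InvLn2N * x;     /* x/ln2 scaled by the table size N.  */
  double_t kd = roundtoint (z); /* Nearest integer as a double.  */
  *ki = converttoint (z);       /* The same value as an int32_t.  */
  return x + kd * NegLn2N;      /* Reduced argument r = x - k*ln2/N.  */
}
#endif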

static inline uint32_t
asuint (float f)
{
  union
  {
    float f;
    uint32_t i;
  } u = {f};
  return u.i;
}

static inline float
asfloat (uint32_t i)
{
  union
  {
    uint32_t i;
    float f;
  } u = {i};
  return u.f;
}

static inline uint64_t
asuint64 (double f)
{
  union
  {
    double f;
    uint64_t i;
  } u = {f};
  return u.i;
}

static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t i;
    double f;
  } u = {i};
  return u.f;
}
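
/* Illustrative sketch (guarded out): the reinterpretation helpers above are
   typically used for cheap classification and sign manipulation with integer
   operations instead of floating-point compares.  */
#if 0
static inline unsigned
example_abstop12 (float x)
{
  /* Top 12 bits without the sign: the 8 exponent bits plus the 3 highest
     mantissa bits; range checks compare this against precomputed bounds.  */
  return (asuint (x) >> 20) & 0x7ff;
}

static inline double
example_clear_sign (double x)
{
  /* Clear the sign bit with integer ops; behaves like fabs, including for
     NaN and infinity.  */
  return asdouble (asuint64 (x) & 0x7fffffffffffffff);
}
#endif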

#ifndef IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif
/* With the IEEE 754-2008 encoding a signaling NaN has the quiet bit clear:
   flip that bit and compare against the quiet NaN pattern (the multiply by 2
   discards the sign bit).  With the legacy encoding a set quiet bit marks a
   signaling NaN instead.  */
static inline int
issignalingf_inline (float x)
{
  uint32_t ix = asuint (x);
  if (!IEEE_754_2008_SNAN)
    return (ix & 0x7fc00000) == 0x7fc00000;
  return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
}

static inline int
issignaling_inline (double x)
{
  uint64_t ix = asuint64 (x);
  if (!IEEE_754_2008_SNAN)
    return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
  return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
}

#if __aarch64__ && __GNUC__
/* Prevent the optimization of a floating-point expression.  */
static inline float
opt_barrier_float (float x)
{
  __asm__ __volatile__ ("" : "+w" (x));
  return x;
}
static inline double
opt_barrier_double (double x)
{
  __asm__ __volatile__ ("" : "+w" (x));
  return x;
}
/* Force the evaluation of a floating-point expression for its side-effect.  */
static inline void
force_eval_float (float x)
{
  __asm__ __volatile__ ("" : "+w" (x));
}
static inline void
force_eval_double (double x)
{
  __asm__ __volatile__ ("" : "+w" (x));
}
#else
static inline float
opt_barrier_float (float x)
{
  volatile float y = x;
  return y;
}
static inline double
opt_barrier_double (double x)
{
  volatile double y = x;
  return y;
}
static inline void
force_eval_float (float x)
{
  volatile float y UNUSED = x;
}
static inline void
force_eval_double (double x)
{
  volatile double y UNUSED = x;
}
#endif
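
/* Illustrative sketch (guarded out): the barriers above keep the compiler
   from constant-folding or discarding operations whose only purpose is to
   raise the correct IEEE exceptions, e.g. underflow for a tiny result.  */
#if 0
static inline float
example_force_underflow (float tiny)
{
  /* The multiply underflows at run time; opt_barrier_float hides the value
     from the constant folder and force_eval_float keeps the result live.  */
  float y = opt_barrier_float (tiny) * tiny;
  force_eval_float (y);
  return y;
}
#endif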

/* Evaluate an expression as the specified type.  Normally a type cast should
   be enough, but compilers implement non-standard excess-precision handling,
   so when FLT_EVAL_METHOD != 0 these functions may need to be customized.  */
static inline float
eval_as_float (float x)
{
  return x;
}
static inline double
eval_as_double (double x)
{
  return x;
}
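
/* Illustrative sketch (guarded out): narrowing an intermediate result so a
   later comparison or error check cannot be affected by excess precision.  */
#if 0
static inline double
example_sum_narrow (double hi, double lo)
{
  /* Without eval_as_double the sum could be kept in wider precision and be
     rounded differently when it is finally stored.  */
  return eval_as_double (hi + lo);
}
#endif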

/* Error handling tail calls for special cases, with a sign argument.
   The sign of the return value is set if the argument is non-zero.  */

/* The result overflows.  */
HIDDEN float __math_oflowf (uint32_t);
/* The result underflows to 0 in nearest rounding mode.  */
HIDDEN float __math_uflowf (uint32_t);
/* The result underflows to 0 in some directed rounding mode only.  */
HIDDEN float __math_may_uflowf (uint32_t);
/* Division by zero.  */
HIDDEN float __math_divzerof (uint32_t);
/* The result overflows.  */
HIDDEN double __math_oflow (uint32_t);
/* The result underflows to 0 in nearest rounding mode.  */
HIDDEN double __math_uflow (uint32_t);
/* The result underflows to 0 in some directed rounding mode only.  */
HIDDEN double __math_may_uflow (uint32_t);
/* Division by zero.  */
HIDDEN double __math_divzero (uint32_t);
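
/* Illustrative sketch (guarded out, hypothetical threshold): special-case
   paths normally tail call these helpers so errno and exception handling
   stay in one cold place.  */
#if 0
static float
example_expf_special (float x, uint32_t sign)
{
  if (x > 0x1.62e42ep6f)        /* Roughly log(FLT_MAX): result overflows.  */
    return __math_oflowf (sign);
  return __math_uflowf (sign);  /* Otherwise it underflows to 0.  */
}
#endif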

/* Error handling using input checking.  */

/* Invalid input unless it is a quiet NaN.  */
HIDDEN float __math_invalidf (float);
/* Invalid input unless it is a quiet NaN.  */
HIDDEN double __math_invalid (double);

/* Error handling using output checking, only for errno setting.  */

/* Check if the result overflowed to infinity.  */
HIDDEN double __math_check_oflow (double);
/* Check if the result underflowed to 0.  */
HIDDEN double __math_check_uflow (double);

/* Check if the result overflowed to infinity.  */
static inline double
check_oflow (double x)
{
  return WANT_ERRNO ? __math_check_oflow (x) : x;
}

/* Check if the result underflowed to 0.  */
static inline double
check_uflow (double x)
{
  return WANT_ERRNO ? __math_check_uflow (x) : x;
}

/* Check if the result overflowed to infinity.  */
HIDDEN float __math_check_oflowf (float);
/* Check if the result underflowed to 0.  */
HIDDEN float __math_check_uflowf (float);

/* Check if the result overflowed to infinity.  */
static inline float
check_oflowf (float x)
{
  return WANT_ERRNO ? __math_check_oflowf (x) : x;
}

/* Check if the result underflowed to 0.  */
static inline float
check_uflowf (float x)
{
  return WANT_ERRNO ? __math_check_uflowf (x) : x;
}
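
/* Illustrative sketch (guarded out): the output-checking wrappers are applied
   to the final result of a path that may have overflowed; they reduce to the
   identity when WANT_ERRNO is 0.  */
#if 0
static inline double
example_finish (double scale, double tmp)
{
  return check_oflow (eval_as_double (scale * tmp));
}
#endif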

/* Shared between expf, exp2f and powf.  */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern const struct exp2f_data
{
  uint64_t tab[1 << EXP2F_TABLE_BITS];
  double shift_scaled;
  double poly[EXP2F_POLY_ORDER];
  double shift;
  double invln2_scaled;
  double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data HIDDEN;

#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
extern const struct logf_data
{
  struct
  {
    double invc, logc;
  } tab[1 << LOGF_TABLE_BITS];
  double ln2;
  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
} __logf_data HIDDEN;

#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
extern const struct log2f_data
{
  struct
  {
    double invc, logc;
  } tab[1 << LOG2F_TABLE_BITS];
  double poly[LOG2F_POLY_ORDER];
} __log2f_data HIDDEN;

#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
#if TOINT_INTRINSICS
# define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
# define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
extern const struct powf_log2_data
{
  struct
  {
    double invc, logc;
  } tab[1 << POWF_LOG2_TABLE_BITS];
  double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data HIDDEN;

#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
/* Use polynomial that is optimized for a wider input range.  This may be
   needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
#define EXP_POLY_WIDE 0
/* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
   needed for good precision in non-nearest rounding and !EXP_POLY_WIDE.  */
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
#define EXP2_POLY_WIDE 0
extern const struct exp_data
{
  double invln2N;
  double shift;
  double negln2hiN;
  double negln2loN;
  double poly[4]; /* Last four coefficients.  */
  double exp2_shift;
  double exp2_poly[EXP2_POLY_ORDER];
  uint64_t tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data HIDDEN;

#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
extern const struct log_data
{
  double ln2hi;
  double ln2lo;
  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
  double poly1[LOG_POLY1_ORDER - 1];
  struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
#if !HAVE_FAST_FMA
  struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data HIDDEN;

#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
extern const struct log2_data
{
  double invln2hi;
  double invln2lo;
  double poly[LOG2_POLY_ORDER - 1];
  double poly1[LOG2_POLY1_ORDER - 1];
  struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
#if !HAVE_FAST_FMA
  struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data HIDDEN;

#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
extern const struct pow_log_data
{
  double ln2hi;
  double ln2lo;
  double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
  /* Note: the pad field is unused, but allows slightly faster indexing.  */
  struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data HIDDEN;

extern const struct erff_data
{
  float erff_poly_A[6];
  float erff_poly_B[7];
} __erff_data HIDDEN;

#define ERF_POLY_A_ORDER 19
#define ERF_POLY_A_NCOEFFS 10
#define ERFC_POLY_C_NCOEFFS 16
#define ERFC_POLY_D_NCOEFFS 18
#define ERFC_POLY_E_NCOEFFS 14
#define ERFC_POLY_F_NCOEFFS 17
extern const struct erf_data
{
  double erf_poly_A[ERF_POLY_A_NCOEFFS];
  double erf_ratio_N_A[5];
  double erf_ratio_D_A[5];
  double erf_ratio_N_B[7];
  double erf_ratio_D_B[6];
  double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
  double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
  double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
  double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
} __erf_data HIDDEN;

#endif