xref: /third_party/optimized-routines/math/erf.c (revision bbbf1280)
1/*
2 * Double-precision erf(x) function.
3 *
4 * Copyright (c) 2020, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8#include "math_config.h"
9#include <math.h>
10#include <stdint.h>
11
12#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
13#define C 0x1.b0ac16p-1
14#define PA __erf_data.erf_poly_A
15#define NA __erf_data.erf_ratio_N_A
16#define DA __erf_data.erf_ratio_D_A
17#define NB __erf_data.erf_ratio_N_B
18#define DB __erf_data.erf_ratio_D_B
19#define PC __erf_data.erfc_poly_C
20#define PD __erf_data.erfc_poly_D
21#define PE __erf_data.erfc_poly_E
22#define PF __erf_data.erfc_poly_F
23
24/* Top 32 bits of a double.  */
25static inline uint32_t
26top32 (double x)
27{
28  return asuint64 (x) >> 32;
29}
30
31/* Fast erf implementation using a mix of
32   rational and polynomial approximations.
33   Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.  */
34double
35erf (double x)
36{
37  /* Get top word and sign.  */
38  uint32_t ix = top32 (x);
39  uint32_t ia = ix & 0x7fffffff;
40  uint32_t sign = ix >> 31;
41
42  /* Normalized and subnormal cases */
43  if (ia < 0x3feb0000)
44    { /* a = |x| < 0.84375.  */
45
46      if (ia < 0x3e300000)
47	{ /* a < 2^(-28).  */
48	  if (ia < 0x00800000)
49	    { /* a < 2^(-1015).  */
50	      double y =  fma (TwoOverSqrtPiMinusOne, x, x);
51	      return check_uflow (y);
52	    }
53	  return x + TwoOverSqrtPiMinusOne * x;
54	}
55
56      double x2 = x * x;
57
58      if (ia < 0x3fe00000)
59	{ /* a < 0.5  - Use polynomial approximation.  */
60	  double r1 = fma (x2, PA[1], PA[0]);
61	  double r2 = fma (x2, PA[3], PA[2]);
62	  double r3 = fma (x2, PA[5], PA[4]);
63	  double r4 = fma (x2, PA[7], PA[6]);
64	  double r5 = fma (x2, PA[9], PA[8]);
65	  double x4 = x2 * x2;
66	  double r = r5;
67	  r = fma (x4, r, r4);
68	  r = fma (x4, r, r3);
69	  r = fma (x4, r, r2);
70	  r = fma (x4, r, r1);
71	  return fma (r, x, x); /* This fma is crucial for accuracy.  */
72	}
73      else
74	{ /* 0.5 <= a < 0.84375 - Use rational approximation.  */
75	  double x4, x8, r1n, r2n, r1d, r2d, r3d;
76
77	  r1n = fma (x2, NA[1], NA[0]);
78	  x4 = x2 * x2;
79	  r2n = fma (x2, NA[3], NA[2]);
80	  x8 = x4 * x4;
81	  r1d = fma (x2, DA[0], 1.0);
82	  r2d = fma (x2, DA[2], DA[1]);
83	  r3d = fma (x2, DA[4], DA[3]);
84	  double P = r1n + x4 * r2n + x8 * NA[4];
85	  double Q = r1d + x4 * r2d + x8 * r3d;
86	  return fma (P / Q, x, x);
87	}
88    }
89  else if (ia < 0x3ff40000)
90    { /* 0.84375 <= |x| < 1.25.  */
91      double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
92      double a = fabs (x) - 1.0;
93      r1n = fma (a, NB[1], NB[0]);
94      a2 = a * a;
95      r1d = fma (a, DB[0], 1.0);
96      a4 = a2 * a2;
97      r2n = fma (a, NB[3], NB[2]);
98      a6 = a4 * a2;
99      r2d = fma (a, DB[2], DB[1]);
100      r3n = fma (a, NB[5], NB[4]);
101      r3d = fma (a, DB[4], DB[3]);
102      r4n = NB[6];
103      r4d = DB[5];
104      double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
105      double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
106      if (sign)
107	return -C - P / Q;
108      else
109	return C + P / Q;
110    }
111  else if (ia < 0x40000000)
112    { /* 1.25 <= |x| < 2.0.  */
113      double a = fabs (x);
114      a = a - 1.25;
115
116      double r1 = fma (a, PC[1], PC[0]);
117      double r2 = fma (a, PC[3], PC[2]);
118      double r3 = fma (a, PC[5], PC[4]);
119      double r4 = fma (a, PC[7], PC[6]);
120      double r5 = fma (a, PC[9], PC[8]);
121      double r6 = fma (a, PC[11], PC[10]);
122      double r7 = fma (a, PC[13], PC[12]);
123      double r8 = fma (a, PC[15], PC[14]);
124
125      double a2 = a * a;
126
127      double r = r8;
128      r = fma (a2, r, r7);
129      r = fma (a2, r, r6);
130      r = fma (a2, r, r5);
131      r = fma (a2, r, r4);
132      r = fma (a2, r, r3);
133      r = fma (a2, r, r2);
134      r = fma (a2, r, r1);
135
136      if (sign)
137	return -1.0 + r;
138      else
139	return 1.0 - r;
140    }
141  else if (ia < 0x400a0000)
142    { /* 2 <= |x| < 3.25.  */
143      double a = fabs (x);
144      a = fma (0.5, a, -1.0);
145
146      double r1 = fma (a, PD[1], PD[0]);
147      double r2 = fma (a, PD[3], PD[2]);
148      double r3 = fma (a, PD[5], PD[4]);
149      double r4 = fma (a, PD[7], PD[6]);
150      double r5 = fma (a, PD[9], PD[8]);
151      double r6 = fma (a, PD[11], PD[10]);
152      double r7 = fma (a, PD[13], PD[12]);
153      double r8 = fma (a, PD[15], PD[14]);
154      double r9 = fma (a, PD[17], PD[16]);
155
156      double a2 = a * a;
157
158      double r = r9;
159      r = fma (a2, r, r8);
160      r = fma (a2, r, r7);
161      r = fma (a2, r, r6);
162      r = fma (a2, r, r5);
163      r = fma (a2, r, r4);
164      r = fma (a2, r, r3);
165      r = fma (a2, r, r2);
166      r = fma (a2, r, r1);
167
168      if (sign)
169	return -1.0 + r;
170      else
171	return 1.0 - r;
172    }
173  else if (ia < 0x40100000)
174    { /* 3.25 <= |x| < 4.0.  */
175      double a = fabs (x);
176      a = a - 3.25;
177
178      double r1 = fma (a, PE[1], PE[0]);
179      double r2 = fma (a, PE[3], PE[2]);
180      double r3 = fma (a, PE[5], PE[4]);
181      double r4 = fma (a, PE[7], PE[6]);
182      double r5 = fma (a, PE[9], PE[8]);
183      double r6 = fma (a, PE[11], PE[10]);
184      double r7 = fma (a, PE[13], PE[12]);
185
186      double a2 = a * a;
187
188      double r = r7;
189      r = fma (a2, r, r6);
190      r = fma (a2, r, r5);
191      r = fma (a2, r, r4);
192      r = fma (a2, r, r3);
193      r = fma (a2, r, r2);
194      r = fma (a2, r, r1);
195
196      if (sign)
197	return -1.0 + r;
198      else
199	return 1.0 - r;
200    }
201  else if (ia < 0x4017a000)
202    { /* 4 <= |x| < 5.90625.  */
203      double a = fabs (x);
204      a = fma (0.5, a, -2.0);
205
206      double r1 = fma (a, PF[1], PF[0]);
207      double r2 = fma (a, PF[3], PF[2]);
208      double r3 = fma (a, PF[5], PF[4]);
209      double r4 = fma (a, PF[7], PF[6]);
210      double r5 = fma (a, PF[9], PF[8]);
211      double r6 = fma (a, PF[11], PF[10]);
212      double r7 = fma (a, PF[13], PF[12]);
213      double r8 = fma (a, PF[15], PF[14]);
214      double r9 = PF[16];
215
216      double a2 = a * a;
217
218      double r = r9;
219      r = fma (a2, r, r8);
220      r = fma (a2, r, r7);
221      r = fma (a2, r, r6);
222      r = fma (a2, r, r5);
223      r = fma (a2, r, r4);
224      r = fma (a2, r, r3);
225      r = fma (a2, r, r2);
226      r = fma (a2, r, r1);
227
228      if (sign)
229	return -1.0 + r;
230      else
231	return 1.0 - r;
232    }
233  else
234    {
235      /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1.  */
236      if (unlikely (ia >= 0x7ff00000))
237	return (double) (1.0 - (sign << 1)) + 1.0 / x;
238
239      if (sign)
240	return -1.0;
241      else
242	return 1.0;
243    }
244}
245