1570af302Sopenharmony_ci#include <stdint.h>
2570af302Sopenharmony_ci#include <math.h>
3570af302Sopenharmony_ci#include "libm.h"
4570af302Sopenharmony_ci#include "sqrt_data.h"
5570af302Sopenharmony_ci
6570af302Sopenharmony_ci#define FENV_SUPPORT 1
7570af302Sopenharmony_ci
8570af302Sopenharmony_ci/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
9570af302Sopenharmony_cistatic inline uint32_t mul32(uint32_t a, uint32_t b)
10570af302Sopenharmony_ci{
11570af302Sopenharmony_ci	return (uint64_t)a*b >> 32;
12570af302Sopenharmony_ci}
13570af302Sopenharmony_ci
14570af302Sopenharmony_ci/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
15570af302Sopenharmony_cistatic inline uint64_t mul64(uint64_t a, uint64_t b)
16570af302Sopenharmony_ci{
17570af302Sopenharmony_ci	uint64_t ahi = a>>32;
18570af302Sopenharmony_ci	uint64_t alo = a&0xffffffff;
19570af302Sopenharmony_ci	uint64_t bhi = b>>32;
20570af302Sopenharmony_ci	uint64_t blo = b&0xffffffff;
21570af302Sopenharmony_ci	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
22570af302Sopenharmony_ci}
23570af302Sopenharmony_ci
24570af302Sopenharmony_cidouble sqrt(double x)
25570af302Sopenharmony_ci{
26570af302Sopenharmony_ci	uint64_t ix, top, m;
27570af302Sopenharmony_ci
28570af302Sopenharmony_ci	/* special case handling.  */
29570af302Sopenharmony_ci	ix = asuint64(x);
30570af302Sopenharmony_ci	top = ix >> 52;
31570af302Sopenharmony_ci	if (predict_false(top - 0x001 >= 0x7ff - 0x001)) {
32570af302Sopenharmony_ci		/* x < 0x1p-1022 or inf or nan.  */
33570af302Sopenharmony_ci		if (ix * 2 == 0)
34570af302Sopenharmony_ci			return x;
35570af302Sopenharmony_ci		if (ix == 0x7ff0000000000000)
36570af302Sopenharmony_ci			return x;
37570af302Sopenharmony_ci		if (ix > 0x7ff0000000000000)
38570af302Sopenharmony_ci			return __math_invalid(x);
39570af302Sopenharmony_ci		/* x is subnormal, normalize it.  */
40570af302Sopenharmony_ci		ix = asuint64(x * 0x1p52);
41570af302Sopenharmony_ci		top = ix >> 52;
42570af302Sopenharmony_ci		top -= 52;
43570af302Sopenharmony_ci	}
44570af302Sopenharmony_ci
45570af302Sopenharmony_ci	/* argument reduction:
46570af302Sopenharmony_ci	   x = 4^e m; with integer e, and m in [1, 4)
47570af302Sopenharmony_ci	   m: fixed point representation [2.62]
48570af302Sopenharmony_ci	   2^e is the exponent part of the result.  */
49570af302Sopenharmony_ci	int even = top & 1;
50570af302Sopenharmony_ci	m = (ix << 11) | 0x8000000000000000;
51570af302Sopenharmony_ci	if (even) m >>= 1;
52570af302Sopenharmony_ci	top = (top + 0x3ff) >> 1;
53570af302Sopenharmony_ci
54570af302Sopenharmony_ci	/* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
55570af302Sopenharmony_ci
56570af302Sopenharmony_ci	   initial estimate:
57570af302Sopenharmony_ci	   7bit table lookup (1bit exponent and 6bit significand).
58570af302Sopenharmony_ci
59570af302Sopenharmony_ci	   iterative approximation:
60570af302Sopenharmony_ci	   using 2 goldschmidt iterations with 32bit int arithmetics
61570af302Sopenharmony_ci	   and a final iteration with 64bit int arithmetics.
62570af302Sopenharmony_ci
63570af302Sopenharmony_ci	   details:
64570af302Sopenharmony_ci
65570af302Sopenharmony_ci	   the relative error (e = r0 sqrt(m)-1) of a linear estimate
66570af302Sopenharmony_ci	   (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
67570af302Sopenharmony_ci	   a table lookup is faster and needs one less iteration
68570af302Sopenharmony_ci	   6 bit lookup table (128b) gives |e| < 0x1.f9p-8
69570af302Sopenharmony_ci	   7 bit lookup table (256b) gives |e| < 0x1.fdp-9
70570af302Sopenharmony_ci	   for single and double prec 6bit is enough but for quad
71570af302Sopenharmony_ci	   prec 7bit is needed (or modified iterations). to avoid
72570af302Sopenharmony_ci	   one more iteration >=13bit table would be needed (16k).
73570af302Sopenharmony_ci
74570af302Sopenharmony_ci	   a newton-raphson iteration for r is
75570af302Sopenharmony_ci	     w = r*r
76570af302Sopenharmony_ci	     u = 3 - m*w
77570af302Sopenharmony_ci	     r = r*u/2
78570af302Sopenharmony_ci	   can use a goldschmidt iteration for s at the end or
79570af302Sopenharmony_ci	     s = m*r
80570af302Sopenharmony_ci
81570af302Sopenharmony_ci	   first goldschmidt iteration is
82570af302Sopenharmony_ci	     s = m*r
83570af302Sopenharmony_ci	     u = 3 - s*r
84570af302Sopenharmony_ci	     r = r*u/2
85570af302Sopenharmony_ci	     s = s*u/2
86570af302Sopenharmony_ci	   next goldschmidt iteration is
87570af302Sopenharmony_ci	     u = 3 - s*r
88570af302Sopenharmony_ci	     r = r*u/2
89570af302Sopenharmony_ci	     s = s*u/2
90570af302Sopenharmony_ci	   and at the end r is not computed only s.
91570af302Sopenharmony_ci
92570af302Sopenharmony_ci	   they use the same amount of operations and converge at the
93570af302Sopenharmony_ci	   same quadratic rate, i.e. if
94570af302Sopenharmony_ci	     r1 sqrt(m) - 1 = e, then
95570af302Sopenharmony_ci	     r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
96570af302Sopenharmony_ci	   the advantage of goldschmidt is that the mul for s and r
97570af302Sopenharmony_ci	   are independent (computed in parallel), however it is not
98570af302Sopenharmony_ci	   "self synchronizing": it only uses the input m in the
99570af302Sopenharmony_ci	   first iteration so rounding errors accumulate. at the end
100570af302Sopenharmony_ci	   or when switching to larger precision arithmetics rounding
101570af302Sopenharmony_ci	   errors dominate so the first iteration should be used.
102570af302Sopenharmony_ci
103570af302Sopenharmony_ci	   the fixed point representations are
104570af302Sopenharmony_ci	     m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
105570af302Sopenharmony_ci	   and after switching to 64 bit
106570af302Sopenharmony_ci	     m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62  */
107570af302Sopenharmony_ci
108570af302Sopenharmony_ci	static const uint64_t three = 0xc0000000;
109570af302Sopenharmony_ci	uint64_t r, s, d, u, i;
110570af302Sopenharmony_ci
111570af302Sopenharmony_ci	i = (ix >> 46) % 128;
112570af302Sopenharmony_ci	r = (uint32_t)__rsqrt_tab[i] << 16;
113570af302Sopenharmony_ci	/* |r sqrt(m) - 1| < 0x1.fdp-9 */
114570af302Sopenharmony_ci	s = mul32(m>>32, r);
115570af302Sopenharmony_ci	/* |s/sqrt(m) - 1| < 0x1.fdp-9 */
116570af302Sopenharmony_ci	d = mul32(s, r);
117570af302Sopenharmony_ci	u = three - d;
118570af302Sopenharmony_ci	r = mul32(r, u) << 1;
119570af302Sopenharmony_ci	/* |r sqrt(m) - 1| < 0x1.7bp-16 */
120570af302Sopenharmony_ci	s = mul32(s, u) << 1;
121570af302Sopenharmony_ci	/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
122570af302Sopenharmony_ci	d = mul32(s, r);
123570af302Sopenharmony_ci	u = three - d;
124570af302Sopenharmony_ci	r = mul32(r, u) << 1;
125570af302Sopenharmony_ci	/* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */
126570af302Sopenharmony_ci	r = r << 32;
127570af302Sopenharmony_ci	s = mul64(m, r);
128570af302Sopenharmony_ci	d = mul64(s, r);
129570af302Sopenharmony_ci	u = (three<<32) - d;
130570af302Sopenharmony_ci	s = mul64(s, u);  /* repr: 3.61 */
131570af302Sopenharmony_ci	/* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */
132570af302Sopenharmony_ci	s = (s - 2) >> 9; /* repr: 12.52 */
133570af302Sopenharmony_ci	/* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */
134570af302Sopenharmony_ci
135570af302Sopenharmony_ci	/* s < sqrt(m) < s + 0x1.09p-52,
136570af302Sopenharmony_ci	   compute nearest rounded result:
137570af302Sopenharmony_ci	   the nearest result to 52 bits is either s or s+0x1p-52,
138570af302Sopenharmony_ci	   we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.  */
139570af302Sopenharmony_ci	uint64_t d0, d1, d2;
140570af302Sopenharmony_ci	double y, t;
141570af302Sopenharmony_ci	d0 = (m << 42) - s*s;
142570af302Sopenharmony_ci	d1 = s - d0;
143570af302Sopenharmony_ci	d2 = d1 + s + 1;
144570af302Sopenharmony_ci	s += d1 >> 63;
145570af302Sopenharmony_ci	s &= 0x000fffffffffffff;
146570af302Sopenharmony_ci	s |= top << 52;
147570af302Sopenharmony_ci	y = asdouble(s);
148570af302Sopenharmony_ci	if (FENV_SUPPORT) {
149570af302Sopenharmony_ci		/* handle rounding modes and inexact exception:
150570af302Sopenharmony_ci		   only (s+1)^2 == 2^42 m case is exact otherwise
151570af302Sopenharmony_ci		   add a tiny value to cause the fenv effects.  */
152570af302Sopenharmony_ci		uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
153570af302Sopenharmony_ci		tiny |= (d1^d2) & 0x8000000000000000;
154570af302Sopenharmony_ci		t = asdouble(tiny);
155570af302Sopenharmony_ci		y = eval_as_double(y + t);
156570af302Sopenharmony_ci	}
157570af302Sopenharmony_ci	return y;
158570af302Sopenharmony_ci}
159