1 # exp(x) = 2^hi + 2^hi (2^lo - 1)
2 # where hi+lo = log2e*x with 128bit precision
3 # exact log2e*x calculation depends on nearest rounding mode
4 # using the exact multiplication method of Dekker and Veltkamp
5 
6 .global expl
7 .type expl,@function
8 expl:
9 	fldt 8(%esp)
10 
11 		# interesting case: 0x1p-32 <= |x| < 16384
12 		# check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]
13 	mov 16(%esp), %ax
14 	or $0x8000, %ax
15 	sub $0xbfdf, %ax
16 	cmp $45, %ax
17 	jbe 2f
18 	test %ax, %ax
19 	fld1
20 	js 1f
21 		# if |x|>=0x1p14 or nan return 2^trunc(x)
22 	fscale
23 	fstp %st(1)
24 	ret
25 		# if |x|<0x1p-32 return 1+x
26 1:	faddp
27 	ret
28 
29 		# should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc
30 		# it will be wrong on non-nearest rounding mode
31 2:	fldl2e
32 	sub $48, %esp
33 		# hi = log2e_hi*x
34 		# 2^hi = exp2l(hi)
35 	fmul %st(1),%st
36 	fld %st(0)
37 	fstpt (%esp)
38 	fstpt 16(%esp)
39 	fstpt 32(%esp)
40 	call exp2l@PLT
41 		# if 2^hi == inf return 2^hi
42 	fld %st(0)
43 	fstpt (%esp)
44 	cmpw $0x7fff, 8(%esp)
45 	je 1f
46 	fldt 32(%esp)
47 	fldt 16(%esp)
48 		# fpu stack: 2^hi x hi
49 		# exact mult: x*log2e
50 	fld %st(1)
51 		# c = 0x1p32+1
52 	movq $0x41f0000000100000,%rax
53 	pushq %rax
54 	fldl (%esp)
55 		# xh = x - c*x + c*x
56 		# xl = x - xh
57 	fmulp
58 	fld %st(2)
59 	fsub %st(1), %st
60 	faddp
61 	fld %st(2)
62 	fsub %st(1), %st
63 		# yh = log2e_hi - c*log2e_hi + c*log2e_hi
64 	movq $0x3ff7154765200000,%rax
65 	pushq %rax
66 	fldl (%esp)
67 		# fpu stack: 2^hi x hi xh xl yh
68 		# lo = hi - xh*yh + xl*yh
69 	fld %st(2)
70 	fmul %st(1), %st
71 	fsubp %st, %st(4)
72 	fmul %st(1), %st
73 	faddp %st, %st(3)
74 		# yl = log2e_hi - yh
75 	movq $0x3de705fc2f000000,%rax
76 	pushq %rax
77 	fldl (%esp)
78 		# fpu stack: 2^hi x lo xh xl yl
79 		# lo += xh*yl + xl*yl
80 	fmul %st, %st(2)
81 	fmulp %st, %st(1)
82 	fxch %st(2)
83 	faddp
84 	faddp
85 		# log2e_lo
86 	movq $0xbfbe,%rax
87 	pushq %rax
88 	movq $0x82f0025f2dc582ee,%rax
89 	pushq %rax
90 	fldt (%esp)
91 	add $40,%esp
92 		# fpu stack: 2^hi x lo log2e_lo
93 		# lo += log2e_lo*x
94 		# return 2^hi + 2^hi (2^lo - 1)
95 	fmulp %st, %st(2)
96 	faddp
97 	f2xm1
98 	fmul %st(1), %st
99 	faddp
100 1:	add $48, %esp
101 	ret
102