1570af302Sopenharmony_ci# exp(x) = 2^hi + 2^hi (2^lo - 1) 2570af302Sopenharmony_ci# where hi+lo = log2e*x with 128bit precision 3570af302Sopenharmony_ci# exact log2e*x calculation depends on nearest rounding mode 4570af302Sopenharmony_ci# using the exact multiplication method of Dekker and Veltkamp 5570af302Sopenharmony_ci 6570af302Sopenharmony_ci.global expl 7570af302Sopenharmony_ci.type expl,@function 8570af302Sopenharmony_ciexpl: 9570af302Sopenharmony_ci fldt 8(%esp) 10570af302Sopenharmony_ci 11570af302Sopenharmony_ci # interesting case: 0x1p-32 <= |x| < 16384 12570af302Sopenharmony_ci # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] 13570af302Sopenharmony_ci mov 16(%esp), %ax 14570af302Sopenharmony_ci or $0x8000, %ax 15570af302Sopenharmony_ci sub $0xbfdf, %ax 16570af302Sopenharmony_ci cmp $45, %ax 17570af302Sopenharmony_ci jbe 2f 18570af302Sopenharmony_ci test %ax, %ax 19570af302Sopenharmony_ci fld1 20570af302Sopenharmony_ci js 1f 21570af302Sopenharmony_ci # if |x|>=0x1p14 or nan return 2^trunc(x) 22570af302Sopenharmony_ci fscale 23570af302Sopenharmony_ci fstp %st(1) 24570af302Sopenharmony_ci ret 25570af302Sopenharmony_ci # if |x|<0x1p-32 return 1+x 26570af302Sopenharmony_ci1: faddp 27570af302Sopenharmony_ci ret 28570af302Sopenharmony_ci 29570af302Sopenharmony_ci # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc 30570af302Sopenharmony_ci # it will be wrong on non-nearest rounding mode 31570af302Sopenharmony_ci2: fldl2e 32570af302Sopenharmony_ci sub $48, %esp 33570af302Sopenharmony_ci # hi = log2e_hi*x 34570af302Sopenharmony_ci # 2^hi = exp2l(hi) 35570af302Sopenharmony_ci fmul %st(1),%st 36570af302Sopenharmony_ci fld %st(0) 37570af302Sopenharmony_ci fstpt (%esp) 38570af302Sopenharmony_ci fstpt 16(%esp) 39570af302Sopenharmony_ci fstpt 32(%esp) 40570af302Sopenharmony_ci call exp2l@PLT 41570af302Sopenharmony_ci # if 2^hi == inf return 2^hi 42570af302Sopenharmony_ci fld %st(0) 43570af302Sopenharmony_ci fstpt (%esp) 44570af302Sopenharmony_ci cmpw $0x7fff, 8(%esp) 45570af302Sopenharmony_ci je 1f 46570af302Sopenharmony_ci fldt 32(%esp) 47570af302Sopenharmony_ci fldt 16(%esp) 48570af302Sopenharmony_ci # fpu stack: 2^hi x hi 49570af302Sopenharmony_ci # exact mult: x*log2e 50570af302Sopenharmony_ci fld %st(1) 51570af302Sopenharmony_ci # c = 0x1p32+1 52570af302Sopenharmony_ci movq $0x41f0000000100000,%rax 53570af302Sopenharmony_ci pushq %rax 54570af302Sopenharmony_ci fldl (%esp) 55570af302Sopenharmony_ci # xh = x - c*x + c*x 56570af302Sopenharmony_ci # xl = x - xh 57570af302Sopenharmony_ci fmulp 58570af302Sopenharmony_ci fld %st(2) 59570af302Sopenharmony_ci fsub %st(1), %st 60570af302Sopenharmony_ci faddp 61570af302Sopenharmony_ci fld %st(2) 62570af302Sopenharmony_ci fsub %st(1), %st 63570af302Sopenharmony_ci # yh = log2e_hi - c*log2e_hi + c*log2e_hi 64570af302Sopenharmony_ci movq $0x3ff7154765200000,%rax 65570af302Sopenharmony_ci pushq %rax 66570af302Sopenharmony_ci fldl (%esp) 67570af302Sopenharmony_ci # fpu stack: 2^hi x hi xh xl yh 68570af302Sopenharmony_ci # lo = hi - xh*yh + xl*yh 69570af302Sopenharmony_ci fld %st(2) 70570af302Sopenharmony_ci fmul %st(1), %st 71570af302Sopenharmony_ci fsubp %st, %st(4) 72570af302Sopenharmony_ci fmul %st(1), %st 73570af302Sopenharmony_ci faddp %st, %st(3) 74570af302Sopenharmony_ci # yl = log2e_hi - yh 75570af302Sopenharmony_ci movq $0x3de705fc2f000000,%rax 76570af302Sopenharmony_ci pushq %rax 77570af302Sopenharmony_ci fldl (%esp) 78570af302Sopenharmony_ci # fpu stack: 2^hi x lo xh xl yl 79570af302Sopenharmony_ci # lo += xh*yl + xl*yl 80570af302Sopenharmony_ci fmul %st, %st(2) 81570af302Sopenharmony_ci fmulp %st, %st(1) 82570af302Sopenharmony_ci fxch %st(2) 83570af302Sopenharmony_ci faddp 84570af302Sopenharmony_ci faddp 85570af302Sopenharmony_ci # log2e_lo 86570af302Sopenharmony_ci movq $0xbfbe,%rax 87570af302Sopenharmony_ci pushq %rax 88570af302Sopenharmony_ci movq $0x82f0025f2dc582ee,%rax 89570af302Sopenharmony_ci pushq %rax 90570af302Sopenharmony_ci fldt (%esp) 91570af302Sopenharmony_ci add $40,%esp 92570af302Sopenharmony_ci # fpu stack: 2^hi x lo log2e_lo 93570af302Sopenharmony_ci # lo += log2e_lo*x 94570af302Sopenharmony_ci # return 2^hi + 2^hi (2^lo - 1) 95570af302Sopenharmony_ci fmulp %st, %st(2) 96570af302Sopenharmony_ci faddp 97570af302Sopenharmony_ci f2xm1 98570af302Sopenharmony_ci fmul %st(1), %st 99570af302Sopenharmony_ci faddp 100570af302Sopenharmony_ci1: add $48, %esp 101570af302Sopenharmony_ci ret 102