1/* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 * 15 * This file is based on work under the following copyright and permission 16 * notice: 17 * 18 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 19 * 20 * Developed at SunSoft, a Sun Microsystems, Inc. business. 21 * Permission to use, copy, modify, and distribute this 22 * software is freely granted, provided that this notice 23 * is preserved. 24 * 25 * @(#)e_sqrt.c 1.3 95/01/18 26 */ 27 28#include "jerry-libm-internal.h" 29 30/* sqrt(x) 31 * Return correctly rounded sqrt. 32 * 33 * ------------------------------------------ 34 * | Use the hardware sqrt if you have one | 35 * ------------------------------------------ 36 * 37 * Method: 38 * Bit by bit method using integer arithmetic. (Slow, but portable) 39 * 1. Normalization 40 * Scale x to y in [1,4) with even powers of 2: 41 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 42 * sqrt(x) = 2^k * sqrt(y) 43 * 2. Bit by bit computation 44 * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 45 * i 0 46 * i+1 2 47 * s = 2*q , and y = 2 * ( y - q ). (1) 48 * i i i i 49 * 50 * To compute q from q , one checks whether 51 * i+1 i 52 * 53 * -(i+1) 2 54 * (q + 2 ) <= y. (2) 55 * i 56 * -(i+1) 57 * If (2) is false, then q = q ; otherwise q = q + 2 . 58 * i+1 i i+1 i 59 * 60 * With some algebric manipulation, it is not difficult to see 61 * that (2) is equivalent to 62 * -(i+1) 63 * s + 2 <= y (3) 64 * i i 65 * 66 * The advantage of (3) is that s and y can be computed by 67 * i i 68 * the following recurrence formula: 69 * if (3) is false 70 * 71 * s = s , y = y ; (4) 72 * i+1 i i+1 i 73 * 74 * otherwise, 75 * -i -(i+1) 76 * s = s + 2 , y = y - s - 2 (5) 77 * i+1 i i+1 i i 78 * 79 * One may easily use induction to prove (4) and (5). 80 * Note. Since the left hand side of (3) contain only i+2 bits, 81 * it does not necessary to do a full (53-bit) comparison 82 * in (3). 83 * 3. Final rounding 84 * After generating the 53 bits result, we compute one more bit. 85 * Together with the remainder, we can decide whether the 86 * result is exact, bigger than 1/2ulp, or less than 1/2ulp 87 * (it will never equal to 1/2ulp). 88 * The rounding mode can be detected by checking whether 89 * huge + tiny is equal to huge, and whether huge - tiny is 90 * equal to huge for some floating point number "huge" and "tiny". 91 * 92 * Special cases: 93 * sqrt(+-0) = +-0 ... exact 94 * sqrt(inf) = inf 95 * sqrt(-ve) = NaN ... with invalid signal 96 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 97 * 98 * Other methods: see the appended file at the end of the program below. 99 */ 100 101#define one 1.0 102#define tiny 1.0e-300 103 104double 105sqrt (double x) 106{ 107 int sign = (int) 0x80000000; 108 unsigned r, t1, s1, ix1, q1; 109 int ix0, s0, q, m, t, i; 110 111 ix0 = __HI (x); /* high word of x */ 112 ix1 = __LO (x); /* low word of x */ 113 114 /* take care of Inf and NaN */ 115 if ((ix0 & 0x7ff00000) == 0x7ff00000) 116 { 117 return x * x + x; /* sqrt(NaN) = NaN, sqrt(+inf) = +inf, sqrt(-inf) = sNaN */ 118 } 119 /* take care of zero */ 120 if (ix0 <= 0) 121 { 122 if (((ix0 & (~sign)) | ix1) == 0) /* sqrt(+-0) = +-0 */ 123 { 124 return x; 125 } 126 else if (ix0 < 0) /* sqrt(-ve) = sNaN */ 127 { 128 return NAN; 129 } 130 } 131 /* normalize x */ 132 m = (ix0 >> 20); 133 if (m == 0) /* subnormal x */ 134 { 135 while (ix0 == 0) 136 { 137 m -= 21; 138 ix0 |= (ix1 >> 11); 139 ix1 <<= 21; 140 } 141 for (i = 0; (ix0 & 0x00100000) == 0; i++) 142 { 143 ix0 <<= 1; 144 } 145 m -= i - 1; 146 ix0 |= (ix1 >> (32 - i)); 147 ix1 <<= i; 148 } 149 m -= 1023; /* unbias exponent */ 150 ix0 = (ix0 & 0x000fffff) | 0x00100000; 151 if (m & 1) /* odd m, double x to make it even */ 152 { 153 ix0 += ix0 + ((ix1 & sign) >> 31); 154 ix1 += ix1; 155 } 156 m >>= 1; /* m = [m / 2] */ 157 158 /* generate sqrt(x) bit by bit */ 159 ix0 += ix0 + ((ix1 & sign) >> 31); 160 ix1 += ix1; 161 q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 162 r = 0x00200000; /* r = moving bit from right to left */ 163 164 while (r != 0) 165 { 166 t = s0 + r; 167 if (t <= ix0) 168 { 169 s0 = t + r; 170 ix0 -= t; 171 q += r; 172 } 173 ix0 += ix0 + ((ix1 & sign) >> 31); 174 ix1 += ix1; 175 r >>= 1; 176 } 177 178 r = sign; 179 while (r != 0) 180 { 181 t1 = s1 + r; 182 t = s0; 183 if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) 184 { 185 s1 = t1 + r; 186 if (((t1 & sign) == sign) && (s1 & sign) == 0) 187 { 188 s0 += 1; 189 } 190 ix0 -= t; 191 if (ix1 < t1) 192 { 193 ix0 -= 1; 194 } 195 ix1 -= t1; 196 q1 += r; 197 } 198 ix0 += ix0 + ((ix1 & sign) >> 31); 199 ix1 += ix1; 200 r >>= 1; 201 } 202 203 double_accessor ret; 204 205 /* use floating add to find out rounding direction */ 206 if ((ix0 | ix1) != 0) 207 { 208 ret.dbl = one - tiny; /* trigger inexact flag */ 209 if (ret.dbl >= one) 210 { 211 ret.dbl = one + tiny; 212 if (q1 == (unsigned) 0xffffffff) 213 { 214 q1 = 0; 215 q += 1; 216 } 217 else if (ret.dbl > one) 218 { 219 if (q1 == (unsigned) 0xfffffffe) 220 { 221 q += 1; 222 } 223 q1 += 2; 224 } 225 else 226 { 227 q1 += (q1 & 1); 228 } 229 } 230 } 231 ix0 = (q >> 1) + 0x3fe00000; 232 ix1 = q1 >> 1; 233 if ((q & 1) == 1) 234 { 235 ix1 |= sign; 236 } 237 ix0 += (m << 20); 238 ret.as_int.hi = ix0; 239 ret.as_int.lo = ix1; 240 return ret.dbl; 241} /* sqrt */ 242 243#undef one 244#undef tiny 245 246/* 247Other methods (use floating-point arithmetic) 248------------- 249(This is a copy of a drafted paper by Prof W. Kahan 250and K.C. Ng, written in May, 1986) 251 252 Two algorithms are given here to implement sqrt(x) 253 (IEEE double precision arithmetic) in software. 254 Both supply sqrt(x) correctly rounded. The first algorithm (in 255 Section A) uses newton iterations and involves four divisions. 256 The second one uses reciproot iterations to avoid division, but 257 requires more multiplications. Both algorithms need the ability 258 to chop results of arithmetic operations instead of round them, 259 and the INEXACT flag to indicate when an arithmetic operation 260 is executed exactly with no roundoff error, all part of the 261 standard (IEEE 754-1985). The ability to perform shift, add, 262 subtract and logical AND operations upon 32-bit words is needed 263 too, though not part of the standard. 264 265A. sqrt(x) by Newton Iteration 266 267 (1) Initial approximation 268 269 Let x0 and x1 be the leading and the trailing 32-bit words of 270 a floating point number x (in IEEE double format) respectively 271 272 1 11 52 ...widths 273 ------------------------------------------------------ 274 x: |s| e | f | 275 ------------------------------------------------------ 276 msb lsb msb lsb ...order 277 278 ------------------------ ------------------------ 279 x0: |s| e | f1 | x1: | f2 | 280 ------------------------ ------------------------ 281 282 By performing shifts and subtracts on x0 and x1 (both regarded 283 as integers), we obtain an 8-bit approximation of sqrt(x) as 284 follows. 285 286 k := (x0>>1) + 0x1ff80000; 287 y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 288 Here k is a 32-bit integer and T1[] is an integer array containing 289 correction terms. Now magically the floating value of y (y's 290 leading 32-bit word is y0, the value of its trailing word is 0) 291 approximates sqrt(x) to almost 8-bit. 292 293 Value of T1: 294 static int T1[32]= { 295 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 296 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 297 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 298 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 299 300 (2) Iterative refinement 301 302 Apply Heron's rule three times to y, we have y approximates 303 sqrt(x) to within 1 ulp (Unit in the Last Place): 304 305 y := (y+x/y)/2 ... almost 17 sig. bits 306 y := (y+x/y)/2 ... almost 35 sig. bits 307 y := y-(y-x/y)/2 ... within 1 ulp 308 309 Remark 1. 310 Another way to improve y to within 1 ulp is: 311 312 y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 313 y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 314 315 2 316 (x-y )*y 317 y := y + 2* ---------- ...within 1 ulp 318 2 319 3y + x 320 321 This formula has one division fewer than the one above; however, 322 it requires more multiplications and additions. Also x must be 323 scaled in advance to avoid spurious overflow in evaluating the 324 expression 3y*y+x. Hence it is not recommended uless division 325 is slow. If division is very slow, then one should use the 326 reciproot algorithm given in section B. 327 328 (3) Final adjustment 329 330 By twiddling y's last bit it is possible to force y to be 331 correctly rounded according to the prevailing rounding mode 332 as follows. Let r and i be copies of the rounding mode and 333 inexact flag before entering the square root program. Also we 334 use the expression y+-ulp for the next representable floating 335 numbers (up and down) of y. Note that y+-ulp = either fixed 336 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 337 mode. 338 339 I := FALSE; ... reset INEXACT flag I 340 R := RZ; ... set rounding mode to round-toward-zero 341 z := x/y; ... chopped quotient, possibly inexact 342 If(not I) then { ... if the quotient is exact 343 if(z=y) { 344 I := i; ... restore inexact flag 345 R := r; ... restore rounded mode 346 return sqrt(x):=y. 347 } else { 348 z := z - ulp; ... special rounding 349 } 350 } 351 i := TRUE; ... sqrt(x) is inexact 352 If (r=RN) then z=z+ulp ... rounded-to-nearest 353 If (r=RP) then { ... round-toward-+inf 354 y = y+ulp; z=z+ulp; 355 } 356 y := y+z; ... chopped sum 357 y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 358 I := i; ... restore inexact flag 359 R := r; ... restore rounded mode 360 return sqrt(x):=y. 361 362 (4) Special cases 363 364 Square root of +inf, +-0, or NaN is itself; 365 Square root of a negative number is NaN with invalid signal. 366 367B. sqrt(x) by Reciproot Iteration 368 369 (1) Initial approximation 370 371 Let x0 and x1 be the leading and the trailing 32-bit words of 372 a floating point number x (in IEEE double format) respectively 373 (see section A). By performing shifs and subtracts on x0 and y0, 374 we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 375 376 k := 0x5fe80000 - (x0>>1); 377 y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 378 379 Here k is a 32-bit integer and T2[] is an integer array 380 containing correction terms. Now magically the floating 381 value of y (y's leading 32-bit word is y0, the value of 382 its trailing word y1 is set to zero) approximates 1/sqrt(x) 383 to almost 7.8-bit. 384 385 Value of T2: 386 static int T2[64]= { 387 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 388 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 389 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 390 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 391 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 392 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 393 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 394 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 395 396 (2) Iterative refinement 397 398 Apply Reciproot iteration three times to y and multiply the 399 result by x to get an approximation z that matches sqrt(x) 400 to about 1 ulp. To be exact, we will have 401 -1ulp < sqrt(x)-z<1.0625ulp. 402 403 ... set rounding mode to Round-to-nearest 404 y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 405 y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 406 ... special arrangement for better accuracy 407 z := x*y ... 29 bits to sqrt(x), with z*y<1 408 z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 409 410 Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 411 (a) the term z*y in the final iteration is always less than 1; 412 (b) the error in the final result is biased upward so that 413 -1 ulp < sqrt(x) - z < 1.0625 ulp 414 instead of |sqrt(x)-z|<1.03125ulp. 415 416 (3) Final adjustment 417 418 By twiddling y's last bit it is possible to force y to be 419 correctly rounded according to the prevailing rounding mode 420 as follows. Let r and i be copies of the rounding mode and 421 inexact flag before entering the square root program. Also we 422 use the expression y+-ulp for the next representable floating 423 numbers (up and down) of y. Note that y+-ulp = either fixed 424 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 425 mode. 426 427 R := RZ; ... set rounding mode to round-toward-zero 428 switch(r) { 429 case RN: ... round-to-nearest 430 if(x<= z*(z-ulp)...chopped) z = z - ulp; else 431 if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 432 break; 433 case RZ:case RM: ... round-to-zero or round-to--inf 434 R:=RP; ... reset rounding mod to round-to-+inf 435 if(x<z*z ... rounded up) z = z - ulp; else 436 if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 437 break; 438 case RP: ... round-to-+inf 439 if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 440 if(x>z*z ...chopped) z = z+ulp; 441 break; 442 } 443 444 Remark 3. The above comparisons can be done in fixed point. For 445 example, to compare x and w=z*z chopped, it suffices to compare 446 x1 and w1 (the trailing parts of x and w), regarding them as 447 two's complement integers. 448 449 ...Is z an exact square root? 450 To determine whether z is an exact square root of x, let z1 be the 451 trailing part of z, and also let x0 and x1 be the leading and 452 trailing parts of x. 453 454 If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 455 I := 1; ... Raise Inexact flag: z is not exact 456 else { 457 j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 458 k := z1 >> 26; ... get z's 25-th and 26-th 459 fraction bits 460 I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 461 } 462 R:= r ... restore rounded mode 463 return sqrt(x):=z. 464 465 If multiplication is cheaper then the foregoing red tape, the 466 Inexact flag can be evaluated by 467 468 I := i; 469 I := (z*z!=x) or I. 470 471 Note that z*z can overwrite I; this value must be sensed if it is 472 True. 473 474 Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 475 zero. 476 477 -------------------- 478 z1: | f2 | 479 -------------------- 480 bit 31 bit 0 481 482 Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 483 or even of logb(x) have the following relations: 484 485 ------------------------------------------------- 486 bit 27,26 of z1 bit 1,0 of x1 logb(x) 487 ------------------------------------------------- 488 00 00 odd and even 489 01 01 even 490 10 10 odd 491 10 00 even 492 11 01 even 493 ------------------------------------------------- 494 495 (4) Special cases (see (4) of Section A). 496 */ 497