1//! A small number of math routines for floats and doubles. 2//! 3//! These are adapted from libm, a port of musl libc's libm to Rust. 4//! libm can be found online [here](https://github.com/rust-lang/libm), 5//! and is similarly licensed under an Apache2.0/MIT license 6 7#![cfg(all(not(feature = "std"), feature = "compact"))] 8#![doc(hidden)] 9 10/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ 11/* 12 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. 13 */ 14/* 15 * ==================================================== 16 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 17 * 18 * Developed at SunPro, a Sun Microsystems, Inc. business. 19 * Permission to use, copy, modify, and distribute this 20 * software is freely granted, provided that this notice 21 * is preserved. 22 * ==================================================== 23 */ 24 25/// # Safety 26/// 27/// Safe if `index < array.len()`. 28macro_rules! i { 29 ($array:ident, $index:expr) => { 30 // SAFETY: safe if `index < array.len()`. 31 unsafe { *$array.get_unchecked($index) } 32 }; 33} 34 35pub fn powf(x: f32, y: f32) -> f32 { 36 const BP: [f32; 2] = [1.0, 1.5]; 37 const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */ 38 const DP_L: [f32; 2] = [0.0, 1.56322085e-06]; /* 0x35d1cfdc */ 39 const TWO24: f32 = 16777216.0; /* 0x4b800000 */ 40 const HUGE: f32 = 1.0e30; 41 const TINY: f32 = 1.0e-30; 42 const L1: f32 = 6.0000002384e-01; /* 0x3f19999a */ 43 const L2: f32 = 4.2857143283e-01; /* 0x3edb6db7 */ 44 const L3: f32 = 3.3333334327e-01; /* 0x3eaaaaab */ 45 const L4: f32 = 2.7272811532e-01; /* 0x3e8ba305 */ 46 const L5: f32 = 2.3066075146e-01; /* 0x3e6c3255 */ 47 const L6: f32 = 2.0697501302e-01; /* 0x3e53f142 */ 48 const P1: f32 = 1.6666667163e-01; /* 0x3e2aaaab */ 49 const P2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ 50 const P3: f32 = 6.6137559770e-05; /* 0x388ab355 */ 51 const P4: f32 = -1.6533901999e-06; /* 0xb5ddea0e */ 52 const P5: f32 = 4.1381369442e-08; /* 0x3331bb4c */ 53 const LG2: f32 = 6.9314718246e-01; /* 0x3f317218 */ 54 const LG2_H: f32 = 6.93145752e-01; /* 0x3f317200 */ 55 const LG2_L: f32 = 1.42860654e-06; /* 0x35bfbe8c */ 56 const OVT: f32 = 4.2995665694e-08; /* -(128-log2(ovfl+.5ulp)) */ 57 const CP: f32 = 9.6179670095e-01; /* 0x3f76384f =2/(3ln2) */ 58 const CP_H: f32 = 9.6191406250e-01; /* 0x3f764000 =12b cp */ 59 const CP_L: f32 = -1.1736857402e-04; /* 0xb8f623c6 =tail of cp_h */ 60 const IVLN2: f32 = 1.4426950216e+00; 61 const IVLN2_H: f32 = 1.4426879883e+00; 62 const IVLN2_L: f32 = 7.0526075433e-06; 63 64 let mut z: f32; 65 let mut ax: f32; 66 let z_h: f32; 67 let z_l: f32; 68 let mut p_h: f32; 69 let mut p_l: f32; 70 let y1: f32; 71 let mut t1: f32; 72 let t2: f32; 73 let mut r: f32; 74 let s: f32; 75 let mut sn: f32; 76 let mut t: f32; 77 let mut u: f32; 78 let mut v: f32; 79 let mut w: f32; 80 let i: i32; 81 let mut j: i32; 82 let mut k: i32; 83 let mut yisint: i32; 84 let mut n: i32; 85 let hx: i32; 86 let hy: i32; 87 let mut ix: i32; 88 let iy: i32; 89 let mut is: i32; 90 91 hx = x.to_bits() as i32; 92 hy = y.to_bits() as i32; 93 94 ix = hx & 0x7fffffff; 95 iy = hy & 0x7fffffff; 96 97 /* x**0 = 1, even if x is NaN */ 98 if iy == 0 { 99 return 1.0; 100 } 101 102 /* 1**y = 1, even if y is NaN */ 103 if hx == 0x3f800000 { 104 return 1.0; 105 } 106 107 /* NaN if either arg is NaN */ 108 if ix > 0x7f800000 || iy > 0x7f800000 { 109 return x + y; 110 } 111 112 /* determine if y is an odd int when x < 0 113 * yisint = 0 ... y is not an integer 114 * yisint = 1 ... y is an odd int 115 * yisint = 2 ... y is an even int 116 */ 117 yisint = 0; 118 if hx < 0 { 119 if iy >= 0x4b800000 { 120 yisint = 2; /* even integer y */ 121 } else if iy >= 0x3f800000 { 122 k = (iy >> 23) - 0x7f; /* exponent */ 123 j = iy >> (23 - k); 124 if (j << (23 - k)) == iy { 125 yisint = 2 - (j & 1); 126 } 127 } 128 } 129 130 /* special value of y */ 131 if iy == 0x7f800000 { 132 /* y is +-inf */ 133 if ix == 0x3f800000 { 134 /* (-1)**+-inf is 1 */ 135 return 1.0; 136 } else if ix > 0x3f800000 { 137 /* (|x|>1)**+-inf = inf,0 */ 138 return if hy >= 0 { 139 y 140 } else { 141 0.0 142 }; 143 } else { 144 /* (|x|<1)**+-inf = 0,inf */ 145 return if hy >= 0 { 146 0.0 147 } else { 148 -y 149 }; 150 } 151 } 152 if iy == 0x3f800000 { 153 /* y is +-1 */ 154 return if hy >= 0 { 155 x 156 } else { 157 1.0 / x 158 }; 159 } 160 161 if hy == 0x40000000 { 162 /* y is 2 */ 163 return x * x; 164 } 165 166 if hy == 0x3f000000 167 /* y is 0.5 */ 168 && hx >= 0 169 { 170 /* x >= +0 */ 171 return sqrtf(x); 172 } 173 174 ax = fabsf(x); 175 /* special value of x */ 176 if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 { 177 /* x is +-0,+-inf,+-1 */ 178 z = ax; 179 if hy < 0 { 180 /* z = (1/|x|) */ 181 z = 1.0 / z; 182 } 183 184 if hx < 0 { 185 if ((ix - 0x3f800000) | yisint) == 0 { 186 z = (z - z) / (z - z); /* (-1)**non-int is NaN */ 187 } else if yisint == 1 { 188 z = -z; /* (x<0)**odd = -(|x|**odd) */ 189 } 190 } 191 return z; 192 } 193 194 sn = 1.0; /* sign of result */ 195 if hx < 0 { 196 if yisint == 0 { 197 /* (x<0)**(non-int) is NaN */ 198 return (x - x) / (x - x); 199 } 200 201 if yisint == 1 { 202 /* (x<0)**(odd int) */ 203 sn = -1.0; 204 } 205 } 206 207 /* |y| is HUGE */ 208 if iy > 0x4d000000 { 209 /* if |y| > 2**27 */ 210 /* over/underflow if x is not close to one */ 211 if ix < 0x3f7ffff8 { 212 return if hy < 0 { 213 sn * HUGE * HUGE 214 } else { 215 sn * TINY * TINY 216 }; 217 } 218 219 if ix > 0x3f800007 { 220 return if hy > 0 { 221 sn * HUGE * HUGE 222 } else { 223 sn * TINY * TINY 224 }; 225 } 226 227 /* now |1-x| is TINY <= 2**-20, suffice to compute 228 log(x) by x-x^2/2+x^3/3-x^4/4 */ 229 t = ax - 1.; /* t has 20 trailing zeros */ 230 w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25)); 231 u = IVLN2_H * t; /* IVLN2_H has 16 sig. bits */ 232 v = t * IVLN2_L - w * IVLN2; 233 t1 = u + v; 234 is = t1.to_bits() as i32; 235 t1 = f32::from_bits(is as u32 & 0xfffff000); 236 t2 = v - (t1 - u); 237 } else { 238 let mut s2: f32; 239 let mut s_h: f32; 240 let s_l: f32; 241 let mut t_h: f32; 242 let mut t_l: f32; 243 244 n = 0; 245 /* take care subnormal number */ 246 if ix < 0x00800000 { 247 ax *= TWO24; 248 n -= 24; 249 ix = ax.to_bits() as i32; 250 } 251 n += ((ix) >> 23) - 0x7f; 252 j = ix & 0x007fffff; 253 /* determine interval */ 254 ix = j | 0x3f800000; /* normalize ix */ 255 if j <= 0x1cc471 { 256 /* |x|<sqrt(3/2) */ 257 k = 0; 258 } else if j < 0x5db3d7 { 259 /* |x|<sqrt(3) */ 260 k = 1; 261 } else { 262 k = 0; 263 n += 1; 264 ix -= 0x00800000; 265 } 266 ax = f32::from_bits(ix as u32); 267 268 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ 269 u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ 270 v = 1.0 / (ax + i!(BP, k as usize)); 271 s = u * v; 272 s_h = s; 273 is = s_h.to_bits() as i32; 274 s_h = f32::from_bits(is as u32 & 0xfffff000); 275 /* t_h=ax+bp[k] High */ 276 is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32; 277 t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); 278 t_l = ax - (t_h - i!(BP, k as usize)); 279 s_l = v * ((u - s_h * t_h) - s_h * t_l); 280 /* compute log(ax) */ 281 s2 = s * s; 282 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); 283 r += s_l * (s_h + s); 284 s2 = s_h * s_h; 285 t_h = 3.0 + s2 + r; 286 is = t_h.to_bits() as i32; 287 t_h = f32::from_bits(is as u32 & 0xfffff000); 288 t_l = r - ((t_h - 3.0) - s2); 289 /* u+v = s*(1+...) */ 290 u = s_h * t_h; 291 v = s_l * t_h + t_l * s; 292 /* 2/(3log2)*(s+...) */ 293 p_h = u + v; 294 is = p_h.to_bits() as i32; 295 p_h = f32::from_bits(is as u32 & 0xfffff000); 296 p_l = v - (p_h - u); 297 z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ 298 z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); 299 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ 300 t = n as f32; 301 t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t; 302 is = t1.to_bits() as i32; 303 t1 = f32::from_bits(is as u32 & 0xfffff000); 304 t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); 305 }; 306 307 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ 308 is = y.to_bits() as i32; 309 y1 = f32::from_bits(is as u32 & 0xfffff000); 310 p_l = (y - y1) * t1 + y * t2; 311 p_h = y1 * t1; 312 z = p_l + p_h; 313 j = z.to_bits() as i32; 314 if j > 0x43000000 { 315 /* if z > 128 */ 316 return sn * HUGE * HUGE; /* overflow */ 317 } else if j == 0x43000000 { 318 /* if z == 128 */ 319 if p_l + OVT > z - p_h { 320 return sn * HUGE * HUGE; /* overflow */ 321 } 322 } else if (j & 0x7fffffff) > 0x43160000 { 323 /* z < -150 */ 324 // FIXME: check should be (uint32_t)j > 0xc3160000 325 return sn * TINY * TINY; /* underflow */ 326 } else if j as u32 == 0xc3160000 327 /* z == -150 */ 328 && p_l <= z - p_h 329 { 330 return sn * TINY * TINY; /* underflow */ 331 } 332 333 /* 334 * compute 2**(p_h+p_l) 335 */ 336 i = j & 0x7fffffff; 337 k = (i >> 23) - 0x7f; 338 n = 0; 339 if i > 0x3f000000 { 340 /* if |z| > 0.5, set n = [z+0.5] */ 341 n = j + (0x00800000 >> (k + 1)); 342 k = ((n & 0x7fffffff) >> 23) - 0x7f; /* new k for n */ 343 t = f32::from_bits(n as u32 & !(0x007fffff >> k)); 344 n = ((n & 0x007fffff) | 0x00800000) >> (23 - k); 345 if j < 0 { 346 n = -n; 347 } 348 p_h -= t; 349 } 350 t = p_l + p_h; 351 is = t.to_bits() as i32; 352 t = f32::from_bits(is as u32 & 0xffff8000); 353 u = t * LG2_H; 354 v = (p_l - (t - p_h)) * LG2 + t * LG2_L; 355 z = u + v; 356 w = v - (z - u); 357 t = z * z; 358 t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); 359 r = (z * t1) / (t1 - 2.0) - (w + z * w); 360 z = 1.0 - (r - z); 361 j = z.to_bits() as i32; 362 j += n << 23; 363 if (j >> 23) <= 0 { 364 /* subnormal output */ 365 z = scalbnf(z, n); 366 } else { 367 z = f32::from_bits(j as u32); 368 } 369 sn * z 370} 371 372/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ 373/* 374 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. 375 */ 376/* 377 * ==================================================== 378 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 379 * 380 * Developed at SunPro, a Sun Microsystems, Inc. business. 381 * Permission to use, copy, modify, and distribute this 382 * software is freely granted, provided that this notice 383 * is preserved. 384 * ==================================================== 385 */ 386 387pub fn sqrtf(x: f32) -> f32 { 388 #[cfg(target_feature = "sse")] 389 { 390 // Note: This path is unlikely since LLVM will usually have already 391 // optimized sqrt calls into hardware instructions if sse is available, 392 // but if someone does end up here they'll apprected the speed increase. 393 #[cfg(target_arch = "x86")] 394 use core::arch::x86::*; 395 #[cfg(target_arch = "x86_64")] 396 use core::arch::x86_64::*; 397 // SAFETY: safe, since `_mm_set_ss` takes a 32-bit float, and returns 398 // a 128-bit type with the lowest 32-bits as `x`, `_mm_sqrt_ss` calculates 399 // the sqrt of this 128-bit vector, and `_mm_cvtss_f32` extracts the lower 400 // 32-bits as a 32-bit float. 401 unsafe { 402 let m = _mm_set_ss(x); 403 let m_sqrt = _mm_sqrt_ss(m); 404 _mm_cvtss_f32(m_sqrt) 405 } 406 } 407 #[cfg(not(target_feature = "sse"))] 408 { 409 const TINY: f32 = 1.0e-30; 410 411 let mut z: f32; 412 let sign: i32 = 0x80000000u32 as i32; 413 let mut ix: i32; 414 let mut s: i32; 415 let mut q: i32; 416 let mut m: i32; 417 let mut t: i32; 418 let mut i: i32; 419 let mut r: u32; 420 421 ix = x.to_bits() as i32; 422 423 /* take care of Inf and NaN */ 424 if (ix as u32 & 0x7f800000) == 0x7f800000 { 425 return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ 426 } 427 428 /* take care of zero */ 429 if ix <= 0 { 430 if (ix & !sign) == 0 { 431 return x; /* sqrt(+-0) = +-0 */ 432 } 433 if ix < 0 { 434 return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ 435 } 436 } 437 438 /* normalize x */ 439 m = ix >> 23; 440 if m == 0 { 441 /* subnormal x */ 442 i = 0; 443 while ix & 0x00800000 == 0 { 444 ix <<= 1; 445 i = i + 1; 446 } 447 m -= i - 1; 448 } 449 m -= 127; /* unbias exponent */ 450 ix = (ix & 0x007fffff) | 0x00800000; 451 if m & 1 == 1 { 452 /* odd m, double x to make it even */ 453 ix += ix; 454 } 455 m >>= 1; /* m = [m/2] */ 456 457 /* generate sqrt(x) bit by bit */ 458 ix += ix; 459 q = 0; 460 s = 0; 461 r = 0x01000000; /* r = moving bit from right to left */ 462 463 while r != 0 { 464 t = s + r as i32; 465 if t <= ix { 466 s = t + r as i32; 467 ix -= t; 468 q += r as i32; 469 } 470 ix += ix; 471 r >>= 1; 472 } 473 474 /* use floating add to find out rounding direction */ 475 if ix != 0 { 476 z = 1.0 - TINY; /* raise inexact flag */ 477 if z >= 1.0 { 478 z = 1.0 + TINY; 479 if z > 1.0 { 480 q += 2; 481 } else { 482 q += q & 1; 483 } 484 } 485 } 486 487 ix = (q >> 1) + 0x3f000000; 488 ix += m << 23; 489 f32::from_bits(ix as u32) 490 } 491} 492 493/// Absolute value (magnitude) (f32) 494/// Calculates the absolute value (magnitude) of the argument `x`, 495/// by direct manipulation of the bit representation of `x`. 496pub fn fabsf(x: f32) -> f32 { 497 f32::from_bits(x.to_bits() & 0x7fffffff) 498} 499 500pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { 501 let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 502 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 503 let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 504 505 if n > 127 { 506 x *= x1p127; 507 n -= 127; 508 if n > 127 { 509 x *= x1p127; 510 n -= 127; 511 if n > 127 { 512 n = 127; 513 } 514 } 515 } else if n < -126 { 516 x *= x1p_126 * x1p24; 517 n += 126 - 24; 518 if n < -126 { 519 x *= x1p_126 * x1p24; 520 n += 126 - 24; 521 if n < -126 { 522 n = -126; 523 } 524 } 525 } 526 x * f32::from_bits(((0x7f + n) as u32) << 23) 527} 528 529/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */ 530/* 531 * ==================================================== 532 * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. 533 * 534 * Permission to use, copy, modify, and distribute this 535 * software is freely granted, provided that this notice 536 * is preserved. 537 * ==================================================== 538 */ 539 540// pow(x,y) return x**y 541// 542// n 543// Method: Let x = 2 * (1+f) 544// 1. Compute and return log2(x) in two pieces: 545// log2(x) = w1 + w2, 546// where w1 has 53-24 = 29 bit trailing zeros. 547// 2. Perform y*log2(x) = n+y' by simulating muti-precision 548// arithmetic, where |y'|<=0.5. 549// 3. Return x**y = 2**n*exp(y'*log2) 550// 551// Special cases: 552// 1. (anything) ** 0 is 1 553// 2. 1 ** (anything) is 1 554// 3. (anything except 1) ** NAN is NAN 555// 4. NAN ** (anything except 0) is NAN 556// 5. +-(|x| > 1) ** +INF is +INF 557// 6. +-(|x| > 1) ** -INF is +0 558// 7. +-(|x| < 1) ** +INF is +0 559// 8. +-(|x| < 1) ** -INF is +INF 560// 9. -1 ** +-INF is 1 561// 10. +0 ** (+anything except 0, NAN) is +0 562// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 563// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero 564// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero 565// 14. -0 ** (+odd integer) is -0 566// 15. -0 ** (-odd integer) is -INF, raise divbyzero 567// 16. +INF ** (+anything except 0,NAN) is +INF 568// 17. +INF ** (-anything except 0,NAN) is +0 569// 18. -INF ** (+odd integer) is -INF 570// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) 571// 20. (anything) ** 1 is (anything) 572// 21. (anything) ** -1 is 1/(anything) 573// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) 574// 23. (-anything except 0 and inf) ** (non-integer) is NAN 575// 576// Accuracy: 577// pow(x,y) returns x**y nearly rounded. In particular 578// pow(integer,integer) 579// always returns the correct integer provided it is 580// representable. 581// 582// Constants : 583// The hexadecimal values are the intended ones for the following 584// constants. The decimal values may be used, provided that the 585// compiler will convert from decimal to binary accurately enough 586// to produce the hexadecimal values shown. 587 588pub fn powd(x: f64, y: f64) -> f64 { 589 const BP: [f64; 2] = [1.0, 1.5]; 590 const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */ 591 const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */ 592 const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ 593 const HUGE: f64 = 1.0e300; 594 const TINY: f64 = 1.0e-300; 595 596 // poly coefs for (3/2)*(log(x)-2s-2/3*s**3: 597 const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ 598 const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ 599 const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ 600 const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ 601 const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ 602 const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ 603 const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ 604 const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */ 605 const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ 606 const P4: f64 = -1.65339022054652515390e-06; /* 0xbebbbd41_c5d26bf1 */ 607 const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ 608 const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */ 609 const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ 610 const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ 611 const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ 612 const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ 613 const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ 614 const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ 615 const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ 616 const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ 617 const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ 618 619 let t1: f64; 620 let t2: f64; 621 622 let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); 623 let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); 624 625 let mut ix: i32 = (hx & 0x7fffffff) as i32; 626 let iy: i32 = (hy & 0x7fffffff) as i32; 627 628 /* x**0 = 1, even if x is NaN */ 629 if ((iy as u32) | ly) == 0 { 630 return 1.0; 631 } 632 633 /* 1**y = 1, even if y is NaN */ 634 if hx == 0x3ff00000 && lx == 0 { 635 return 1.0; 636 } 637 638 /* NaN if either arg is NaN */ 639 if ix > 0x7ff00000 640 || (ix == 0x7ff00000 && lx != 0) 641 || iy > 0x7ff00000 642 || (iy == 0x7ff00000 && ly != 0) 643 { 644 return x + y; 645 } 646 647 /* determine if y is an odd int when x < 0 648 * yisint = 0 ... y is not an integer 649 * yisint = 1 ... y is an odd int 650 * yisint = 2 ... y is an even int 651 */ 652 let mut yisint: i32 = 0; 653 let mut k: i32; 654 let mut j: i32; 655 if hx < 0 { 656 if iy >= 0x43400000 { 657 yisint = 2; /* even integer y */ 658 } else if iy >= 0x3ff00000 { 659 k = (iy >> 20) - 0x3ff; /* exponent */ 660 661 if k > 20 { 662 j = (ly >> (52 - k)) as i32; 663 664 if (j << (52 - k)) == (ly as i32) { 665 yisint = 2 - (j & 1); 666 } 667 } else if ly == 0 { 668 j = iy >> (20 - k); 669 670 if (j << (20 - k)) == iy { 671 yisint = 2 - (j & 1); 672 } 673 } 674 } 675 } 676 677 if ly == 0 { 678 /* special value of y */ 679 if iy == 0x7ff00000 { 680 /* y is +-inf */ 681 682 return if ((ix - 0x3ff00000) | (lx as i32)) == 0 { 683 /* (-1)**+-inf is 1 */ 684 1.0 685 } else if ix >= 0x3ff00000 { 686 /* (|x|>1)**+-inf = inf,0 */ 687 if hy >= 0 { 688 y 689 } else { 690 0.0 691 } 692 } else { 693 /* (|x|<1)**+-inf = 0,inf */ 694 if hy >= 0 { 695 0.0 696 } else { 697 -y 698 } 699 }; 700 } 701 702 if iy == 0x3ff00000 { 703 /* y is +-1 */ 704 return if hy >= 0 { 705 x 706 } else { 707 1.0 / x 708 }; 709 } 710 711 if hy == 0x40000000 { 712 /* y is 2 */ 713 return x * x; 714 } 715 716 if hy == 0x3fe00000 { 717 /* y is 0.5 */ 718 if hx >= 0 { 719 /* x >= +0 */ 720 return sqrtd(x); 721 } 722 } 723 } 724 725 let mut ax: f64 = fabsd(x); 726 if lx == 0 { 727 /* special value of x */ 728 if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 { 729 /* x is +-0,+-inf,+-1 */ 730 let mut z: f64 = ax; 731 732 if hy < 0 { 733 /* z = (1/|x|) */ 734 z = 1.0 / z; 735 } 736 737 if hx < 0 { 738 if ((ix - 0x3ff00000) | yisint) == 0 { 739 z = (z - z) / (z - z); /* (-1)**non-int is NaN */ 740 } else if yisint == 1 { 741 z = -z; /* (x<0)**odd = -(|x|**odd) */ 742 } 743 } 744 745 return z; 746 } 747 } 748 749 let mut s: f64 = 1.0; /* sign of result */ 750 if hx < 0 { 751 if yisint == 0 { 752 /* (x<0)**(non-int) is NaN */ 753 return (x - x) / (x - x); 754 } 755 756 if yisint == 1 { 757 /* (x<0)**(odd int) */ 758 s = -1.0; 759 } 760 } 761 762 /* |y| is HUGE */ 763 if iy > 0x41e00000 { 764 /* if |y| > 2**31 */ 765 if iy > 0x43f00000 { 766 /* if |y| > 2**64, must o/uflow */ 767 if ix <= 0x3fefffff { 768 return if hy < 0 { 769 HUGE * HUGE 770 } else { 771 TINY * TINY 772 }; 773 } 774 775 if ix >= 0x3ff00000 { 776 return if hy > 0 { 777 HUGE * HUGE 778 } else { 779 TINY * TINY 780 }; 781 } 782 } 783 784 /* over/underflow if x is not close to one */ 785 if ix < 0x3fefffff { 786 return if hy < 0 { 787 s * HUGE * HUGE 788 } else { 789 s * TINY * TINY 790 }; 791 } 792 if ix > 0x3ff00000 { 793 return if hy > 0 { 794 s * HUGE * HUGE 795 } else { 796 s * TINY * TINY 797 }; 798 } 799 800 /* now |1-x| is TINY <= 2**-20, suffice to compute 801 log(x) by x-x^2/2+x^3/3-x^4/4 */ 802 let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ 803 let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); 804 let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. bits */ 805 let v: f64 = t * IVLN2_L - w * IVLN2; 806 t1 = with_set_low_word(u + v, 0); 807 t2 = v - (t1 - u); 808 } else { 809 // double ss,s2,s_h,s_l,t_h,t_l; 810 let mut n: i32 = 0; 811 812 if ix < 0x00100000 { 813 /* take care subnormal number */ 814 ax *= TWO53; 815 n -= 53; 816 ix = get_high_word(ax) as i32; 817 } 818 819 n += (ix >> 20) - 0x3ff; 820 j = ix & 0x000fffff; 821 822 /* determine interval */ 823 let k: i32; 824 ix = j | 0x3ff00000; /* normalize ix */ 825 if j <= 0x3988E { 826 /* |x|<sqrt(3/2) */ 827 k = 0; 828 } else if j < 0xBB67A { 829 /* |x|<sqrt(3) */ 830 k = 1; 831 } else { 832 k = 0; 833 n += 1; 834 ix -= 0x00100000; 835 } 836 ax = with_set_high_word(ax, ix as u32); 837 838 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ 839 let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ 840 let v: f64 = 1.0 / (ax + i!(BP, k as usize)); 841 let ss: f64 = u * v; 842 let s_h = with_set_low_word(ss, 0); 843 844 /* t_h=ax+bp[k] High */ 845 let t_h: f64 = with_set_high_word( 846 0.0, 847 ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18), 848 ); 849 let t_l: f64 = ax - (t_h - i!(BP, k as usize)); 850 let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); 851 852 /* compute log(ax) */ 853 let s2: f64 = ss * ss; 854 let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); 855 r += s_l * (s_h + ss); 856 let s2: f64 = s_h * s_h; 857 let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0); 858 let t_l: f64 = r - ((t_h - 3.0) - s2); 859 860 /* u+v = ss*(1+...) */ 861 let u: f64 = s_h * t_h; 862 let v: f64 = s_l * t_h + t_l * ss; 863 864 /* 2/(3log2)*(ss+...) */ 865 let p_h: f64 = with_set_low_word(u + v, 0); 866 let p_l = v - (p_h - u); 867 let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ 868 let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); 869 870 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ 871 let t: f64 = n as f64; 872 t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0); 873 t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); 874 } 875 876 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ 877 let y1: f64 = with_set_low_word(y, 0); 878 let p_l: f64 = (y - y1) * t1 + y * t2; 879 let mut p_h: f64 = y1 * t1; 880 let z: f64 = p_l + p_h; 881 let mut j: i32 = (z.to_bits() >> 32) as i32; 882 let i: i32 = z.to_bits() as i32; 883 // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32); 884 885 if j >= 0x40900000 { 886 /* z >= 1024 */ 887 if (j - 0x40900000) | i != 0 { 888 /* if z > 1024 */ 889 return s * HUGE * HUGE; /* overflow */ 890 } 891 892 if p_l + OVT > z - p_h { 893 return s * HUGE * HUGE; /* overflow */ 894 } 895 } else if (j & 0x7fffffff) >= 0x4090cc00 { 896 /* z <= -1075 */ 897 // FIXME: instead of abs(j) use unsigned j 898 899 if (((j as u32) - 0xc090cc00) | (i as u32)) != 0 { 900 /* z < -1075 */ 901 return s * TINY * TINY; /* underflow */ 902 } 903 904 if p_l <= z - p_h { 905 return s * TINY * TINY; /* underflow */ 906 } 907 } 908 909 /* compute 2**(p_h+p_l) */ 910 let i: i32 = j & (0x7fffffff as i32); 911 k = (i >> 20) - 0x3ff; 912 let mut n: i32 = 0; 913 914 if i > 0x3fe00000 { 915 /* if |z| > 0.5, set n = [z+0.5] */ 916 n = j + (0x00100000 >> (k + 1)); 917 k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */ 918 let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32); 919 n = ((n & 0x000fffff) | 0x00100000) >> (20 - k); 920 if j < 0 { 921 n = -n; 922 } 923 p_h -= t; 924 } 925 926 let t: f64 = with_set_low_word(p_l + p_h, 0); 927 let u: f64 = t * LG2_H; 928 let v: f64 = (p_l - (t - p_h)) * LG2 + t * LG2_L; 929 let mut z: f64 = u + v; 930 let w: f64 = v - (z - u); 931 let t: f64 = z * z; 932 let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); 933 let r: f64 = (z * t1) / (t1 - 2.0) - (w + z * w); 934 z = 1.0 - (r - z); 935 j = get_high_word(z) as i32; 936 j += n << 20; 937 938 if (j >> 20) <= 0 { 939 /* subnormal output */ 940 z = scalbnd(z, n); 941 } else { 942 z = with_set_high_word(z, j as u32); 943 } 944 945 s * z 946} 947 948/// Absolute value (magnitude) (f64) 949/// Calculates the absolute value (magnitude) of the argument `x`, 950/// by direct manipulation of the bit representation of `x`. 951pub fn fabsd(x: f64) -> f64 { 952 f64::from_bits(x.to_bits() & (u64::MAX / 2)) 953} 954 955pub fn scalbnd(x: f64, mut n: i32) -> f64 { 956 let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 957 let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 958 let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022) 959 960 let mut y = x; 961 962 if n > 1023 { 963 y *= x1p1023; 964 n -= 1023; 965 if n > 1023 { 966 y *= x1p1023; 967 n -= 1023; 968 if n > 1023 { 969 n = 1023; 970 } 971 } 972 } else if n < -1022 { 973 /* make sure final n < -53 to avoid double 974 rounding in the subnormal range */ 975 y *= x1p_1022 * x1p53; 976 n += 1022 - 53; 977 if n < -1022 { 978 y *= x1p_1022 * x1p53; 979 n += 1022 - 53; 980 if n < -1022 { 981 n = -1022; 982 } 983 } 984 } 985 y * f64::from_bits(((0x3ff + n) as u64) << 52) 986} 987 988/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ 989/* 990 * ==================================================== 991 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 992 * 993 * Developed at SunSoft, a Sun Microsystems, Inc. business. 994 * Permission to use, copy, modify, and distribute this 995 * software is freely granted, provided that this notice 996 * is preserved. 997 * ==================================================== 998 */ 999/* sqrt(x) 1000 * Return correctly rounded sqrt. 1001 * ------------------------------------------ 1002 * | Use the hardware sqrt if you have one | 1003 * ------------------------------------------ 1004 * Method: 1005 * Bit by bit method using integer arithmetic. (Slow, but portable) 1006 * 1. Normalization 1007 * Scale x to y in [1,4) with even powers of 2: 1008 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 1009 * sqrt(x) = 2^k * sqrt(y) 1010 * 2. Bit by bit computation 1011 * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 1012 * i 0 1013 * i+1 2 1014 * s = 2*q , and y = 2 * ( y - q ). (1) 1015 * i i i i 1016 * 1017 * To compute q from q , one checks whether 1018 * i+1 i 1019 * 1020 * -(i+1) 2 1021 * (q + 2 ) <= y. (2) 1022 * i 1023 * -(i+1) 1024 * If (2) is false, then q = q ; otherwise q = q + 2 . 1025 * i+1 i i+1 i 1026 * 1027 * With some algebraic manipulation, it is not difficult to see 1028 * that (2) is equivalent to 1029 * -(i+1) 1030 * s + 2 <= y (3) 1031 * i i 1032 * 1033 * The advantage of (3) is that s and y can be computed by 1034 * i i 1035 * the following recurrence formula: 1036 * if (3) is false 1037 * 1038 * s = s , y = y ; (4) 1039 * i+1 i i+1 i 1040 * 1041 * otherwise, 1042 * -i -(i+1) 1043 * s = s + 2 , y = y - s - 2 (5) 1044 * i+1 i i+1 i i 1045 * 1046 * One may easily use induction to prove (4) and (5). 1047 * Note. Since the left hand side of (3) contain only i+2 bits, 1048 * it does not necessary to do a full (53-bit) comparison 1049 * in (3). 1050 * 3. Final rounding 1051 * After generating the 53 bits result, we compute one more bit. 1052 * Together with the remainder, we can decide whether the 1053 * result is exact, bigger than 1/2ulp, or less than 1/2ulp 1054 * (it will never equal to 1/2ulp). 1055 * The rounding mode can be detected by checking whether 1056 * huge + tiny is equal to huge, and whether huge - tiny is 1057 * equal to huge for some floating point number "huge" and "tiny". 1058 * 1059 * Special cases: 1060 * sqrt(+-0) = +-0 ... exact 1061 * sqrt(inf) = inf 1062 * sqrt(-ve) = NaN ... with invalid signal 1063 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 1064 */ 1065 1066pub fn sqrtd(x: f64) -> f64 { 1067 #[cfg(target_feature = "sse2")] 1068 { 1069 // Note: This path is unlikely since LLVM will usually have already 1070 // optimized sqrt calls into hardware instructions if sse2 is available, 1071 // but if someone does end up here they'll apprected the speed increase. 1072 #[cfg(target_arch = "x86")] 1073 use core::arch::x86::*; 1074 #[cfg(target_arch = "x86_64")] 1075 use core::arch::x86_64::*; 1076 // SAFETY: safe, since `_mm_set_sd` takes a 64-bit float, and returns 1077 // a 128-bit type with the lowest 64-bits as `x`, `_mm_sqrt_ss` calculates 1078 // the sqrt of this 128-bit vector, and `_mm_cvtss_f64` extracts the lower 1079 // 64-bits as a 64-bit float. 1080 unsafe { 1081 let m = _mm_set_sd(x); 1082 let m_sqrt = _mm_sqrt_pd(m); 1083 _mm_cvtsd_f64(m_sqrt) 1084 } 1085 } 1086 #[cfg(not(target_feature = "sse2"))] 1087 { 1088 use core::num::Wrapping; 1089 1090 const TINY: f64 = 1.0e-300; 1091 1092 let mut z: f64; 1093 let sign: Wrapping<u32> = Wrapping(0x80000000); 1094 let mut ix0: i32; 1095 let mut s0: i32; 1096 let mut q: i32; 1097 let mut m: i32; 1098 let mut t: i32; 1099 let mut i: i32; 1100 let mut r: Wrapping<u32>; 1101 let mut t1: Wrapping<u32>; 1102 let mut s1: Wrapping<u32>; 1103 let mut ix1: Wrapping<u32>; 1104 let mut q1: Wrapping<u32>; 1105 1106 ix0 = (x.to_bits() >> 32) as i32; 1107 ix1 = Wrapping(x.to_bits() as u32); 1108 1109 /* take care of Inf and NaN */ 1110 if (ix0 & 0x7ff00000) == 0x7ff00000 { 1111 return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ 1112 } 1113 /* take care of zero */ 1114 if ix0 <= 0 { 1115 if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { 1116 return x; /* sqrt(+-0) = +-0 */ 1117 } 1118 if ix0 < 0 { 1119 return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ 1120 } 1121 } 1122 /* normalize x */ 1123 m = ix0 >> 20; 1124 if m == 0 { 1125 /* subnormal x */ 1126 while ix0 == 0 { 1127 m -= 21; 1128 ix0 |= (ix1 >> 11).0 as i32; 1129 ix1 <<= 21; 1130 } 1131 i = 0; 1132 while (ix0 & 0x00100000) == 0 { 1133 i += 1; 1134 ix0 <<= 1; 1135 } 1136 m -= i - 1; 1137 ix0 |= (ix1 >> (32 - i) as usize).0 as i32; 1138 ix1 = ix1 << i as usize; 1139 } 1140 m -= 1023; /* unbias exponent */ 1141 ix0 = (ix0 & 0x000fffff) | 0x00100000; 1142 if (m & 1) == 1 { 1143 /* odd m, double x to make it even */ 1144 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; 1145 ix1 += ix1; 1146 } 1147 m >>= 1; /* m = [m/2] */ 1148 1149 /* generate sqrt(x) bit by bit */ 1150 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; 1151 ix1 += ix1; 1152 q = 0; /* [q,q1] = sqrt(x) */ 1153 q1 = Wrapping(0); 1154 s0 = 0; 1155 s1 = Wrapping(0); 1156 r = Wrapping(0x00200000); /* r = moving bit from right to left */ 1157 1158 while r != Wrapping(0) { 1159 t = s0 + r.0 as i32; 1160 if t <= ix0 { 1161 s0 = t + r.0 as i32; 1162 ix0 -= t; 1163 q += r.0 as i32; 1164 } 1165 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; 1166 ix1 += ix1; 1167 r >>= 1; 1168 } 1169 1170 r = sign; 1171 while r != Wrapping(0) { 1172 t1 = s1 + r; 1173 t = s0; 1174 if t < ix0 || (t == ix0 && t1 <= ix1) { 1175 s1 = t1 + r; 1176 if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { 1177 s0 += 1; 1178 } 1179 ix0 -= t; 1180 if ix1 < t1 { 1181 ix0 -= 1; 1182 } 1183 ix1 -= t1; 1184 q1 += r; 1185 } 1186 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; 1187 ix1 += ix1; 1188 r >>= 1; 1189 } 1190 1191 /* use floating add to find out rounding direction */ 1192 if (ix0 as u32 | ix1.0) != 0 { 1193 z = 1.0 - TINY; /* raise inexact flag */ 1194 if z >= 1.0 { 1195 z = 1.0 + TINY; 1196 if q1.0 == 0xffffffff { 1197 q1 = Wrapping(0); 1198 q += 1; 1199 } else if z > 1.0 { 1200 if q1.0 == 0xfffffffe { 1201 q += 1; 1202 } 1203 q1 += Wrapping(2); 1204 } else { 1205 q1 += q1 & Wrapping(1); 1206 } 1207 } 1208 } 1209 ix0 = (q >> 1) + 0x3fe00000; 1210 ix1 = q1 >> 1; 1211 if (q & 1) == 1 { 1212 ix1 |= sign; 1213 } 1214 ix0 += m << 20; 1215 f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) 1216 } 1217} 1218 1219#[inline] 1220fn get_high_word(x: f64) -> u32 { 1221 (x.to_bits() >> 32) as u32 1222} 1223 1224#[inline] 1225fn with_set_high_word(f: f64, hi: u32) -> f64 { 1226 let mut tmp = f.to_bits(); 1227 tmp &= 0x00000000_ffffffff; 1228 tmp |= (hi as u64) << 32; 1229 f64::from_bits(tmp) 1230} 1231 1232#[inline] 1233fn with_set_low_word(f: f64, lo: u32) -> f64 { 1234 let mut tmp = f.to_bits(); 1235 tmp &= 0xffffffff_00000000; 1236 tmp |= lo as u64; 1237 f64::from_bits(tmp) 1238} 1239