//! An implementation of Clinger's Bellerophon algorithm.
//!
//! This is a moderate-path algorithm that uses an extended-precision
//! float, represented in 80 bits, calculating the bits of slop
//! and determining if those bits could prevent unambiguous rounding.
//!
//! This algorithm requires less static storage than the Lemire algorithm
//! and has decent performance, and is therefore used when non-decimal,
//! non-power-of-two strings need to be parsed. Clinger's algorithm
//! is described in depth in "How to Read Floating Point Numbers Accurately",
//! available online [here](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.45.4152&rep=rep1&type=pdf).
//!
//! This implementation is loosely based on the Go implementation,
//! found [here](https://github.com/golang/go/blob/b10849fbb97a2244c086991b4623ae9f32c212d0/src/strconv/extfloat.go).
//! This code is therefore subject to a 3-clause BSD license.

#![cfg(feature = "compact")]
#![doc(hidden)]

use crate::extended_float::ExtendedFloat;
use crate::mask::{lower_n_halfway, lower_n_mask};
use crate::num::Float;
use crate::number::Number;
use crate::rounding::{round, round_nearest_tie_even};
use crate::table::BASE10_POWERS;

// ALGORITHM
// ---------

/// Core implementation of the Bellerophon algorithm.
///
/// Create an extended-precision float, scale it to the proper radix power,
/// calculate the bits of slop, and return the representation. The value
/// is guaranteed to be within 1 bit, rounded down, of the real value.
/// If a negative exponent is returned, it signals that we were unable
/// to unambiguously round the significant digits.
///
/// This has been modified to return a biased, rather than an unbiased, exponent.
pub fn bellerophon<F: Float>(num: &Number) -> ExtendedFloat {
    let fp_zero = ExtendedFloat {
        mant: 0,
        exp: 0,
    };
    let fp_inf = ExtendedFloat {
        mant: 0,
        exp: F::INFINITE_POWER,
    };

    // Early short-circuit, in case of literal 0 or infinity.
    // This allows us to avoid narrowing casts causing numeric overflow,
    // and is a quick check for any radix.
    if num.mantissa == 0 || num.exponent <= -0x1000 {
        return fp_zero;
    } else if num.exponent >= 0x1000 {
        return fp_inf;
    }

    // Calculate our indexes for our extended-precision multiplication.
    // This narrowing cast is safe, since the exponent must be in a valid range.
    let exponent = num.exponent as i32 + BASE10_POWERS.bias;
    let small_index = exponent % BASE10_POWERS.step;
    let large_index = exponent / BASE10_POWERS.step;

    if exponent < 0 {
        // Guaranteed underflow (assign 0).
        return fp_zero;
    }
    if large_index as usize >= BASE10_POWERS.large.len() {
        // Overflow (assign infinity).
        return fp_inf;
    }

    // Within the valid exponent range, multiply by the large and small
    // exponents and return the resulting value.

    // Track errors as a factor of the unit in the last place (ULP).
    let mut errors: u32 = 0;
    if num.many_digits {
        errors += error_halfscale();
    }

    // Multiply by the small power.
    // Check if we can directly multiply by an integer; if not,
    // use extended-precision multiplication.
    let mut fp = ExtendedFloat {
        mant: num.mantissa,
        exp: 0,
    };
    match fp.mant.overflowing_mul(BASE10_POWERS.get_small_int(small_index as usize)) {
        // Overflow, multiplication unsuccessful, go slow path.
        (_, true) => {
            normalize(&mut fp);
            fp = mul(&fp, &BASE10_POWERS.get_small(small_index as usize));
            errors += error_halfscale();
        },
        // No overflow, multiplication successful.
        (mant, false) => {
            fp.mant = mant;
            normalize(&mut fp);
        },
    }

    // Multiply by the large power.
    fp = mul(&fp, &BASE10_POWERS.get_large(large_index as usize));
    if errors > 0 {
        errors += 1;
    }
    errors += error_halfscale();

    // Normalize the floating point (and the errors).
    let shift = normalize(&mut fp);
    errors <<= shift;
    fp.exp += F::EXPONENT_BIAS;

    // Check for literal underflow, even with halfway cases.
    if -fp.exp + 1 > 65 {
        return fp_zero;
    }

    // Too many errors accumulated, return an error.
    if !error_is_accurate::<F>(errors, &fp) {
        // Bias the exponent so we know it's invalid.
        fp.exp += F::INVALID_FP;
        return fp;
    }

    // Check if we have a literal 0 or underflow here.
    // If we have an exponent of -63, we can still have a valid shift,
    // giving a case where we have too many errors and need to round up.
    if -fp.exp + 1 == 65 {
        // Have more than 64 bits below the minimum exponent, must be 0.
        return fp_zero;
    }

    round::<F, _>(&mut fp, |f, s| {
        round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| {
            is_above || (is_odd && is_halfway)
        });
    });
    fp
}
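// Worked example (added for illustration; the concrete constants live in
// `crate::table`, and the values below are hypothetical): suppose
// `BASE10_POWERS.step == 10` and `BASE10_POWERS.bias == 350`. For a parsed
// `12345e-70`, `num.exponent` is -70, so `exponent = -70 + 350 = 280`,
// giving `small_index = 280 % 10 = 0` and `large_index = 280 / 10 = 28`.
// The mantissa is first multiplied by the exact integer `small_int[0]`
// (10^0 = 1), then by `get_large(28)` (representing
// `10^(28 * 10 - 350) = 10^-70`) via the extended-precision `mul`, which
// accumulates half an error scale of slop.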
// ERRORS
// ------

// Determine whether the errors accumulated while calculating the
// extended-precision float are significant.
//
// Specifically, we want to know if we are close to a halfway representation,
// that is, halfway between `b` and `b+1`, denoted `b+h`. The halfway
// representation has the form:
//     SEEEEEEEHMMMMMMMMMMMMMMMMMMMMMMM100...
// where:
//     S = Sign Bit
//     E = Exponent Bits
//     H = Hidden Bit
//     M = Mantissa Bits
//
// The halfway representation has a bit set 1-after the mantissa digits,
// and no bits set immediately afterward, making it impossible to
// round between `b` and `b+1` with this representation.

/// Get the full error scale.
#[inline(always)]
const fn error_scale() -> u32 {
    8
}

/// Get the half error scale.
#[inline(always)]
const fn error_halfscale() -> u32 {
    error_scale() / 2
}

/// Determine if the number of errors is tolerable for float precision.
fn error_is_accurate<F: Float>(errors: u32, fp: &ExtendedFloat) -> bool {
    // Check we can't have a literal 0 denormal float.
    debug_assert!(fp.exp >= -64);

    // Determine if the extended-precision float is a good approximation.
    // If the error has affected too many units, the float will be
    // inaccurate, or the representation is so close to halfway
    // that any operation could affect the halfway representation.
    // See the documentation for dtoa for more information.

    // This is always a valid u32, since `64 - F::MANTISSA_SIZE - 1`
    // is always positive (the significand size is 23 or 52).
    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;

    // The unbiased exponent check is `unbiased_exp <= F::MANTISSA_SIZE
    // - F::EXPONENT_BIAS - 64 + 1`, or `biased_exp <= F::MANTISSA_SIZE - 63`,
    // or `biased_exp <= -mantissa_shift`.
    let extrabits = match fp.exp <= -mantissa_shift {
        // Denormal, since shifting to the hidden bit still has a negative exponent.
        // The unbiased calculation for the number of bits is
        // `1 - F::EXPONENT_BIAS - unbiased_exp`, or `1 - biased_exp`.
        true => 1 - fp.exp,
        false => 64 - F::MANTISSA_SIZE - 1,
    };

    // Our logic is as follows: we want to determine if the actual
    // mantissa and the errors during calculation differ significantly
    // from the rounding point. The rounding point for round-nearest
    // is the halfway point, i.e., when the truncated bits start
    // with b1000..., while the rounding point for round-toward-zero
    // is when the truncated bits are all 0.
    // To do so, we can check whether the rounding point +/- the error
    // is >/< the actual lower n bits.
    //
    // For whether we need to use signed or unsigned types for this
    // analysis, see this example, using u8 rather than u64 to simplify
    // things.
    //
    // # Comparisons
    //      cmp1 = (halfway - errors) < extra
    //      cmp2 = extra < (halfway + errors)
    //
    // # Large Extrabits, Low Errors
    //
    //      extrabits = 8
    //      halfway          = 0b10000000
    //      extra            = 0b10000010
    //      errors           = 0b00000100
    //      halfway - errors = 0b01111100
    //      halfway + errors = 0b10000100
    //
    //      Unsigned:
    //          halfway - errors = 124
    //          halfway + errors = 132
    //          extra            = 130
    //          cmp1             = true
    //          cmp2             = true
    //      Signed:
    //          halfway - errors = 124
    //          halfway + errors = -124
    //          extra            = -126
    //          cmp1             = false
    //          cmp2             = true
    //
    // # Conclusion
    //
    // Since errors will always be small, and since we want to detect
    // if the representation is accurate, we need to use an **unsigned**
    // type for comparisons.
    let maskbits = extrabits as u64;
    let errors = errors as u64;

    // Round-to-nearest, need to use the halfway point.
    if extrabits > 64 {
        // Underflow, we have a shift larger than the mantissa.
        // The representation is valid **only** if adding the errors does
        // not overflow into the next bit; if it overflows, the
        // representation is **not** valid.
        !fp.mant.overflowing_add(errors).1
    } else {
        let mask = lower_n_mask(maskbits);
        let extra = fp.mant & mask;

        // Round-to-nearest, need to check if we're close to halfway.
        // I.e., b10100 | 100000, where `|` signifies the truncation point.
        let halfway = lower_n_halfway(maskbits);
        let cmp1 = halfway.wrapping_sub(errors) < extra;
        let cmp2 = extra < halfway.wrapping_add(errors);

        // If both comparisons are true, we have significant rounding error,
        // and the value cannot be exactly represented. Otherwise, the
        // representation is valid.
        !(cmp1 && cmp2)
    }
}
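// Worked example (added for illustration, using `f64`, where
// `F::MANTISSA_SIZE == 52`): a normal float leaves
// `extrabits = 64 - 52 - 1 = 11` truncated bits, so `mask = 0x7FF` and
// `halfway = 0x400`. With, say, `errors = 8`, any mantissa whose low 11
// bits fall strictly between `0x400 - 8` and `0x400 + 8` is too close to
// the halfway point for the accumulated slop to round unambiguously, and
// `error_is_accurate` returns `false`; anything outside that window
// rounds safely and returns `true`.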
// MATH
// ----

/// Normalize a floating-point number.
///
/// Shift the mantissa so the number of leading zeros is 0, or the value
/// itself is 0.
///
/// Returns the number of bits shifted.
pub fn normalize(fp: &mut ExtendedFloat) -> i32 {
    // Note:
    //  Using the ctlz intrinsic via `leading_zeros` is way faster (~10x)
    //  than shifting 1 bit at a time via a while loop, and also way
    //  faster (~2x) than an unrolled loop that checks at 32, 16, 8, 4,
    //  2, and 1 bit.
    //
    //  Using a modulus of pow2 (which will get optimized to a bitwise
    //  and with 0x3F or faster) is slightly slower than an if/then;
    //  however, removing the if/then will likely optimize more branched
    //  code, as it removes conditional logic.

    // Calculate the number of leading zeros, and shift only when the
    // mantissa is nonzero, to avoid shl overflow when `fp.mant == 0`.
    if fp.mant != 0 {
        let shift = fp.mant.leading_zeros() as i32;
        fp.mant <<= shift;
        fp.exp -= shift;
        shift
    } else {
        0
    }
}

/// Multiply two normalized extended-precision floats, as if by `a*b`.
///
/// The precision is maximal when the numbers are normalized; however,
/// decent precision will occur as long as both values have their high
/// bits set. The result is not normalized.
///
/// Algorithm:
///     1. Unsigned multiplication of the mantissas (requires 2x as many bits as the input).
///     2. Normalization of the result (not done here).
///     3. Addition of the exponents.
pub fn mul(x: &ExtendedFloat, y: &ExtendedFloat) -> ExtendedFloat {
    // Logic check, values must be decently normalized prior to multiplication.
    debug_assert!(x.mant >> 32 != 0);
    debug_assert!(y.mant >> 32 != 0);

    // Extract high-and-low masks.
    // The mask is `u32::MAX`, written as a literal to support older rustc versions.
    const LOMASK: u64 = 0xffff_ffff;
    let x1 = x.mant >> 32;
    let x0 = x.mant & LOMASK;
    let y1 = y.mant >> 32;
    let y0 = y.mant & LOMASK;

    // Get our products.
    let x1_y0 = x1 * y0;
    let x0_y1 = x0 * y1;
    let x0_y0 = x0 * y0;
    let x1_y1 = x1 * y1;

    let mut tmp = (x1_y0 & LOMASK) + (x0_y1 & LOMASK) + (x0_y0 >> 32);
    // Round up.
    tmp += 1 << (32 - 1);

    ExtendedFloat {
        mant: x1_y1 + (x1_y0 >> 32) + (x0_y1 >> 32) + (tmp >> 32),
        exp: x.exp + y.exp + 64,
    }
}
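// Worked example (added for illustration): with both operands normalized to
// represent exactly 1.0 (`mant = 1 << 63`, `exp = -63`), the only nonzero
// product is `x1_y1 = 1 << 62`, and the rounding term contributes nothing
// after the shift, so the result is `mant = 1 << 62`, `exp = -62`. That is
// 2^62 * 2^-62 = 1.0 again, with the result one bit short of normalized,
// as documented.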
// POWERS
// ------

/// Precalculated powers of base N for the Bellerophon algorithm.
pub struct BellerophonPowers {
    /// Pre-calculated small powers.
    pub small: &'static [u64],
    /// Pre-calculated large powers.
    pub large: &'static [u64],
    /// Pre-calculated small powers as 64-bit integers.
    pub small_int: &'static [u64],
    /// Step between large powers and the number of small powers.
    pub step: i32,
    /// Exponent bias for the large powers.
    pub bias: i32,
    /// `ceil(log2(radix))`, scaled as a multiplier.
    pub log2: i64,
    /// Bitshift for the `log2` multiplier.
    pub log2_shift: i32,
}

/// Getters to convert the pre-computed powers into extended-precision floats.
impl BellerophonPowers {
    #[inline]
    pub fn get_small(&self, index: usize) -> ExtendedFloat {
        let mant = self.small[index];
        let exp = (1 - 64) + ((self.log2 * index as i64) >> self.log2_shift);
        ExtendedFloat {
            mant,
            exp: exp as i32,
        }
    }

    #[inline]
    pub fn get_large(&self, index: usize) -> ExtendedFloat {
        let mant = self.large[index];
        let biased_e = index as i64 * self.step as i64 - self.bias as i64;
        let exp = (1 - 64) + ((self.log2 * biased_e) >> self.log2_shift);
        ExtendedFloat {
            mant,
            exp: exp as i32,
        }
    }

    #[inline]
    pub fn get_small_int(&self, index: usize) -> u64 {
        self.small_int[index]
    }
}
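// A minimal sanity-check sketch (added for illustration; it assumes the
// standard `cargo test` harness and that `f64` implements this crate's
// `Float` trait, and is not part of the original module).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn normalize_shifts_to_high_bit() {
        // 1 has 63 leading zeros, so `normalize` shifts the mantissa up by
        // 63 bits and compensates in the exponent.
        let mut fp = ExtendedFloat {
            mant: 1,
            exp: 0,
        };
        assert_eq!(normalize(&mut fp), 63);
        assert_eq!(fp.mant, 1 << 63);
        assert_eq!(fp.exp, -63);

        // Zero cannot be normalized and reports no shift.
        let mut zero = ExtendedFloat {
            mant: 0,
            exp: 0,
        };
        assert_eq!(normalize(&mut zero), 0);
    }

    #[test]
    fn mul_of_ones_is_one() {
        // Both operands represent exactly 1.0 in normalized form, so the
        // product is 2^62 * 2^-62 == 1.0, deliberately not normalized.
        let x = ExtendedFloat {
            mant: 1 << 63,
            exp: -63,
        };
        let z = mul(&x, &x);
        assert_eq!(z.mant, 1 << 62);
        assert_eq!(z.exp, -62);
    }

    #[test]
    fn error_is_accurate_flags_near_halfway() {
        // For `f64` (`MANTISSA_SIZE == 52`), 11 bits are truncated, so the
        // halfway point of the truncated bits is `0x400`.
        let exact = ExtendedFloat {
            mant: 1 << 63,
            exp: 0,
        };
        assert!(error_is_accurate::<f64>(8, &exact));

        // Low bits within `errors` of halfway cannot round unambiguously.
        let near_halfway = ExtendedFloat {
            mant: (1 << 63) | 0x400,
            exp: 0,
        };
        assert!(!error_is_accurate::<f64>(8, &near_halfway));
    }
}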