//! An implementation of Clinger's Bellerophon algorithm.
//!
//! This is a moderate path algorithm that uses an extended-precision
//! float, represented in 80 bits, by calculating the bits of slop
//! and determining if those bits could prevent unambiguous rounding.
//!
//! This algorithm requires less static storage than the Lemire algorithm
//! and has decent performance, and is therefore used when non-decimal,
//! non-power-of-two strings need to be parsed. Clinger's algorithm
//! is described in depth in "How to Read Floating Point Numbers Accurately",
//! available online [here](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.45.4152&rep=rep1&type=pdf).
//!
//! This implementation is loosely based on the Golang implementation,
//! found [here](https://github.com/golang/go/blob/b10849fbb97a2244c086991b4623ae9f32c212d0/src/strconv/extfloat.go).
//! This code is therefore subject to a 3-clause BSD license.
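//!
//! A minimal sketch of how this module's entry point is driven (ignored
//! as a doctest, since `Number` and `bellerophon` are crate-internal;
//! the `Number` fields shown are assumptions based on their use below):
//!
//! ```rust,ignore
//! // Parse the significant digits and decimal exponent of "123.45".
//! let num = Number {
//!     mantissa: 12345,
//!     exponent: -2, // 123.45 == 12345 * 10^-2
//!     many_digits: false,
//!     ..Default::default()
//! };
//! let fp = bellerophon::<f64>(&num);
//! // A non-negative biased exponent means rounding was unambiguous.
//! assert!(fp.exp >= 0);
//! ```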

#![cfg(feature = "compact")]
#![doc(hidden)]

use crate::extended_float::ExtendedFloat;
use crate::mask::{lower_n_halfway, lower_n_mask};
use crate::num::Float;
use crate::number::Number;
use crate::rounding::{round, round_nearest_tie_even};
use crate::table::BASE10_POWERS;

// ALGORITHM
// ---------

/// Core implementation of the Bellerophon algorithm.
///
/// Create an extended-precision float, scale it to the proper radix power,
/// calculate the bits of slop, and return the representation. The value
/// is always guaranteed to be within 1 bit, rounded-down, of the real
/// value. If a negative exponent is returned, it signals that we were
/// unable to unambiguously round the significant digits.
///
/// This has been modified to return a biased, rather than unbiased, exponent.
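///
/// A minimal doctest-style sketch of the calling convention (ignored as
/// a doctest, since the types are crate-internal):
///
/// ```rust,ignore
/// // `num` as in the module-level sketch above.
/// let fp = bellerophon::<f64>(&num);
/// if fp.exp < 0 {
///     // `F::INVALID_FP` was added to the exponent: the slop bits were
///     // too large to round unambiguously, so a slower, exact algorithm
///     // must be used instead.
/// } else {
///     // `fp` holds the rounded mantissa and biased exponent.
/// }
/// ```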
pub fn bellerophon<F: Float>(num: &Number) -> ExtendedFloat {
    let fp_zero = ExtendedFloat {
        mant: 0,
        exp: 0,
    };
    let fp_inf = ExtendedFloat {
        mant: 0,
        exp: F::INFINITE_POWER,
    };

    // Early short-circuit, in case of literal 0 or infinity.
    // This allows us to avoid narrow casts causing numeric overflow,
    // and is a quick check for any radix.
    if num.mantissa == 0 || num.exponent <= -0x1000 {
        return fp_zero;
    } else if num.exponent >= 0x1000 {
        return fp_inf;
    }

    // Calculate our indexes for our extended-precision multiplication.
    // This narrowing cast is safe, since exponent must be in a valid range.
    let exponent = num.exponent as i32 + BASE10_POWERS.bias;
    let small_index = exponent % BASE10_POWERS.step;
    let large_index = exponent / BASE10_POWERS.step;
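
    // For example (hypothetical table parameters, not the actual values
    // in `crate::table`): with `step = 10` and `bias = 350`, a decimal
    // exponent of -45 gives `exponent = 305`, `small_index = 5`, and
    // `large_index = 30`, so we scale by the small power `10^5` and the
    // large power `10^(30 * 10 - 350) = 10^-50`, and `5 - 50 = -45`.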

    if exponent < 0 {
        // Guaranteed underflow (assign 0).
        return fp_zero;
    }
    if large_index as usize >= BASE10_POWERS.large.len() {
        // Overflow (assign infinity).
        return fp_inf;
    }

    // Within the valid exponent range, multiply by the large and small
    // exponents and return the resulting value.

    // Track errors as a factor of units in the last place (ULP).
    let mut errors: u32 = 0;
    if num.many_digits {
        errors += error_halfscale();
    }

    // Multiply by the small power.
    // Check if we can directly multiply by an integer; if not,
    // use extended-precision multiplication.
    let mut fp = ExtendedFloat {
        mant: num.mantissa,
        exp: 0,
    };
    match fp.mant.overflowing_mul(BASE10_POWERS.get_small_int(small_index as usize)) {
        // Overflow, multiplication unsuccessful, go slow path.
        (_, true) => {
            normalize(&mut fp);
            fp = mul(&fp, &BASE10_POWERS.get_small(small_index as usize));
            errors += error_halfscale();
        },
        // No overflow, multiplication successful.
        (mant, false) => {
            fp.mant = mant;
            normalize(&mut fp);
        },
    }

    // Multiply by the large power.
    fp = mul(&fp, &BASE10_POWERS.get_large(large_index as usize));
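    // Any error we were already tracking is carried through the rounded
    // multiply above, so bump it by one unit before adding the half-ULP
    // error from the multiplication itself.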
    if errors > 0 {
        errors += 1;
    }
    errors += error_halfscale();

    // Normalize the floating point (and the errors).
    let shift = normalize(&mut fp);
    errors <<= shift;
    fp.exp += F::EXPONENT_BIAS;

    // Check for literal underflow, even with halfway cases.
    if -fp.exp + 1 > 65 {
        return fp_zero;
    }

    // Too many errors accumulated, return an error.
    if !error_is_accurate::<F>(errors, &fp) {
        // Bias the exponent so we know it's invalid.
        fp.exp += F::INVALID_FP;
        return fp;
    }

    // Check if we have a literal 0 or overflow here.
    // If we have an exponent of -63, we can still have a valid shift,
    // giving a case where we have too many errors and need to round-up.
    if -fp.exp + 1 == 65 {
        // Have more than 64 bits below the minimum exponent, must be 0.
        return fp_zero;
    }

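    // Round to the nearest float, breaking ties to even: round up when
    // strictly above the halfway point, or when exactly halfway and the
    // retained significand is odd.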
    round::<F, _>(&mut fp, |f, s| {
        round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| {
            is_above || (is_odd && is_halfway)
        });
    });
    fp
}

// ERRORS
// ------

// Determine if the errors accumulated while calculating the
// extended-precision float prevent unambiguous rounding.
//
// Specifically, we want to know if we are close to a halfway representation,
// or halfway between `b` and `b+1`, or `b+h`. The halfway representation
// has the form:
//     SEEEEEEEHMMMMMMMMMMMMMMMMMMMMMMM100...
// where:
//     S = Sign Bit
//     E = Exponent Bits
//     H = Hidden Bit
//     M = Mantissa Bits
//
// The halfway representation has a bit set 1-after the mantissa digits,
// and no bits set immediately afterward, making it impossible to
// round between `b` and `b+1` with this representation.

/// Get the full error scale.
#[inline(always)]
const fn error_scale() -> u32 {
    8
}

/// Get the half error scale.
#[inline(always)]
const fn error_halfscale() -> u32 {
    error_scale() / 2
}
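
// Errors are therefore counted in eighths of a ULP: `error_scale()` (8)
// is one full unit in the last place, and `error_halfscale()` (4) is the
// half-ULP error introduced by a single rounded operation.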

/// Determine if the number of errors is tolerable for float precision.
fn error_is_accurate<F: Float>(errors: u32, fp: &ExtendedFloat) -> bool {
    // Check we can't have a literal 0 denormal float.
    debug_assert!(fp.exp >= -64);

    // Determine if the extended-precision float is a good approximation.
    // The float will be inaccurate if the error has affected too many
    // units, or if the representation is so close to halfway that any
    // operation could affect this halfway representation.
    // See the documentation for dtoa for more information.

    // This is always a valid u32, since the significand size is {23, 52},
    // so `64 - F::MANTISSA_SIZE - 1` is always positive.
    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;

    // The unbiased exponent check is `unbiased_exp <= F::MANTISSA_SIZE
    // - F::EXPONENT_BIAS - 64 + 1`, or `biased_exp <= F::MANTISSA_SIZE - 63`,
    // or `biased_exp <= -mantissa_shift`.
    let extrabits = match fp.exp <= -mantissa_shift {
        // Denormal, since shifting to the hidden bit still has a negative exponent.
        // The unbiased bit calculation is `1 - F::EXPONENT_BIAS - unbiased_exp`,
        // or `1 - biased_exp`.
        true => 1 - fp.exp,
        false => 64 - F::MANTISSA_SIZE - 1,
    };
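
    // For example (assuming f64, where `F::MANTISSA_SIZE` is 52):
    // `mantissa_shift` is 11, so a normal value keeps the top 53 bits
    // (hidden bit + mantissa) and the low 11 bits are "extra", while a
    // biased exponent of -20 would be denormal, with
    // `extrabits = 1 - (-20) = 21` bits below the representable range.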

    // Our logic is as follows: we want to determine if the actual
    // mantissa and the errors during calculation differ significantly
    // from the rounding point. The rounding point for round-nearest
    // is the halfway point, IE, when the truncated bits start
    // with b1000..., while the rounding point for round-toward
    // is when the truncated bits are equal to 0.
    // To do so, we can check whether the rounding point +/- the error
    // is >/< the actual lower n bits.
    //
    // For whether we need to use signed or unsigned types for this
    // analysis, see this example, using u8 rather than u64 to simplify
    // things.
    //
    // # Comparisons
    //      cmp1 = (halfway - errors) < extra
    //      cmp2 = extra < (halfway + errors)
    //
    // # Large Extrabits, Low Errors
    //
    //      extrabits = 8
    //      halfway          =  0b10000000
    //      extra            =  0b10000010
    //      errors           =  0b00000100
    //      halfway - errors =  0b01111100
    //      halfway + errors =  0b10000100
    //
    //      Unsigned:
    //          halfway - errors = 124
    //          halfway + errors = 132
    //          extra            = 130
    //          cmp1             = true
    //          cmp2             = true
    //      Signed:
    //          halfway - errors = 124
    //          halfway + errors = -124
    //          extra            = -126
    //          cmp1             = false
    //          cmp2             = true
    //
    // # Conclusion
    //
    // Since errors will always be small, and since we want to detect
    // if the representation is accurate, we need to use an **unsigned**
    // type for comparisons.
    let maskbits = extrabits as u64;
    let errors = errors as u64;

    // Round-to-nearest, need to use the halfway point.
    if extrabits > 64 {
        // Underflow, we have a shift larger than the mantissa.
        // The representation is valid **only** if adding the errors to
        // the mantissa cannot overflow into the next bit. If it
        // overflows, the representation is **not** valid.
        !fp.mant.overflowing_add(errors).1
    } else {
        let mask = lower_n_mask(maskbits);
        let extra = fp.mant & mask;

        // Round-to-nearest, need to check if we're close to halfway.
        // IE, b10100 | 100000, where `|` signifies the truncation point.
        let halfway = lower_n_halfway(maskbits);
        let cmp1 = halfway.wrapping_sub(errors) < extra;
        let cmp2 = extra < halfway.wrapping_add(errors);

        // If both comparisons are true, we have significant rounding error,
        // and the value cannot be exactly represented. Otherwise, the
        // representation is valid.
        !(cmp1 && cmp2)
    }
}

// MATH
// ----

/// Normalize a floating-point number.
///
/// Shift the mantissa so the number of leading zeros is 0, or the value
/// itself is 0.
///
/// Get the number of bits shifted.
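///
/// A minimal doctest-style sketch (ignored as a doctest, since
/// `ExtendedFloat` is crate-internal):
///
/// ```rust,ignore
/// let mut fp = ExtendedFloat { mant: 1, exp: 0 };
/// let shift = normalize(&mut fp);
/// // The single set bit moves to the top of the 64-bit mantissa,
/// // and the exponent compensates for the shift.
/// assert_eq!(shift, 63);
/// assert_eq!(fp.mant, 1 << 63);
/// assert_eq!(fp.exp, -63);
/// ```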
pub fn normalize(fp: &mut ExtendedFloat) -> i32 {
    // Note:
    // Using the ctlz intrinsic via leading_zeros is way faster (~10x)
    // than shifting 1 bit at a time via a while loop, and also way
    // faster (~2x) than an unrolled loop that checks at 32, 16, 8, 4,
    // 2, and 1 bit.
    //
    // Using a modulus of pow2 (which will get optimized to a bitwise
    // and with 0x3F or faster) is slightly slower than an if/then,
    // however, removing the if/then will likely optimize more branched
    // code as it removes conditional logic.

    // Calculate the number of leading zeros, and shift only when
    // `fp.mant != 0`, to avoid shl overflow when `fp.mant == 0`.
    if fp.mant != 0 {
        let shift = fp.mant.leading_zeros() as i32;
        fp.mant <<= shift;
        fp.exp -= shift;
        shift
    } else {
        0
    }
}

/// Multiply two normalized extended-precision floats, as if by `a*b`.
///
/// The precision is maximal when the numbers are normalized; however,
/// decent precision will occur as long as both values have high bits
/// set. The result is not normalized.
///
/// Algorithm:
///     1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
///     2. Normalization of the result (not done here).
///     3. Addition of exponents.
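///
/// A minimal doctest-style sketch (ignored as a doctest, since
/// `ExtendedFloat` is crate-internal). Both inputs represent 1.0
/// (`2^63 * 2^-63`), so the product must also be 1.0:
///
/// ```rust,ignore
/// let x = ExtendedFloat { mant: 1 << 63, exp: -63 };
/// let y = ExtendedFloat { mant: 1 << 63, exp: -63 };
/// let z = mul(&x, &y);
/// // The 128-bit product of the mantissas is 2^126; its high 64 bits
/// // are 2^62, and the exponent gains 64 to compensate.
/// assert_eq!(z.mant, 1 << 62);
/// assert_eq!(z.exp, -62);
/// ```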
pub fn mul(x: &ExtendedFloat, y: &ExtendedFloat) -> ExtendedFloat {
    // Logic check, values must be decently normalized prior to multiplication.
    debug_assert!(x.mant >> 32 != 0);
    debug_assert!(y.mant >> 32 != 0);

    // Extract high-and-low masks.
    // Mask is `u32::MAX` for older Rustc versions.
    const LOMASK: u64 = 0xffff_ffff;
    let x1 = x.mant >> 32;
    let x0 = x.mant & LOMASK;
    let y1 = y.mant >> 32;
    let y0 = y.mant & LOMASK;

    // Get our products.
    let x1_y0 = x1 * y0;
    let x0_y1 = x0 * y1;
    let x0_y0 = x0 * y0;
    let x1_y1 = x1 * y1;

    let mut tmp = (x1_y0 & LOMASK) + (x0_y1 & LOMASK) + (x0_y0 >> 32);
    // Round up.
    tmp += 1 << (32 - 1);

    ExtendedFloat {
        mant: x1_y1 + (x1_y0 >> 32) + (x0_y1 >> 32) + (tmp >> 32),
        exp: x.exp + y.exp + 64,
    }
}

// POWERS
// ------

/// Precalculated powers of base N for the Bellerophon algorithm.
pub struct BellerophonPowers {
    /// Pre-calculated small powers.
    pub small: &'static [u64],
    /// Pre-calculated large powers.
    pub large: &'static [u64],
    /// Pre-calculated small powers as 64-bit integers.
    pub small_int: &'static [u64],
    /// Step between large powers and number of small powers.
    pub step: i32,
    /// Exponent bias for the large powers.
    pub bias: i32,
    /// ceil(log2(radix)) scaled as a multiplier.
    pub log2: i64,
    /// Bit shift for the log2 multiplier.
    pub log2_shift: i32,
}
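
// For example (hypothetical decimal values, not necessarily the actual
// table in `crate::table`): with `log2_shift = 16`, `log2` would store
// `round(log2(10) * 2^16) = 217706`, so `(217706 * 3) >> 16 == 9`,
// which matches `floor(log2(10^3))` since `2^9 = 512 <= 1000 < 1024`.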

/// Allow indexing of values without bounds checking.
impl BellerophonPowers {
    #[inline]
    pub fn get_small(&self, index: usize) -> ExtendedFloat {
        let mant = self.small[index];
        let exp = (1 - 64) + ((self.log2 * index as i64) >> self.log2_shift);
        ExtendedFloat {
            mant,
            exp: exp as i32,
        }
    }

    #[inline]
    pub fn get_large(&self, index: usize) -> ExtendedFloat {
        let mant = self.large[index];
        let biased_e = index as i64 * self.step as i64 - self.bias as i64;
        let exp = (1 - 64) + ((self.log2 * biased_e) >> self.log2_shift);
        ExtendedFloat {
            mant,
            exp: exp as i32,
        }
    }

    #[inline]
    pub fn get_small_int(&self, index: usize) -> u64 {
        self.small_int[index]
    }
}