minimal-lexical/src/parse.rs

//! Parse byte iterators to float.

#![doc(hidden)]

#[cfg(feature = "compact")]
use crate::bellerophon::bellerophon;
use crate::extended_float::{extended_to_float, ExtendedFloat};
#[cfg(not(feature = "compact"))]
use crate::lemire::lemire;
use crate::num::Float;
use crate::number::Number;
use crate::slow::slow;
cbd624adSopenharmony_ci/// Try to parse the significant digits quickly.
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// This attempts a very quick parse, to deal with common cases.
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// * `integer`     - Slice containing the integer digits.
cbd624adSopenharmony_ci/// * `fraction`    - Slice containing the fraction digits.
cbd624adSopenharmony_ci#[inline]
cbd624adSopenharmony_cifn parse_number_fast<'a, Iter1, Iter2>(
cbd624adSopenharmony_ci    integer: Iter1,
cbd624adSopenharmony_ci    fraction: Iter2,
cbd624adSopenharmony_ci    exponent: i32,
cbd624adSopenharmony_ci) -> Option<Number>
cbd624adSopenharmony_ciwhere
cbd624adSopenharmony_ci    Iter1: Iterator<Item = &'a u8>,
cbd624adSopenharmony_ci    Iter2: Iterator<Item = &'a u8>,
cbd624adSopenharmony_ci{
cbd624adSopenharmony_ci    let mut num = Number::default();
cbd624adSopenharmony_ci    let mut integer_count: usize = 0;
cbd624adSopenharmony_ci    let mut fraction_count: usize = 0;
cbd624adSopenharmony_ci    for &c in integer {
cbd624adSopenharmony_ci        integer_count += 1;
cbd624adSopenharmony_ci        let digit = c - b'0';
cbd624adSopenharmony_ci        num.mantissa = num.mantissa.wrapping_mul(10).wrapping_add(digit as u64);
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci    for &c in fraction {
cbd624adSopenharmony_ci        fraction_count += 1;
cbd624adSopenharmony_ci        let digit = c - b'0';
cbd624adSopenharmony_ci        num.mantissa = num.mantissa.wrapping_mul(10).wrapping_add(digit as u64);
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci    if integer_count + fraction_count <= 19 {
cbd624adSopenharmony_ci        // Can't overflow, since must be <= 19.
cbd624adSopenharmony_ci        num.exponent = exponent.saturating_sub(fraction_count as i32);
cbd624adSopenharmony_ci        Some(num)
cbd624adSopenharmony_ci    } else {
cbd624adSopenharmony_ci        None
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci}
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci/// Parse the significant digits of the float and adjust the exponent.
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// * `integer`     - Slice containing the integer digits.
cbd624adSopenharmony_ci/// * `fraction`    - Slice containing the fraction digits.
cbd624adSopenharmony_ci#[inline]
cbd624adSopenharmony_cifn parse_number<'a, Iter1, Iter2>(mut integer: Iter1, mut fraction: Iter2, exponent: i32) -> Number
cbd624adSopenharmony_ciwhere
cbd624adSopenharmony_ci    Iter1: Iterator<Item = &'a u8> + Clone,
cbd624adSopenharmony_ci    Iter2: Iterator<Item = &'a u8> + Clone,
cbd624adSopenharmony_ci{
cbd624adSopenharmony_ci    // NOTE: for performance, we do this in 2 passes:
cbd624adSopenharmony_ci    if let Some(num) = parse_number_fast(integer.clone(), fraction.clone(), exponent) {
cbd624adSopenharmony_ci        return num;
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci    // Can only add 19 digits.
cbd624adSopenharmony_ci    let mut num = Number::default();
cbd624adSopenharmony_ci    let mut count = 0;
cbd624adSopenharmony_ci    while let Some(&c) = integer.next() {
cbd624adSopenharmony_ci        count += 1;
cbd624adSopenharmony_ci        if count == 20 {
cbd624adSopenharmony_ci            // Only the integer digits affect the exponent.
cbd624adSopenharmony_ci            num.many_digits = true;
cbd624adSopenharmony_ci            num.exponent = exponent.saturating_add(into_i32(1 + integer.count()));
cbd624adSopenharmony_ci            return num;
cbd624adSopenharmony_ci        } else {
cbd624adSopenharmony_ci            let digit = c - b'0';
cbd624adSopenharmony_ci            num.mantissa = num.mantissa * 10 + digit as u64;
cbd624adSopenharmony_ci        }
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci    // Skip leading fraction zeros.
cbd624adSopenharmony_ci    // This is required otherwise we might have a 0 mantissa and many digits.
cbd624adSopenharmony_ci    let mut fraction_count: usize = 0;
cbd624adSopenharmony_ci    if count == 0 {
cbd624adSopenharmony_ci        for &c in &mut fraction {
cbd624adSopenharmony_ci            fraction_count += 1;
cbd624adSopenharmony_ci            if c != b'0' {
cbd624adSopenharmony_ci                count += 1;
cbd624adSopenharmony_ci                let digit = c - b'0';
cbd624adSopenharmony_ci                num.mantissa = num.mantissa * 10 + digit as u64;
cbd624adSopenharmony_ci                break;
cbd624adSopenharmony_ci            }
cbd624adSopenharmony_ci        }
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci    for c in fraction {
cbd624adSopenharmony_ci        fraction_count += 1;
cbd624adSopenharmony_ci        count += 1;
cbd624adSopenharmony_ci        if count == 20 {
cbd624adSopenharmony_ci            num.many_digits = true;
cbd624adSopenharmony_ci            // This can't wrap, since we have at most 20 digits.
cbd624adSopenharmony_ci            // We've adjusted the exponent too high by `fraction_count - 1`.
cbd624adSopenharmony_ci            // Note: -1 is due to incrementing this loop iteration, which we
cbd624adSopenharmony_ci            // didn't use.
cbd624adSopenharmony_ci            num.exponent = exponent.saturating_sub(fraction_count as i32 - 1);
cbd624adSopenharmony_ci            return num;
cbd624adSopenharmony_ci        } else {
cbd624adSopenharmony_ci            let digit = c - b'0';
cbd624adSopenharmony_ci            num.mantissa = num.mantissa * 10 + digit as u64;
cbd624adSopenharmony_ci        }
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci    // No truncated digits: easy.
cbd624adSopenharmony_ci    // Cannot overflow: <= 20 digits.
cbd624adSopenharmony_ci    num.exponent = exponent.saturating_sub(fraction_count as i32);
cbd624adSopenharmony_ci    num
cbd624adSopenharmony_ci}
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci/// Parse float from extracted float components.
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// * `integer`     - Cloneable, forward iterator over integer digits.
cbd624adSopenharmony_ci/// * `fraction`    - Cloneable, forward iterator over integer digits.
cbd624adSopenharmony_ci/// * `exponent`    - Parsed, 32-bit exponent.
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// # Preconditions
cbd624adSopenharmony_ci/// 1. The integer should not have leading zeros.
cbd624adSopenharmony_ci/// 2. The fraction should not have trailing zeros.
cbd624adSopenharmony_ci/// 3. All bytes in `integer` and `fraction` should be valid digits,
cbd624adSopenharmony_ci///     in the range [`b'0', b'9'].
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// # Panics
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// Although passing garbage input will not cause memory safety issues,
cbd624adSopenharmony_ci/// it is very likely to cause a panic with a large number of digits, or
cbd624adSopenharmony_ci/// in debug mode. The big-integer arithmetic without the `alloc` feature
cbd624adSopenharmony_ci/// assumes a maximum, fixed-width input, which assumes at maximum a
cbd624adSopenharmony_ci/// value of `10^(769 + 342)`, or ~4000 bits of storage. Passing in
cbd624adSopenharmony_ci/// nonsensical digits may require up to ~6000 bits of storage, which will
cbd624adSopenharmony_ci/// panic when attempting to add it to the big integer. It is therefore
cbd624adSopenharmony_ci/// up to the caller to validate this input.
cbd624adSopenharmony_ci///
cbd624adSopenharmony_ci/// We cannot efficiently remove trailing zeros while only accepting a
cbd624adSopenharmony_ci/// forward iterator.
cbd624adSopenharmony_cipub fn parse_float<'a, F, Iter1, Iter2>(integer: Iter1, fraction: Iter2, exponent: i32) -> F
cbd624adSopenharmony_ciwhere
cbd624adSopenharmony_ci    F: Float,
cbd624adSopenharmony_ci    Iter1: Iterator<Item = &'a u8> + Clone,
cbd624adSopenharmony_ci    Iter2: Iterator<Item = &'a u8> + Clone,
cbd624adSopenharmony_ci{
cbd624adSopenharmony_ci    // Parse the mantissa and attempt the fast and moderate-path algorithms.
cbd624adSopenharmony_ci    let num = parse_number(integer.clone(), fraction.clone(), exponent);
cbd624adSopenharmony_ci    // Try the fast-path algorithm.
cbd624adSopenharmony_ci    if let Some(value) = num.try_fast_path() {
cbd624adSopenharmony_ci        return value;
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci    // Now try the moderate path algorithm.
cbd624adSopenharmony_ci    let mut fp = moderate_path::<F>(&num);
cbd624adSopenharmony_ci    if fp.exp < 0 {
cbd624adSopenharmony_ci        // Undo the invalid extended float biasing.
cbd624adSopenharmony_ci        fp.exp -= F::INVALID_FP;
cbd624adSopenharmony_ci        fp = slow::<F, _, _>(num, fp, integer, fraction);
cbd624adSopenharmony_ci    }
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci    // Unable to correctly round the float using the fast or moderate algorithms.
cbd624adSopenharmony_ci    // Fallback to a slower, but always correct algorithm. If we have
cbd624adSopenharmony_ci    // lossy, we can't be here.
cbd624adSopenharmony_ci    extended_to_float::<F>(fp)
cbd624adSopenharmony_ci}
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci/// Wrapper for different moderate-path algorithms.
cbd624adSopenharmony_ci/// A return exponent of `-1` indicates an invalid value.
cbd624adSopenharmony_ci#[inline]
cbd624adSopenharmony_cipub fn moderate_path<F: Float>(num: &Number) -> ExtendedFloat {
cbd624adSopenharmony_ci    #[cfg(not(feature = "compact"))]
cbd624adSopenharmony_ci    return lemire::<F>(num);
cbd624adSopenharmony_ci
cbd624adSopenharmony_ci    #[cfg(feature = "compact")]
cbd624adSopenharmony_ci    return bellerophon::<F>(num);
cbd624adSopenharmony_ci}
cbd624adSopenharmony_ci
/// Convert usize into i32 without overflow.
///
/// Values above `i32::max_value()` are clamped to `i32::max_value()`
/// instead of being truncated by the cast.
///
/// This is needed to ensure when adjusting the exponent relative to
/// the mantissa we do not overflow for comically-long digit counts.
#[inline]
fn into_i32(value: usize) -> i32 {
    // After clamping, the value fits in an `i32`, so the cast is lossless.
    value.min(i32::max_value() as usize) as i32
}
cbd624adSopenharmony_ci
/// Add digit to mantissa.
///
/// Computes `value * 10 + digit` with checked arithmetic.
///
/// * `value`   - Mantissa accumulated so far.
/// * `digit`   - Value to append; it is added as-is, without range checks.
///
/// Returns `None` if either the multiplication or the addition would
/// overflow a `u64`.
#[inline]
pub fn add_digit(value: u64, digit: u8) -> Option<u64> {
    value.checked_mul(10)?.checked_add(digit as u64)
}