//! A simple example of how to use minimal_lexical within a parser framework.
2//!
3//! This works on input bytes, however, it could be easily adapted to use
4//! `io::Read`, or any iterator over bytes. Since floats can only include
5//! ASCII characters, it will work with UTF-8 encoded data and return
6//! remaining bytes properly on UTF-8 boundaries.
7//!
8//! # License
9//!
//! This example is unlicensed, so please adapt the code into your
11//! own project. It is meant to show how to implement a float parser
12//! easily and efficiently, and how to adapt it for specialized use-cases.
13//!
14//! ```text
15//! This is free and unencumbered software released into the public domain.
16//!
17//! Anyone is free to copy, modify, publish, use, compile, sell, or
18//! distribute this software, either in source code form or as a compiled
19//! binary, for any purpose, commercial or non-commercial, and by any
20//! means.
21//!
22//! In jurisdictions that recognize copyright laws, the author or authors
23//! of this software dedicate any and all copyright interest in the
24//! software to the public domain. We make this dedication for the benefit
25//! of the public at large and to the detriment of our heirs and
26//! successors. We intend this dedication to be an overt act of
27//! relinquishment in perpetuity of all present and future rights to this
28//! software under copyright law.
29//!
30//! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31//! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32//! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33//! IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
34//! OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
35//! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
36//! OTHER DEALINGS IN THE SOFTWARE.
37//!
38//! For more information, please refer to <http://unlicense.org/>
39//! ```
40
41extern crate minimal_lexical;
42
43// HELPERS
44// -------
45
// These functions are simple, reusable components.
47
/// Strip an optional leading sign byte from the input.
///
/// * `bytes`    - Input that may begin with `+` or `-`.
///
/// Returns `(is_positive, remaining_bytes)`. A leading `+` or `-` is
/// consumed; any other input (including an empty slice) is returned
/// untouched and reported as positive.
#[inline]
fn parse_sign<'a>(bytes: &'a [u8]) -> (bool, &'a [u8]) {
    match bytes.split_first() {
        Some((&b'+', rest)) => (true, rest),
        Some((&b'-', rest)) => (false, rest),
        _ => (true, bytes),
    }
}
57
/// Convert an ASCII decimal digit byte to its numeric value.
///
/// Returns `Some(0..=9)` for the bytes `b'0'..=b'9'`, and `None` for
/// every other byte.
#[inline]
fn to_digit(c: u8) -> Option<u32> {
    match c {
        b'0'..=b'9' => Some(u32::from(c - b'0')),
        _ => None,
    }
}
63
/// Append a decimal digit to a non-negative accumulator.
///
/// Computes `value * 10 + digit` with overflow checking, returning
/// `None` if either the multiplication or the addition overflows `i32`.
#[inline]
fn add_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_add(digit as i32))
}
69
/// Append a decimal digit to a non-positive accumulator.
///
/// Computes `value * 10 - digit` with overflow checking, returning
/// `None` if either the multiplication or the subtraction overflows `i32`.
#[inline]
fn sub_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_sub(digit as i32))
}
75
/// Check whether a byte is an ASCII decimal digit (`b'0'..=b'9'`).
///
/// Returns `true` only for the ten ASCII digit bytes; every other byte,
/// including non-ASCII values, yields `false`.
#[inline]
fn is_digit(c: u8) -> bool {
    c.is_ascii_digit()
}
81
/// Split a buffer into the bytes before `index` and the bytes from
/// `index` onwards.
///
/// Panics if `index` is greater than `digits.len()`.
#[inline]
fn split_at_index<'a>(digits: &'a [u8], index: usize) -> (&'a [u8], &'a [u8]) {
    digits.split_at(index)
}
87
88/// Consume until a an invalid digit is found.
89///
90/// - `digits`      - Slice containing 0 or more digits.
91#[inline]
92fn consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8]) {
93    // Consume all digits.
94    let mut index = 0;
95    while index < digits.len() && is_digit(digits[index]) {
96        index += 1;
97    }
98    split_at_index(digits, index)
99}
100
/// Drop every leading `b'0'` byte from the slice.
///
/// A slice made up solely of zeros (or an empty slice) yields an empty
/// slice.
#[inline]
fn ltrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    // Index of the first byte that is not a zero, or the length if none.
    let start = bytes
        .iter()
        .position(|&byte| byte != b'0')
        .unwrap_or(bytes.len());
    &bytes[start..]
}
107
/// Drop every trailing `b'0'` byte from the slice.
///
/// A slice made up solely of zeros (or an empty slice) yields an empty
/// slice.
#[inline]
fn rtrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    // One past the index of the last non-zero byte, or 0 if there is none.
    let end = bytes
        .iter()
        .rposition(|&byte| byte != b'0')
        .map_or(0, |last| last + 1);
    &bytes[..end]
}
115
116// PARSERS
117// -------
118
119/// Parse the exponent of the float.
120///
121/// * `exponent`    - Slice containing the exponent digits.
122/// * `is_positive` - If the exponent sign is positive.
123fn parse_exponent(exponent: &[u8], is_positive: bool) -> i32 {
124    // Parse the sign bit or current data.
125    let mut value: i32 = 0;
126    match is_positive {
127        true => {
128            for c in exponent {
129                value = match add_digit_i32(value, to_digit(*c).unwrap()) {
130                    Some(v) => v,
131                    None => return i32::max_value(),
132                };
133            }
134        },
135        false => {
136            for c in exponent {
137                value = match sub_digit_i32(value, to_digit(*c).unwrap()) {
138                    Some(v) => v,
139                    None => return i32::min_value(),
140                };
141            }
142        },
143    }
144
145    value
146}
147
148/// Parse float from input bytes, returning the float and the remaining bytes.
149///
150/// * `bytes`    - Array of bytes leading with float-data.
151fn parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8])
152where
153    F: minimal_lexical::Float,
154{
155    // Parse the sign.
156    let (is_positive, bytes) = parse_sign(bytes);
157
158    // Note: this does not handle special float values.
159    // You will have to handle NaN, Inf, and Infinity
160    // on your own.
161
162    // Extract and parse the float components:
163    //  1. Integer
164    //  2. Fraction
165    //  3. Exponent
166    let (integer_slc, bytes) = consume_digits(bytes);
167    let (fraction_slc, bytes) = match bytes.first() {
168        Some(&b'.') => consume_digits(&bytes[1..]),
169        _ => (&bytes[..0], bytes),
170    };
171    let (exponent, bytes) = match bytes.first() {
172        Some(&b'e') | Some(&b'E') => {
173            // Extract and parse the exponent.
174            let (is_positive, bytes) = parse_sign(&bytes[1..]);
175            let (exponent, bytes) = consume_digits(bytes);
176            (parse_exponent(exponent, is_positive), bytes)
177        },
178        _ => (0, bytes),
179    };
180
181    // Note: You may want to check and validate the float data here:
182    //  1). Many floats require integer or fraction digits, if a fraction
183    //      is present.
184    //  2). All floats require either integer or fraction digits.
185    //  3). Some floats do not allow a '+' sign before the significant digits.
186    //  4). Many floats require exponent digits after the exponent symbol.
187    //  5). Some floats do not allow a '+' sign before the exponent.
188
189    // We now need to trim leading and trailing 0s from the integer
190    // and fraction, respectively. This is required to make the
191    // fast and moderate paths more efficient, and for the slow
192    // path.
193    let integer_slc = ltrim_zero(integer_slc);
194    let fraction_slc = rtrim_zero(fraction_slc);
195
196    // Create the float and return our data.
197    let mut float: F =
198        minimal_lexical::parse_float(integer_slc.iter(), fraction_slc.iter(), exponent);
199    if !is_positive {
200        float = -float;
201    }
202
203    (float, bytes)
204}
205
206pub fn main() {
207    let check_parse_float =
208        |s: &str, v, t: &str| assert_eq!(parse_float(s.as_bytes()), (v, t.as_bytes()));
209
210    check_parse_float("1.0e7", 1.0e7f64, "");
211    check_parse_float("12345.67", 12345.67, "");
212    check_parse_float("12345.67 narnia", 12345.67, " narnia");
213}
214