1#[macro_use]
2extern crate criterion;
3
// Registers jemalloc as the global allocator for this benchmark binary, so
// every heap allocation made while parsing goes through it.
// NOTE(review): presumably chosen to get more stable allocation timings than
// the system allocator — confirm against the project's benchmarking notes.
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
6
7use criterion::Criterion;
8use nom::{
9  branch::alt,
10  bytes::complete::{tag, take},
11  character::complete::{anychar, char, multispace0, none_of},
12  combinator::{map, map_opt, map_res, value, verify},
13  error::{ErrorKind, ParseError},
14  multi::{fold_many0, separated_list0},
15  number::complete::{double, recognize_float},
16  sequence::{delimited, preceded, separated_pair},
17  IResult, Parser,
18};
19
20use std::collections::HashMap;
21
/// In-memory representation of a parsed JSON document.
///
/// Each variant corresponds to one kind of value produced by the parsers in
/// this file.
#[derive(Clone, Debug, PartialEq)]
pub enum JsonValue {
  /// The literal `null`.
  Null,
  /// The literals `true` / `false`.
  Bool(bool),
  /// A string with all escape sequences already decoded.
  Str(String),
  /// Any number, stored as a double-precision float.
  Num(f64),
  /// An ordered list of values.
  Array(Vec<JsonValue>),
  /// A key/value mapping; iteration order is unspecified because it is a `HashMap`.
  Object(HashMap<String, JsonValue>),
}
31
32fn boolean(input: &str) -> IResult<&str, bool> {
33  alt((value(false, tag("false")), value(true, tag("true"))))(input)
34}
35
36fn u16_hex(input: &str) -> IResult<&str, u16> {
37  map_res(take(4usize), |s| u16::from_str_radix(s, 16))(input)
38}
39
40fn unicode_escape(input: &str) -> IResult<&str, char> {
41  map_opt(
42    alt((
43      // Not a surrogate
44      map(verify(u16_hex, |cp| !(0xD800..0xE000).contains(cp)), |cp| {
45        cp as u32
46      }),
47      // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
48      map(
49        verify(
50          separated_pair(u16_hex, tag("\\u"), u16_hex),
51          |(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low),
52        ),
53        |(high, low)| {
54          let high_ten = (high as u32) - 0xD800;
55          let low_ten = (low as u32) - 0xDC00;
56          (high_ten << 10) + low_ten + 0x10000
57        },
58      ),
59    )),
60    // Could probably be replaced with .unwrap() or _unchecked due to the verify checks
61    std::char::from_u32,
62  )(input)
63}
64
65fn character(input: &str) -> IResult<&str, char> {
66  let (input, c) = none_of("\"")(input)?;
67  if c == '\\' {
68    alt((
69      map_res(anychar, |c| {
70        Ok(match c {
71          '"' | '\\' | '/' => c,
72          'b' => '\x08',
73          'f' => '\x0C',
74          'n' => '\n',
75          'r' => '\r',
76          't' => '\t',
77          _ => return Err(()),
78        })
79      }),
80      preceded(char('u'), unicode_escape),
81    ))(input)
82  } else {
83    Ok((input, c))
84  }
85}
86
87fn string(input: &str) -> IResult<&str, String> {
88  delimited(
89    char('"'),
90    fold_many0(character, String::new, |mut string, c| {
91      string.push(c);
92      string
93    }),
94    char('"'),
95  )(input)
96}
97
98fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, O, E>>(f: F) -> impl Parser<&'a str, O, E> {
99  delimited(multispace0, f, multispace0)
100}
101
102fn array(input: &str) -> IResult<&str, Vec<JsonValue>> {
103  delimited(
104    char('['),
105    ws(separated_list0(ws(char(',')), json_value)),
106    char(']'),
107  )(input)
108}
109
110fn object(input: &str) -> IResult<&str, HashMap<String, JsonValue>> {
111  map(
112    delimited(
113      char('{'),
114      ws(separated_list0(
115        ws(char(',')),
116        separated_pair(string, ws(char(':')), json_value),
117      )),
118      char('}'),
119    ),
120    |key_values| key_values.into_iter().collect(),
121  )(input)
122}
123
124fn json_value(input: &str) -> IResult<&str, JsonValue> {
125  use JsonValue::*;
126
127  alt((
128    value(Null, tag("null")),
129    map(boolean, Bool),
130    map(string, Str),
131    map(double, Num),
132    map(array, Array),
133    map(object, Object),
134  ))(input)
135}
136
137fn json(input: &str) -> IResult<&str, JsonValue> {
138  ws(json_value).parse(input)
139}
140
141fn json_bench(c: &mut Criterion) {
142  let data = "  { \"a\"\t: 42,
143  \"b\": [ \"x\", \"y\", 12 ,\"\\u2014\", \"\\uD83D\\uDE10\"] ,
144  \"c\": { \"hello\" : \"world\"
145  }
146  }  ";
147
148  // println!("data:\n{:?}", json(data));
149  c.bench_function("json", |b| {
150    b.iter(|| json(data).unwrap());
151  });
152}
153
154fn recognize_float_bytes(c: &mut Criterion) {
155  println!(
156    "recognize_float_bytes result: {:?}",
157    recognize_float::<_, (_, ErrorKind)>(&b"-1.234E-12"[..])
158  );
159  c.bench_function("recognize float bytes", |b| {
160    b.iter(|| recognize_float::<_, (_, ErrorKind)>(&b"-1.234E-12"[..]));
161  });
162}
163
164fn recognize_float_str(c: &mut Criterion) {
165  println!(
166    "recognize_float_str result: {:?}",
167    recognize_float::<_, (_, ErrorKind)>("-1.234E-12")
168  );
169  c.bench_function("recognize float str", |b| {
170    b.iter(|| recognize_float::<_, (_, ErrorKind)>("-1.234E-12"));
171  });
172}
173
174fn float_bytes(c: &mut Criterion) {
175  println!(
176    "float_bytes result: {:?}",
177    double::<_, (_, ErrorKind)>(&b"-1.234E-12"[..])
178  );
179  c.bench_function("float bytes", |b| {
180    b.iter(|| double::<_, (_, ErrorKind)>(&b"-1.234E-12"[..]));
181  });
182}
183
184fn float_str(c: &mut Criterion) {
185  println!(
186    "float_str result: {:?}",
187    double::<_, (_, ErrorKind)>("-1.234E-12")
188  );
189  c.bench_function("float str", |b| {
190    b.iter(|| double::<_, (_, ErrorKind)>("-1.234E-12"));
191  });
192}
193
194use nom::Err;
195use nom::ParseTo;
196fn std_float(input: &[u8]) -> IResult<&[u8], f64, (&[u8], ErrorKind)> {
197  match recognize_float(input) {
198    Err(e) => Err(e),
199    Ok((i, s)) => match s.parse_to() {
200      Some(n) => Ok((i, n)),
201      None => Err(Err::Error((i, ErrorKind::Float))),
202    },
203  }
204}
205
206fn std_float_bytes(c: &mut Criterion) {
207  println!(
208    "std_float_bytes result: {:?}",
209    std_float(&b"-1.234E-12"[..])
210  );
211  c.bench_function("std_float bytes", |b| {
212    b.iter(|| std_float(&b"-1.234E-12"[..]));
213  });
214}
215
// Collects every benchmark function in this file into a group named
// `benches`; the first argument is the group name, the rest are the targets.
criterion_group!(
  benches,
  json_bench,
  recognize_float_bytes,
  recognize_float_str,
  float_bytes,
  std_float_bytes,
  float_str
);
// Generates the `main` function that runs the `benches` group.
criterion_main!(benches);
226