// To run:
//
//     cargo criterion --features criterion/real_blackbox
//
// This benchmarks each of the different libraries at several ratios of ASCII to
// non-ASCII content. There is one additional benchmark labeled "baseline" which
// just iterates over characters in a string, converting UTF-8 to 32-bit chars.
//
// Criterion will show a time in milliseconds. The non-baseline bench functions
// each make one million function calls (2 calls per character, 500K characters
// in the strings created by gen_string). The "time per call" listed in our
// readme is computed by subtracting this baseline from the other bench
// functions' time, then dividing by one million (ms -> ns).
146e652d70Sopenharmony_ci
156e652d70Sopenharmony_ci#![allow(clippy::needless_pass_by_value)]
166e652d70Sopenharmony_ci
176e652d70Sopenharmony_ci#[path = "../tests/fst/mod.rs"]
186e652d70Sopenharmony_cimod fst;
196e652d70Sopenharmony_ci#[path = "../tests/roaring/mod.rs"]
206e652d70Sopenharmony_cimod roaring;
216e652d70Sopenharmony_ci#[path = "../tests/trie/mod.rs"]
226e652d70Sopenharmony_cimod trie;
236e652d70Sopenharmony_ci
246e652d70Sopenharmony_ciuse criterion::{black_box, criterion_group, criterion_main, Criterion};
256e652d70Sopenharmony_ciuse rand::distributions::{Bernoulli, Distribution, Uniform};
266e652d70Sopenharmony_ciuse rand::rngs::SmallRng;
276e652d70Sopenharmony_ciuse rand::SeedableRng;
286e652d70Sopenharmony_ciuse std::time::Duration;
296e652d70Sopenharmony_ci
306e652d70Sopenharmony_cifn gen_string(p_nonascii: u32) -> String {
316e652d70Sopenharmony_ci    let mut rng = SmallRng::from_seed([b'!'; 32]);
326e652d70Sopenharmony_ci    let pick_nonascii = Bernoulli::from_ratio(p_nonascii, 100).unwrap();
336e652d70Sopenharmony_ci    let ascii = Uniform::new_inclusive('\0', '\x7f');
346e652d70Sopenharmony_ci    let nonascii = Uniform::new_inclusive(0x80 as char, char::MAX);
356e652d70Sopenharmony_ci
366e652d70Sopenharmony_ci    let mut string = String::new();
376e652d70Sopenharmony_ci    for _ in 0..500_000 {
386e652d70Sopenharmony_ci        let distribution = if pick_nonascii.sample(&mut rng) {
396e652d70Sopenharmony_ci            nonascii
406e652d70Sopenharmony_ci        } else {
416e652d70Sopenharmony_ci            ascii
426e652d70Sopenharmony_ci        };
436e652d70Sopenharmony_ci        string.push(distribution.sample(&mut rng));
446e652d70Sopenharmony_ci    }
456e652d70Sopenharmony_ci
466e652d70Sopenharmony_ci    string
476e652d70Sopenharmony_ci}
486e652d70Sopenharmony_ci
496e652d70Sopenharmony_cifn bench(c: &mut Criterion, group_name: &str, string: String) {
506e652d70Sopenharmony_ci    let mut group = c.benchmark_group(group_name);
516e652d70Sopenharmony_ci    group.measurement_time(Duration::from_secs(10));
526e652d70Sopenharmony_ci    group.bench_function("baseline", |b| {
536e652d70Sopenharmony_ci        b.iter(|| {
546e652d70Sopenharmony_ci            for ch in string.chars() {
556e652d70Sopenharmony_ci                black_box(ch);
566e652d70Sopenharmony_ci            }
576e652d70Sopenharmony_ci        });
586e652d70Sopenharmony_ci    });
596e652d70Sopenharmony_ci    group.bench_function("unicode-ident", |b| {
606e652d70Sopenharmony_ci        b.iter(|| {
616e652d70Sopenharmony_ci            for ch in string.chars() {
626e652d70Sopenharmony_ci                black_box(unicode_ident::is_xid_start(ch));
636e652d70Sopenharmony_ci                black_box(unicode_ident::is_xid_continue(ch));
646e652d70Sopenharmony_ci            }
656e652d70Sopenharmony_ci        });
666e652d70Sopenharmony_ci    });
676e652d70Sopenharmony_ci    group.bench_function("unicode-xid", |b| {
686e652d70Sopenharmony_ci        b.iter(|| {
696e652d70Sopenharmony_ci            for ch in string.chars() {
706e652d70Sopenharmony_ci                black_box(unicode_xid::UnicodeXID::is_xid_start(ch));
716e652d70Sopenharmony_ci                black_box(unicode_xid::UnicodeXID::is_xid_continue(ch));
726e652d70Sopenharmony_ci            }
736e652d70Sopenharmony_ci        });
746e652d70Sopenharmony_ci    });
756e652d70Sopenharmony_ci    group.bench_function("ucd-trie", |b| {
766e652d70Sopenharmony_ci        b.iter(|| {
776e652d70Sopenharmony_ci            for ch in string.chars() {
786e652d70Sopenharmony_ci                black_box(trie::XID_START.contains_char(ch));
796e652d70Sopenharmony_ci                black_box(trie::XID_CONTINUE.contains_char(ch));
806e652d70Sopenharmony_ci            }
816e652d70Sopenharmony_ci        });
826e652d70Sopenharmony_ci    });
836e652d70Sopenharmony_ci    group.bench_function("fst", |b| {
846e652d70Sopenharmony_ci        let xid_start_fst = fst::xid_start_fst();
856e652d70Sopenharmony_ci        let xid_continue_fst = fst::xid_continue_fst();
866e652d70Sopenharmony_ci        b.iter(|| {
876e652d70Sopenharmony_ci            for ch in string.chars() {
886e652d70Sopenharmony_ci                let ch_bytes = (ch as u32).to_be_bytes();
896e652d70Sopenharmony_ci                black_box(xid_start_fst.contains(ch_bytes));
906e652d70Sopenharmony_ci                black_box(xid_continue_fst.contains(ch_bytes));
916e652d70Sopenharmony_ci            }
926e652d70Sopenharmony_ci        });
936e652d70Sopenharmony_ci    });
946e652d70Sopenharmony_ci    group.bench_function("roaring", |b| {
956e652d70Sopenharmony_ci        let xid_start_bitmap = roaring::xid_start_bitmap();
966e652d70Sopenharmony_ci        let xid_continue_bitmap = roaring::xid_continue_bitmap();
976e652d70Sopenharmony_ci        b.iter(|| {
986e652d70Sopenharmony_ci            for ch in string.chars() {
996e652d70Sopenharmony_ci                black_box(xid_start_bitmap.contains(ch as u32));
1006e652d70Sopenharmony_ci                black_box(xid_continue_bitmap.contains(ch as u32));
1016e652d70Sopenharmony_ci            }
1026e652d70Sopenharmony_ci        });
1036e652d70Sopenharmony_ci    });
1046e652d70Sopenharmony_ci    group.finish();
1056e652d70Sopenharmony_ci}
1066e652d70Sopenharmony_ci
1076e652d70Sopenharmony_cifn bench0(c: &mut Criterion) {
1086e652d70Sopenharmony_ci    bench(c, "0%-nonascii", gen_string(0));
1096e652d70Sopenharmony_ci}
1106e652d70Sopenharmony_ci
1116e652d70Sopenharmony_cifn bench1(c: &mut Criterion) {
1126e652d70Sopenharmony_ci    bench(c, "1%-nonascii", gen_string(1));
1136e652d70Sopenharmony_ci}
1146e652d70Sopenharmony_ci
1156e652d70Sopenharmony_cifn bench10(c: &mut Criterion) {
1166e652d70Sopenharmony_ci    bench(c, "10%-nonascii", gen_string(10));
1176e652d70Sopenharmony_ci}
1186e652d70Sopenharmony_ci
1196e652d70Sopenharmony_cifn bench100(c: &mut Criterion) {
1206e652d70Sopenharmony_ci    bench(c, "100%-nonascii", gen_string(100));
1216e652d70Sopenharmony_ci}
1226e652d70Sopenharmony_ci
1236e652d70Sopenharmony_cicriterion_group!(benches, bench0, bench1, bench10, bench100);
1246e652d70Sopenharmony_cicriterion_main!(benches);
125