1c67d6573Sopenharmony_ci// The Computer Language Benchmarks Game
2c67d6573Sopenharmony_ci// https://benchmarksgame-team.pages.debian.net/benchmarksgame/
3c67d6573Sopenharmony_ci//
4c67d6573Sopenharmony_ci// contributed by the Rust Project Developers
5c67d6573Sopenharmony_ci// contributed by TeXitoi
6c67d6573Sopenharmony_ci// contributed by BurntSushi
7c67d6573Sopenharmony_ci
8c67d6573Sopenharmony_ci// This technically solves the problem posed in the `regex-dna` benchmark, but
9c67d6573Sopenharmony_ci// it cheats by combining all of the replacements into a single regex and
10c67d6573Sopenharmony_ci// replacing them with a single linear scan. i.e., it re-implements
11c67d6573Sopenharmony_ci// `replace_all`. As a result, this is around 25% faster. ---AG
12c67d6573Sopenharmony_ci
13c67d6573Sopenharmony_ciuse std::io::{self, Read};
14c67d6573Sopenharmony_ciuse std::sync::Arc;
15c67d6573Sopenharmony_ciuse std::thread;
16c67d6573Sopenharmony_ci
/// Compiles the given pattern expression into a `regex::Regex`.
///
/// # Panics
///
/// Panics if the pattern fails to compile.
macro_rules! regex {
    ($pattern:expr) => {{
        let compiled = ::regex::Regex::new($pattern);
        compiled.unwrap()
    }};
}
22c67d6573Sopenharmony_ci
23c67d6573Sopenharmony_cifn main() {
24c67d6573Sopenharmony_ci    let mut seq = String::with_capacity(50 * (1 << 20));
25c67d6573Sopenharmony_ci    io::stdin().read_to_string(&mut seq).unwrap();
26c67d6573Sopenharmony_ci    let ilen = seq.len();
27c67d6573Sopenharmony_ci
28c67d6573Sopenharmony_ci    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
29c67d6573Sopenharmony_ci    let clen = seq.len();
30c67d6573Sopenharmony_ci    let seq_arc = Arc::new(seq.clone());
31c67d6573Sopenharmony_ci
32c67d6573Sopenharmony_ci    let variants = vec![
33c67d6573Sopenharmony_ci        regex!("agggtaaa|tttaccct"),
34c67d6573Sopenharmony_ci        regex!("[cgt]gggtaaa|tttaccc[acg]"),
35c67d6573Sopenharmony_ci        regex!("a[act]ggtaaa|tttacc[agt]t"),
36c67d6573Sopenharmony_ci        regex!("ag[act]gtaaa|tttac[agt]ct"),
37c67d6573Sopenharmony_ci        regex!("agg[act]taaa|ttta[agt]cct"),
38c67d6573Sopenharmony_ci        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
39c67d6573Sopenharmony_ci        regex!("agggt[cgt]aa|tt[acg]accct"),
40c67d6573Sopenharmony_ci        regex!("agggta[cgt]a|t[acg]taccct"),
41c67d6573Sopenharmony_ci        regex!("agggtaa[cgt]|[acg]ttaccct"),
42c67d6573Sopenharmony_ci    ];
43c67d6573Sopenharmony_ci    let mut counts = vec![];
44c67d6573Sopenharmony_ci    for variant in variants {
45c67d6573Sopenharmony_ci        let seq = seq_arc.clone();
46c67d6573Sopenharmony_ci        let restr = variant.to_string();
47c67d6573Sopenharmony_ci        let future = thread::spawn(move || variant.find_iter(&seq).count());
48c67d6573Sopenharmony_ci        counts.push((restr, future));
49c67d6573Sopenharmony_ci    }
50c67d6573Sopenharmony_ci
51c67d6573Sopenharmony_ci    let substs = vec![
52c67d6573Sopenharmony_ci        (b'B', "(c|g|t)"),
53c67d6573Sopenharmony_ci        (b'D', "(a|g|t)"),
54c67d6573Sopenharmony_ci        (b'H', "(a|c|t)"),
55c67d6573Sopenharmony_ci        (b'K', "(g|t)"),
56c67d6573Sopenharmony_ci        (b'M', "(a|c)"),
57c67d6573Sopenharmony_ci        (b'N', "(a|c|g|t)"),
58c67d6573Sopenharmony_ci        (b'R', "(a|g)"),
59c67d6573Sopenharmony_ci        (b'S', "(c|g)"),
60c67d6573Sopenharmony_ci        (b'V', "(a|c|g)"),
61c67d6573Sopenharmony_ci        (b'W', "(a|t)"),
62c67d6573Sopenharmony_ci        (b'Y', "(c|t)"),
63c67d6573Sopenharmony_ci    ]; // combined into one regex in `replace_all`
64c67d6573Sopenharmony_ci    let seq = replace_all(&seq, substs);
65c67d6573Sopenharmony_ci
66c67d6573Sopenharmony_ci    for (variant, count) in counts {
67c67d6573Sopenharmony_ci        println!("{} {}", variant, count.join().unwrap());
68c67d6573Sopenharmony_ci    }
69c67d6573Sopenharmony_ci    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
70c67d6573Sopenharmony_ci}
71c67d6573Sopenharmony_ci
72c67d6573Sopenharmony_cifn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
73c67d6573Sopenharmony_ci    let mut replacements = vec![""; 256];
74c67d6573Sopenharmony_ci    let mut alternates = vec![];
75c67d6573Sopenharmony_ci    for (re, replacement) in substs {
76c67d6573Sopenharmony_ci        replacements[re as usize] = replacement;
77c67d6573Sopenharmony_ci        alternates.push((re as char).to_string());
78c67d6573Sopenharmony_ci    }
79c67d6573Sopenharmony_ci
80c67d6573Sopenharmony_ci    let re = regex!(&alternates.join("|"));
81c67d6573Sopenharmony_ci    let mut new = String::with_capacity(text.len());
82c67d6573Sopenharmony_ci    let mut last_match = 0;
83c67d6573Sopenharmony_ci    for m in re.find_iter(text) {
84c67d6573Sopenharmony_ci        new.push_str(&text[last_match..m.start()]);
85c67d6573Sopenharmony_ci        new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
86c67d6573Sopenharmony_ci        last_match = m.end();
87c67d6573Sopenharmony_ci    }
88c67d6573Sopenharmony_ci    new.push_str(&text[last_match..]);
89c67d6573Sopenharmony_ci    new
90c67d6573Sopenharmony_ci}
91