1// The Computer Language Benchmarks Game 2// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ 3// 4// contributed by the Rust Project Developers 5// contributed by TeXitoi 6// contributed by BurntSushi 7 8// This technically solves the problem posed in the `regex-dna` benchmark, but 9// it cheats by combining all of the replacements into a single regex and 10// replacing them with a single linear scan. i.e., it re-implements 11// `replace_all`. As a result, this is around 25% faster. ---AG 12 13use std::io::{self, Read}; 14use std::sync::Arc; 15use std::thread; 16 17macro_rules! regex { 18 ($re:expr) => { 19 ::regex::Regex::new($re).unwrap() 20 }; 21} 22 23fn main() { 24 let mut seq = String::with_capacity(50 * (1 << 20)); 25 io::stdin().read_to_string(&mut seq).unwrap(); 26 let ilen = seq.len(); 27 28 seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); 29 let clen = seq.len(); 30 let seq_arc = Arc::new(seq.clone()); 31 32 let variants = vec![ 33 regex!("agggtaaa|tttaccct"), 34 regex!("[cgt]gggtaaa|tttaccc[acg]"), 35 regex!("a[act]ggtaaa|tttacc[agt]t"), 36 regex!("ag[act]gtaaa|tttac[agt]ct"), 37 regex!("agg[act]taaa|ttta[agt]cct"), 38 regex!("aggg[acg]aaa|ttt[cgt]ccct"), 39 regex!("agggt[cgt]aa|tt[acg]accct"), 40 regex!("agggta[cgt]a|t[acg]taccct"), 41 regex!("agggtaa[cgt]|[acg]ttaccct"), 42 ]; 43 let mut counts = vec![]; 44 for variant in variants { 45 let seq = seq_arc.clone(); 46 let restr = variant.to_string(); 47 let future = thread::spawn(move || variant.find_iter(&seq).count()); 48 counts.push((restr, future)); 49 } 50 51 let substs = vec![ 52 (b'B', "(c|g|t)"), 53 (b'D', "(a|g|t)"), 54 (b'H', "(a|c|t)"), 55 (b'K', "(g|t)"), 56 (b'M', "(a|c)"), 57 (b'N', "(a|c|g|t)"), 58 (b'R', "(a|g)"), 59 (b'S', "(c|g)"), 60 (b'V', "(a|c|g)"), 61 (b'W', "(a|t)"), 62 (b'Y', "(c|t)"), 63 ]; // combined into one regex in `replace_all` 64 let seq = replace_all(&seq, substs); 65 66 for (variant, count) in counts { 67 println!("{} {}", variant, count.join().unwrap()); 68 } 69 println!("\n{}\n{}\n{}", ilen, clen, seq.len()); 70} 71 72fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { 73 let mut replacements = vec![""; 256]; 74 let mut alternates = vec![]; 75 for (re, replacement) in substs { 76 replacements[re as usize] = replacement; 77 alternates.push((re as char).to_string()); 78 } 79 80 let re = regex!(&alternates.join("|")); 81 let mut new = String::with_capacity(text.len()); 82 let mut last_match = 0; 83 for m in re.find_iter(text) { 84 new.push_str(&text[last_match..m.start()]); 85 new.push_str(replacements[text.as_bytes()[m.start()] as usize]); 86 last_match = m.end(); 87 } 88 new.push_str(&text[last_match..]); 89 new 90} 91