1c67d6573Sopenharmony_ci// The Computer Language Benchmarks Game 2c67d6573Sopenharmony_ci// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ 3c67d6573Sopenharmony_ci// 4c67d6573Sopenharmony_ci// contributed by the Rust Project Developers 5c67d6573Sopenharmony_ci// contributed by TeXitoi 6c67d6573Sopenharmony_ci// contributed by BurntSushi 7c67d6573Sopenharmony_ci 8c67d6573Sopenharmony_ci// This technically solves the problem posed in the `regex-dna` benchmark, but 9c67d6573Sopenharmony_ci// it cheats by combining all of the replacements into a single regex and 10c67d6573Sopenharmony_ci// replacing them with a single linear scan. i.e., it re-implements 11c67d6573Sopenharmony_ci// `replace_all`. As a result, this is around 25% faster. ---AG 12c67d6573Sopenharmony_ci 13c67d6573Sopenharmony_ciuse std::io::{self, Read}; 14c67d6573Sopenharmony_ciuse std::sync::Arc; 15c67d6573Sopenharmony_ciuse std::thread; 16c67d6573Sopenharmony_ci 17c67d6573Sopenharmony_cimacro_rules! regex { 18c67d6573Sopenharmony_ci ($re:expr) => { 19c67d6573Sopenharmony_ci ::regex::Regex::new($re).unwrap() 20c67d6573Sopenharmony_ci }; 21c67d6573Sopenharmony_ci} 22c67d6573Sopenharmony_ci 23c67d6573Sopenharmony_cifn main() { 24c67d6573Sopenharmony_ci let mut seq = String::with_capacity(50 * (1 << 20)); 25c67d6573Sopenharmony_ci io::stdin().read_to_string(&mut seq).unwrap(); 26c67d6573Sopenharmony_ci let ilen = seq.len(); 27c67d6573Sopenharmony_ci 28c67d6573Sopenharmony_ci seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); 29c67d6573Sopenharmony_ci let clen = seq.len(); 30c67d6573Sopenharmony_ci let seq_arc = Arc::new(seq.clone()); 31c67d6573Sopenharmony_ci 32c67d6573Sopenharmony_ci let variants = vec![ 33c67d6573Sopenharmony_ci regex!("agggtaaa|tttaccct"), 34c67d6573Sopenharmony_ci regex!("[cgt]gggtaaa|tttaccc[acg]"), 35c67d6573Sopenharmony_ci regex!("a[act]ggtaaa|tttacc[agt]t"), 36c67d6573Sopenharmony_ci regex!("ag[act]gtaaa|tttac[agt]ct"), 37c67d6573Sopenharmony_ci regex!("agg[act]taaa|ttta[agt]cct"), 38c67d6573Sopenharmony_ci regex!("aggg[acg]aaa|ttt[cgt]ccct"), 39c67d6573Sopenharmony_ci regex!("agggt[cgt]aa|tt[acg]accct"), 40c67d6573Sopenharmony_ci regex!("agggta[cgt]a|t[acg]taccct"), 41c67d6573Sopenharmony_ci regex!("agggtaa[cgt]|[acg]ttaccct"), 42c67d6573Sopenharmony_ci ]; 43c67d6573Sopenharmony_ci let mut counts = vec![]; 44c67d6573Sopenharmony_ci for variant in variants { 45c67d6573Sopenharmony_ci let seq = seq_arc.clone(); 46c67d6573Sopenharmony_ci let restr = variant.to_string(); 47c67d6573Sopenharmony_ci let future = thread::spawn(move || variant.find_iter(&seq).count()); 48c67d6573Sopenharmony_ci counts.push((restr, future)); 49c67d6573Sopenharmony_ci } 50c67d6573Sopenharmony_ci 51c67d6573Sopenharmony_ci let substs = vec![ 52c67d6573Sopenharmony_ci (b'B', "(c|g|t)"), 53c67d6573Sopenharmony_ci (b'D', "(a|g|t)"), 54c67d6573Sopenharmony_ci (b'H', "(a|c|t)"), 55c67d6573Sopenharmony_ci (b'K', "(g|t)"), 56c67d6573Sopenharmony_ci (b'M', "(a|c)"), 57c67d6573Sopenharmony_ci (b'N', "(a|c|g|t)"), 58c67d6573Sopenharmony_ci (b'R', "(a|g)"), 59c67d6573Sopenharmony_ci (b'S', "(c|g)"), 60c67d6573Sopenharmony_ci (b'V', "(a|c|g)"), 61c67d6573Sopenharmony_ci (b'W', "(a|t)"), 62c67d6573Sopenharmony_ci (b'Y', "(c|t)"), 63c67d6573Sopenharmony_ci ]; // combined into one regex in `replace_all` 64c67d6573Sopenharmony_ci let seq = replace_all(&seq, substs); 65c67d6573Sopenharmony_ci 66c67d6573Sopenharmony_ci for (variant, count) in counts { 67c67d6573Sopenharmony_ci println!("{} {}", variant, count.join().unwrap()); 68c67d6573Sopenharmony_ci } 69c67d6573Sopenharmony_ci println!("\n{}\n{}\n{}", ilen, clen, seq.len()); 70c67d6573Sopenharmony_ci} 71c67d6573Sopenharmony_ci 72c67d6573Sopenharmony_cifn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { 73c67d6573Sopenharmony_ci let mut replacements = vec![""; 256]; 74c67d6573Sopenharmony_ci let mut alternates = vec![]; 75c67d6573Sopenharmony_ci for (re, replacement) in substs { 76c67d6573Sopenharmony_ci replacements[re as usize] = replacement; 77c67d6573Sopenharmony_ci alternates.push((re as char).to_string()); 78c67d6573Sopenharmony_ci } 79c67d6573Sopenharmony_ci 80c67d6573Sopenharmony_ci let re = regex!(&alternates.join("|")); 81c67d6573Sopenharmony_ci let mut new = String::with_capacity(text.len()); 82c67d6573Sopenharmony_ci let mut last_match = 0; 83c67d6573Sopenharmony_ci for m in re.find_iter(text) { 84c67d6573Sopenharmony_ci new.push_str(&text[last_match..m.start()]); 85c67d6573Sopenharmony_ci new.push_str(replacements[text.as_bytes()[m.start()] as usize]); 86c67d6573Sopenharmony_ci last_match = m.end(); 87c67d6573Sopenharmony_ci } 88c67d6573Sopenharmony_ci new.push_str(&text[last_match..]); 89c67d6573Sopenharmony_ci new 90c67d6573Sopenharmony_ci} 91