1c67d6573Sopenharmony_ci// The Computer Language Benchmarks Game 2c67d6573Sopenharmony_ci// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ 3c67d6573Sopenharmony_ci// 4c67d6573Sopenharmony_ci// contributed by the Rust Project Developers 5c67d6573Sopenharmony_ci// contributed by TeXitoi 6c67d6573Sopenharmony_ci// contributed by BurntSushi 7c67d6573Sopenharmony_ci 8c67d6573Sopenharmony_ciuse std::io::{self, Read}; 9c67d6573Sopenharmony_ci 10c67d6573Sopenharmony_cimacro_rules! regex { 11c67d6573Sopenharmony_ci ($re:expr) => { 12c67d6573Sopenharmony_ci ::regex::Regex::new($re).unwrap() 13c67d6573Sopenharmony_ci }; 14c67d6573Sopenharmony_ci} 15c67d6573Sopenharmony_ci 16c67d6573Sopenharmony_cifn main() { 17c67d6573Sopenharmony_ci let mut seq = String::with_capacity(50 * (1 << 20)); 18c67d6573Sopenharmony_ci io::stdin().read_to_string(&mut seq).unwrap(); 19c67d6573Sopenharmony_ci let ilen = seq.len(); 20c67d6573Sopenharmony_ci 21c67d6573Sopenharmony_ci seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); 22c67d6573Sopenharmony_ci let clen = seq.len(); 23c67d6573Sopenharmony_ci 24c67d6573Sopenharmony_ci let variants = vec![ 25c67d6573Sopenharmony_ci regex!("agggtaaa|tttaccct"), 26c67d6573Sopenharmony_ci regex!("[cgt]gggtaaa|tttaccc[acg]"), 27c67d6573Sopenharmony_ci regex!("a[act]ggtaaa|tttacc[agt]t"), 28c67d6573Sopenharmony_ci regex!("ag[act]gtaaa|tttac[agt]ct"), 29c67d6573Sopenharmony_ci regex!("agg[act]taaa|ttta[agt]cct"), 30c67d6573Sopenharmony_ci regex!("aggg[acg]aaa|ttt[cgt]ccct"), 31c67d6573Sopenharmony_ci regex!("agggt[cgt]aa|tt[acg]accct"), 32c67d6573Sopenharmony_ci regex!("agggta[cgt]a|t[acg]taccct"), 33c67d6573Sopenharmony_ci regex!("agggtaa[cgt]|[acg]ttaccct"), 34c67d6573Sopenharmony_ci ]; 35c67d6573Sopenharmony_ci for re in variants { 36c67d6573Sopenharmony_ci println!("{} {}", re.to_string(), re.find_iter(&seq).count()); 37c67d6573Sopenharmony_ci } 38c67d6573Sopenharmony_ci 39c67d6573Sopenharmony_ci let substs = vec![ 40c67d6573Sopenharmony_ci (b'B', "(c|g|t)"), 41c67d6573Sopenharmony_ci (b'D', "(a|g|t)"), 42c67d6573Sopenharmony_ci (b'H', "(a|c|t)"), 43c67d6573Sopenharmony_ci (b'K', "(g|t)"), 44c67d6573Sopenharmony_ci (b'M', "(a|c)"), 45c67d6573Sopenharmony_ci (b'N', "(a|c|g|t)"), 46c67d6573Sopenharmony_ci (b'R', "(a|g)"), 47c67d6573Sopenharmony_ci (b'S', "(c|g)"), 48c67d6573Sopenharmony_ci (b'V', "(a|c|g)"), 49c67d6573Sopenharmony_ci (b'W', "(a|t)"), 50c67d6573Sopenharmony_ci (b'Y', "(c|t)"), 51c67d6573Sopenharmony_ci ]; // combined into one regex in `replace_all` 52c67d6573Sopenharmony_ci let seq = replace_all(&seq, substs); 53c67d6573Sopenharmony_ci 54c67d6573Sopenharmony_ci println!("\n{}\n{}\n{}", ilen, clen, seq.len()); 55c67d6573Sopenharmony_ci} 56c67d6573Sopenharmony_ci 57c67d6573Sopenharmony_cifn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { 58c67d6573Sopenharmony_ci let mut replacements = vec![""; 256]; 59c67d6573Sopenharmony_ci let mut alternates = vec![]; 60c67d6573Sopenharmony_ci for (re, replacement) in substs { 61c67d6573Sopenharmony_ci replacements[re as usize] = replacement; 62c67d6573Sopenharmony_ci alternates.push((re as char).to_string()); 63c67d6573Sopenharmony_ci } 64c67d6573Sopenharmony_ci 65c67d6573Sopenharmony_ci let re = regex!(&alternates.join("|")); 66c67d6573Sopenharmony_ci let mut new = String::with_capacity(text.len()); 67c67d6573Sopenharmony_ci let mut last_match = 0; 68c67d6573Sopenharmony_ci for m in re.find_iter(text) { 69c67d6573Sopenharmony_ci new.push_str(&text[last_match..m.start()]); 70c67d6573Sopenharmony_ci new.push_str(replacements[text.as_bytes()[m.start()] as usize]); 71c67d6573Sopenharmony_ci last_match = m.end(); 72c67d6573Sopenharmony_ci } 73c67d6573Sopenharmony_ci new.push_str(&text[last_match..]); 74c67d6573Sopenharmony_ci new 75c67d6573Sopenharmony_ci} 76