1c67d6573Sopenharmony_ciuse std::str; 2c67d6573Sopenharmony_ci 3c67d6573Sopenharmony_ciuse crate::find_byte::find_byte; 4c67d6573Sopenharmony_ci 5c67d6573Sopenharmony_ciuse crate::re_bytes; 6c67d6573Sopenharmony_ciuse crate::re_unicode; 7c67d6573Sopenharmony_ci 8c67d6573Sopenharmony_cipub fn expand_str( 9c67d6573Sopenharmony_ci caps: &re_unicode::Captures<'_>, 10c67d6573Sopenharmony_ci mut replacement: &str, 11c67d6573Sopenharmony_ci dst: &mut String, 12c67d6573Sopenharmony_ci) { 13c67d6573Sopenharmony_ci while !replacement.is_empty() { 14c67d6573Sopenharmony_ci match find_byte(b'$', replacement.as_bytes()) { 15c67d6573Sopenharmony_ci None => break, 16c67d6573Sopenharmony_ci Some(i) => { 17c67d6573Sopenharmony_ci dst.push_str(&replacement[..i]); 18c67d6573Sopenharmony_ci replacement = &replacement[i..]; 19c67d6573Sopenharmony_ci } 20c67d6573Sopenharmony_ci } 21c67d6573Sopenharmony_ci if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') { 22c67d6573Sopenharmony_ci dst.push_str("$"); 23c67d6573Sopenharmony_ci replacement = &replacement[2..]; 24c67d6573Sopenharmony_ci continue; 25c67d6573Sopenharmony_ci } 26c67d6573Sopenharmony_ci debug_assert!(!replacement.is_empty()); 27c67d6573Sopenharmony_ci let cap_ref = match find_cap_ref(replacement.as_bytes()) { 28c67d6573Sopenharmony_ci Some(cap_ref) => cap_ref, 29c67d6573Sopenharmony_ci None => { 30c67d6573Sopenharmony_ci dst.push_str("$"); 31c67d6573Sopenharmony_ci replacement = &replacement[1..]; 32c67d6573Sopenharmony_ci continue; 33c67d6573Sopenharmony_ci } 34c67d6573Sopenharmony_ci }; 35c67d6573Sopenharmony_ci replacement = &replacement[cap_ref.end..]; 36c67d6573Sopenharmony_ci match cap_ref.cap { 37c67d6573Sopenharmony_ci Ref::Number(i) => { 38c67d6573Sopenharmony_ci dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or("")); 39c67d6573Sopenharmony_ci } 40c67d6573Sopenharmony_ci Ref::Named(name) => { 41c67d6573Sopenharmony_ci dst.push_str( 42c67d6573Sopenharmony_ci caps.name(name).map(|m| m.as_str()).unwrap_or(""), 43c67d6573Sopenharmony_ci ); 44c67d6573Sopenharmony_ci } 45c67d6573Sopenharmony_ci } 46c67d6573Sopenharmony_ci } 47c67d6573Sopenharmony_ci dst.push_str(replacement); 48c67d6573Sopenharmony_ci} 49c67d6573Sopenharmony_ci 50c67d6573Sopenharmony_cipub fn expand_bytes( 51c67d6573Sopenharmony_ci caps: &re_bytes::Captures<'_>, 52c67d6573Sopenharmony_ci mut replacement: &[u8], 53c67d6573Sopenharmony_ci dst: &mut Vec<u8>, 54c67d6573Sopenharmony_ci) { 55c67d6573Sopenharmony_ci while !replacement.is_empty() { 56c67d6573Sopenharmony_ci match find_byte(b'$', replacement) { 57c67d6573Sopenharmony_ci None => break, 58c67d6573Sopenharmony_ci Some(i) => { 59c67d6573Sopenharmony_ci dst.extend(&replacement[..i]); 60c67d6573Sopenharmony_ci replacement = &replacement[i..]; 61c67d6573Sopenharmony_ci } 62c67d6573Sopenharmony_ci } 63c67d6573Sopenharmony_ci if replacement.get(1).map_or(false, |&b| b == b'$') { 64c67d6573Sopenharmony_ci dst.push(b'$'); 65c67d6573Sopenharmony_ci replacement = &replacement[2..]; 66c67d6573Sopenharmony_ci continue; 67c67d6573Sopenharmony_ci } 68c67d6573Sopenharmony_ci debug_assert!(!replacement.is_empty()); 69c67d6573Sopenharmony_ci let cap_ref = match find_cap_ref(replacement) { 70c67d6573Sopenharmony_ci Some(cap_ref) => cap_ref, 71c67d6573Sopenharmony_ci None => { 72c67d6573Sopenharmony_ci dst.push(b'$'); 73c67d6573Sopenharmony_ci replacement = &replacement[1..]; 74c67d6573Sopenharmony_ci continue; 75c67d6573Sopenharmony_ci } 76c67d6573Sopenharmony_ci }; 77c67d6573Sopenharmony_ci replacement = &replacement[cap_ref.end..]; 78c67d6573Sopenharmony_ci match cap_ref.cap { 79c67d6573Sopenharmony_ci Ref::Number(i) => { 80c67d6573Sopenharmony_ci dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b"")); 81c67d6573Sopenharmony_ci } 82c67d6573Sopenharmony_ci Ref::Named(name) => { 83c67d6573Sopenharmony_ci dst.extend( 84c67d6573Sopenharmony_ci caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""), 85c67d6573Sopenharmony_ci ); 86c67d6573Sopenharmony_ci } 87c67d6573Sopenharmony_ci } 88c67d6573Sopenharmony_ci } 89c67d6573Sopenharmony_ci dst.extend(replacement); 90c67d6573Sopenharmony_ci} 91c67d6573Sopenharmony_ci 92c67d6573Sopenharmony_ci/// `CaptureRef` represents a reference to a capture group inside some text. 93c67d6573Sopenharmony_ci/// The reference is either a capture group name or a number. 94c67d6573Sopenharmony_ci/// 95c67d6573Sopenharmony_ci/// It is also tagged with the position in the text following the 96c67d6573Sopenharmony_ci/// capture reference. 97c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug, Eq, PartialEq)] 98c67d6573Sopenharmony_cistruct CaptureRef<'a> { 99c67d6573Sopenharmony_ci cap: Ref<'a>, 100c67d6573Sopenharmony_ci end: usize, 101c67d6573Sopenharmony_ci} 102c67d6573Sopenharmony_ci 103c67d6573Sopenharmony_ci/// A reference to a capture group in some text. 104c67d6573Sopenharmony_ci/// 105c67d6573Sopenharmony_ci/// e.g., `$2`, `$foo`, `${foo}`. 106c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug, Eq, PartialEq)] 107c67d6573Sopenharmony_cienum Ref<'a> { 108c67d6573Sopenharmony_ci Named(&'a str), 109c67d6573Sopenharmony_ci Number(usize), 110c67d6573Sopenharmony_ci} 111c67d6573Sopenharmony_ci 112c67d6573Sopenharmony_ciimpl<'a> From<&'a str> for Ref<'a> { 113c67d6573Sopenharmony_ci fn from(x: &'a str) -> Ref<'a> { 114c67d6573Sopenharmony_ci Ref::Named(x) 115c67d6573Sopenharmony_ci } 116c67d6573Sopenharmony_ci} 117c67d6573Sopenharmony_ci 118c67d6573Sopenharmony_ciimpl From<usize> for Ref<'static> { 119c67d6573Sopenharmony_ci fn from(x: usize) -> Ref<'static> { 120c67d6573Sopenharmony_ci Ref::Number(x) 121c67d6573Sopenharmony_ci } 122c67d6573Sopenharmony_ci} 123c67d6573Sopenharmony_ci 124c67d6573Sopenharmony_ci/// Parses a possible reference to a capture group name in the given text, 125c67d6573Sopenharmony_ci/// starting at the beginning of `replacement`. 126c67d6573Sopenharmony_ci/// 127c67d6573Sopenharmony_ci/// If no such valid reference could be found, None is returned. 128c67d6573Sopenharmony_cifn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> { 129c67d6573Sopenharmony_ci let mut i = 0; 130c67d6573Sopenharmony_ci let rep: &[u8] = replacement; 131c67d6573Sopenharmony_ci if rep.len() <= 1 || rep[0] != b'$' { 132c67d6573Sopenharmony_ci return None; 133c67d6573Sopenharmony_ci } 134c67d6573Sopenharmony_ci i += 1; 135c67d6573Sopenharmony_ci if rep[i] == b'{' { 136c67d6573Sopenharmony_ci return find_cap_ref_braced(rep, i + 1); 137c67d6573Sopenharmony_ci } 138c67d6573Sopenharmony_ci let mut cap_end = i; 139c67d6573Sopenharmony_ci while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) { 140c67d6573Sopenharmony_ci cap_end += 1; 141c67d6573Sopenharmony_ci } 142c67d6573Sopenharmony_ci if cap_end == i { 143c67d6573Sopenharmony_ci return None; 144c67d6573Sopenharmony_ci } 145c67d6573Sopenharmony_ci // We just verified that the range 0..cap_end is valid ASCII, so it must 146c67d6573Sopenharmony_ci // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 147c67d6573Sopenharmony_ci // check via an unchecked conversion or by parsing the number straight from 148c67d6573Sopenharmony_ci // &[u8]. 149c67d6573Sopenharmony_ci let cap = 150c67d6573Sopenharmony_ci str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name"); 151c67d6573Sopenharmony_ci Some(CaptureRef { 152c67d6573Sopenharmony_ci cap: match cap.parse::<u32>() { 153c67d6573Sopenharmony_ci Ok(i) => Ref::Number(i as usize), 154c67d6573Sopenharmony_ci Err(_) => Ref::Named(cap), 155c67d6573Sopenharmony_ci }, 156c67d6573Sopenharmony_ci end: cap_end, 157c67d6573Sopenharmony_ci }) 158c67d6573Sopenharmony_ci} 159c67d6573Sopenharmony_ci 160c67d6573Sopenharmony_cifn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> { 161c67d6573Sopenharmony_ci let start = i; 162c67d6573Sopenharmony_ci while rep.get(i).map_or(false, |&b| b != b'}') { 163c67d6573Sopenharmony_ci i += 1; 164c67d6573Sopenharmony_ci } 165c67d6573Sopenharmony_ci if !rep.get(i).map_or(false, |&b| b == b'}') { 166c67d6573Sopenharmony_ci return None; 167c67d6573Sopenharmony_ci } 168c67d6573Sopenharmony_ci // When looking at braced names, we don't put any restrictions on the name, 169c67d6573Sopenharmony_ci // so it's possible it could be invalid UTF-8. But a capture group name 170c67d6573Sopenharmony_ci // can never be invalid UTF-8, so if we have invalid UTF-8, then we can 171c67d6573Sopenharmony_ci // safely return None. 172c67d6573Sopenharmony_ci let cap = match str::from_utf8(&rep[start..i]) { 173c67d6573Sopenharmony_ci Err(_) => return None, 174c67d6573Sopenharmony_ci Ok(cap) => cap, 175c67d6573Sopenharmony_ci }; 176c67d6573Sopenharmony_ci Some(CaptureRef { 177c67d6573Sopenharmony_ci cap: match cap.parse::<u32>() { 178c67d6573Sopenharmony_ci Ok(i) => Ref::Number(i as usize), 179c67d6573Sopenharmony_ci Err(_) => Ref::Named(cap), 180c67d6573Sopenharmony_ci }, 181c67d6573Sopenharmony_ci end: i + 1, 182c67d6573Sopenharmony_ci }) 183c67d6573Sopenharmony_ci} 184c67d6573Sopenharmony_ci 185c67d6573Sopenharmony_ci/// Returns true if and only if the given byte is allowed in a capture name. 186c67d6573Sopenharmony_cifn is_valid_cap_letter(b: u8) -> bool { 187c67d6573Sopenharmony_ci match b { 188c67d6573Sopenharmony_ci b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, 189c67d6573Sopenharmony_ci _ => false, 190c67d6573Sopenharmony_ci } 191c67d6573Sopenharmony_ci} 192c67d6573Sopenharmony_ci 193c67d6573Sopenharmony_ci#[cfg(test)] 194c67d6573Sopenharmony_cimod tests { 195c67d6573Sopenharmony_ci use super::{find_cap_ref, CaptureRef}; 196c67d6573Sopenharmony_ci 197c67d6573Sopenharmony_ci macro_rules! find { 198c67d6573Sopenharmony_ci ($name:ident, $text:expr) => { 199c67d6573Sopenharmony_ci #[test] 200c67d6573Sopenharmony_ci fn $name() { 201c67d6573Sopenharmony_ci assert_eq!(None, find_cap_ref($text.as_bytes())); 202c67d6573Sopenharmony_ci } 203c67d6573Sopenharmony_ci }; 204c67d6573Sopenharmony_ci ($name:ident, $text:expr, $capref:expr) => { 205c67d6573Sopenharmony_ci #[test] 206c67d6573Sopenharmony_ci fn $name() { 207c67d6573Sopenharmony_ci assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); 208c67d6573Sopenharmony_ci } 209c67d6573Sopenharmony_ci }; 210c67d6573Sopenharmony_ci } 211c67d6573Sopenharmony_ci 212c67d6573Sopenharmony_ci macro_rules! c { 213c67d6573Sopenharmony_ci ($name_or_number:expr, $pos:expr) => { 214c67d6573Sopenharmony_ci CaptureRef { cap: $name_or_number.into(), end: $pos } 215c67d6573Sopenharmony_ci }; 216c67d6573Sopenharmony_ci } 217c67d6573Sopenharmony_ci 218c67d6573Sopenharmony_ci find!(find_cap_ref1, "$foo", c!("foo", 4)); 219c67d6573Sopenharmony_ci find!(find_cap_ref2, "${foo}", c!("foo", 6)); 220c67d6573Sopenharmony_ci find!(find_cap_ref3, "$0", c!(0, 2)); 221c67d6573Sopenharmony_ci find!(find_cap_ref4, "$5", c!(5, 2)); 222c67d6573Sopenharmony_ci find!(find_cap_ref5, "$10", c!(10, 3)); 223c67d6573Sopenharmony_ci // See https://github.com/rust-lang/regex/pull/585 224c67d6573Sopenharmony_ci // for more on characters following numbers 225c67d6573Sopenharmony_ci find!(find_cap_ref6, "$42a", c!("42a", 4)); 226c67d6573Sopenharmony_ci find!(find_cap_ref7, "${42}a", c!(42, 5)); 227c67d6573Sopenharmony_ci find!(find_cap_ref8, "${42"); 228c67d6573Sopenharmony_ci find!(find_cap_ref9, "${42 "); 229c67d6573Sopenharmony_ci find!(find_cap_ref10, " $0 "); 230c67d6573Sopenharmony_ci find!(find_cap_ref11, "$"); 231c67d6573Sopenharmony_ci find!(find_cap_ref12, " "); 232c67d6573Sopenharmony_ci find!(find_cap_ref13, ""); 233c67d6573Sopenharmony_ci find!(find_cap_ref14, "$1-$2", c!(1, 2)); 234c67d6573Sopenharmony_ci find!(find_cap_ref15, "$1_$2", c!("1_", 3)); 235c67d6573Sopenharmony_ci find!(find_cap_ref16, "$x-$y", c!("x", 2)); 236c67d6573Sopenharmony_ci find!(find_cap_ref17, "$x_$y", c!("x_", 3)); 237c67d6573Sopenharmony_ci find!(find_cap_ref18, "${#}", c!("#", 4)); 238c67d6573Sopenharmony_ci find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); 239c67d6573Sopenharmony_ci} 240