1use std::str; 2 3use crate::find_byte::find_byte; 4 5use crate::re_bytes; 6use crate::re_unicode; 7 8pub fn expand_str( 9 caps: &re_unicode::Captures<'_>, 10 mut replacement: &str, 11 dst: &mut String, 12) { 13 while !replacement.is_empty() { 14 match find_byte(b'$', replacement.as_bytes()) { 15 None => break, 16 Some(i) => { 17 dst.push_str(&replacement[..i]); 18 replacement = &replacement[i..]; 19 } 20 } 21 if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') { 22 dst.push_str("$"); 23 replacement = &replacement[2..]; 24 continue; 25 } 26 debug_assert!(!replacement.is_empty()); 27 let cap_ref = match find_cap_ref(replacement.as_bytes()) { 28 Some(cap_ref) => cap_ref, 29 None => { 30 dst.push_str("$"); 31 replacement = &replacement[1..]; 32 continue; 33 } 34 }; 35 replacement = &replacement[cap_ref.end..]; 36 match cap_ref.cap { 37 Ref::Number(i) => { 38 dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or("")); 39 } 40 Ref::Named(name) => { 41 dst.push_str( 42 caps.name(name).map(|m| m.as_str()).unwrap_or(""), 43 ); 44 } 45 } 46 } 47 dst.push_str(replacement); 48} 49 50pub fn expand_bytes( 51 caps: &re_bytes::Captures<'_>, 52 mut replacement: &[u8], 53 dst: &mut Vec<u8>, 54) { 55 while !replacement.is_empty() { 56 match find_byte(b'$', replacement) { 57 None => break, 58 Some(i) => { 59 dst.extend(&replacement[..i]); 60 replacement = &replacement[i..]; 61 } 62 } 63 if replacement.get(1).map_or(false, |&b| b == b'$') { 64 dst.push(b'$'); 65 replacement = &replacement[2..]; 66 continue; 67 } 68 debug_assert!(!replacement.is_empty()); 69 let cap_ref = match find_cap_ref(replacement) { 70 Some(cap_ref) => cap_ref, 71 None => { 72 dst.push(b'$'); 73 replacement = &replacement[1..]; 74 continue; 75 } 76 }; 77 replacement = &replacement[cap_ref.end..]; 78 match cap_ref.cap { 79 Ref::Number(i) => { 80 dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b"")); 81 } 82 Ref::Named(name) => { 83 dst.extend( 84 caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""), 85 ); 86 } 87 } 88 } 89 dst.extend(replacement); 90} 91 92/// `CaptureRef` represents a reference to a capture group inside some text. 93/// The reference is either a capture group name or a number. 94/// 95/// It is also tagged with the position in the text following the 96/// capture reference. 97#[derive(Clone, Copy, Debug, Eq, PartialEq)] 98struct CaptureRef<'a> { 99 cap: Ref<'a>, 100 end: usize, 101} 102 103/// A reference to a capture group in some text. 104/// 105/// e.g., `$2`, `$foo`, `${foo}`. 106#[derive(Clone, Copy, Debug, Eq, PartialEq)] 107enum Ref<'a> { 108 Named(&'a str), 109 Number(usize), 110} 111 112impl<'a> From<&'a str> for Ref<'a> { 113 fn from(x: &'a str) -> Ref<'a> { 114 Ref::Named(x) 115 } 116} 117 118impl From<usize> for Ref<'static> { 119 fn from(x: usize) -> Ref<'static> { 120 Ref::Number(x) 121 } 122} 123 124/// Parses a possible reference to a capture group name in the given text, 125/// starting at the beginning of `replacement`. 126/// 127/// If no such valid reference could be found, None is returned. 128fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> { 129 let mut i = 0; 130 let rep: &[u8] = replacement; 131 if rep.len() <= 1 || rep[0] != b'$' { 132 return None; 133 } 134 i += 1; 135 if rep[i] == b'{' { 136 return find_cap_ref_braced(rep, i + 1); 137 } 138 let mut cap_end = i; 139 while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) { 140 cap_end += 1; 141 } 142 if cap_end == i { 143 return None; 144 } 145 // We just verified that the range 0..cap_end is valid ASCII, so it must 146 // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 147 // check via an unchecked conversion or by parsing the number straight from 148 // &[u8]. 149 let cap = 150 str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name"); 151 Some(CaptureRef { 152 cap: match cap.parse::<u32>() { 153 Ok(i) => Ref::Number(i as usize), 154 Err(_) => Ref::Named(cap), 155 }, 156 end: cap_end, 157 }) 158} 159 160fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> { 161 let start = i; 162 while rep.get(i).map_or(false, |&b| b != b'}') { 163 i += 1; 164 } 165 if !rep.get(i).map_or(false, |&b| b == b'}') { 166 return None; 167 } 168 // When looking at braced names, we don't put any restrictions on the name, 169 // so it's possible it could be invalid UTF-8. But a capture group name 170 // can never be invalid UTF-8, so if we have invalid UTF-8, then we can 171 // safely return None. 172 let cap = match str::from_utf8(&rep[start..i]) { 173 Err(_) => return None, 174 Ok(cap) => cap, 175 }; 176 Some(CaptureRef { 177 cap: match cap.parse::<u32>() { 178 Ok(i) => Ref::Number(i as usize), 179 Err(_) => Ref::Named(cap), 180 }, 181 end: i + 1, 182 }) 183} 184 185/// Returns true if and only if the given byte is allowed in a capture name. 186fn is_valid_cap_letter(b: u8) -> bool { 187 match b { 188 b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, 189 _ => false, 190 } 191} 192 193#[cfg(test)] 194mod tests { 195 use super::{find_cap_ref, CaptureRef}; 196 197 macro_rules! find { 198 ($name:ident, $text:expr) => { 199 #[test] 200 fn $name() { 201 assert_eq!(None, find_cap_ref($text.as_bytes())); 202 } 203 }; 204 ($name:ident, $text:expr, $capref:expr) => { 205 #[test] 206 fn $name() { 207 assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); 208 } 209 }; 210 } 211 212 macro_rules! c { 213 ($name_or_number:expr, $pos:expr) => { 214 CaptureRef { cap: $name_or_number.into(), end: $pos } 215 }; 216 } 217 218 find!(find_cap_ref1, "$foo", c!("foo", 4)); 219 find!(find_cap_ref2, "${foo}", c!("foo", 6)); 220 find!(find_cap_ref3, "$0", c!(0, 2)); 221 find!(find_cap_ref4, "$5", c!(5, 2)); 222 find!(find_cap_ref5, "$10", c!(10, 3)); 223 // See https://github.com/rust-lang/regex/pull/585 224 // for more on characters following numbers 225 find!(find_cap_ref6, "$42a", c!("42a", 4)); 226 find!(find_cap_ref7, "${42}a", c!(42, 5)); 227 find!(find_cap_ref8, "${42"); 228 find!(find_cap_ref9, "${42 "); 229 find!(find_cap_ref10, " $0 "); 230 find!(find_cap_ref11, "$"); 231 find!(find_cap_ref12, " "); 232 find!(find_cap_ref13, ""); 233 find!(find_cap_ref14, "$1-$2", c!(1, 2)); 234 find!(find_cap_ref15, "$1_$2", c!("1_", 3)); 235 find!(find_cap_ref16, "$x-$y", c!("x", 2)); 236 find!(find_cap_ref17, "$x_$y", c!("x_", 3)); 237 find!(find_cap_ref18, "${#}", c!("#", 4)); 238 find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); 239} 240