1c67d6573Sopenharmony_ciuse std::str;
2c67d6573Sopenharmony_ci
3c67d6573Sopenharmony_ciuse crate::find_byte::find_byte;
4c67d6573Sopenharmony_ci
5c67d6573Sopenharmony_ciuse crate::re_bytes;
6c67d6573Sopenharmony_ciuse crate::re_unicode;
7c67d6573Sopenharmony_ci
8c67d6573Sopenharmony_cipub fn expand_str(
9c67d6573Sopenharmony_ci    caps: &re_unicode::Captures<'_>,
10c67d6573Sopenharmony_ci    mut replacement: &str,
11c67d6573Sopenharmony_ci    dst: &mut String,
12c67d6573Sopenharmony_ci) {
13c67d6573Sopenharmony_ci    while !replacement.is_empty() {
14c67d6573Sopenharmony_ci        match find_byte(b'$', replacement.as_bytes()) {
15c67d6573Sopenharmony_ci            None => break,
16c67d6573Sopenharmony_ci            Some(i) => {
17c67d6573Sopenharmony_ci                dst.push_str(&replacement[..i]);
18c67d6573Sopenharmony_ci                replacement = &replacement[i..];
19c67d6573Sopenharmony_ci            }
20c67d6573Sopenharmony_ci        }
21c67d6573Sopenharmony_ci        if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') {
22c67d6573Sopenharmony_ci            dst.push_str("$");
23c67d6573Sopenharmony_ci            replacement = &replacement[2..];
24c67d6573Sopenharmony_ci            continue;
25c67d6573Sopenharmony_ci        }
26c67d6573Sopenharmony_ci        debug_assert!(!replacement.is_empty());
27c67d6573Sopenharmony_ci        let cap_ref = match find_cap_ref(replacement.as_bytes()) {
28c67d6573Sopenharmony_ci            Some(cap_ref) => cap_ref,
29c67d6573Sopenharmony_ci            None => {
30c67d6573Sopenharmony_ci                dst.push_str("$");
31c67d6573Sopenharmony_ci                replacement = &replacement[1..];
32c67d6573Sopenharmony_ci                continue;
33c67d6573Sopenharmony_ci            }
34c67d6573Sopenharmony_ci        };
35c67d6573Sopenharmony_ci        replacement = &replacement[cap_ref.end..];
36c67d6573Sopenharmony_ci        match cap_ref.cap {
37c67d6573Sopenharmony_ci            Ref::Number(i) => {
38c67d6573Sopenharmony_ci                dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or(""));
39c67d6573Sopenharmony_ci            }
40c67d6573Sopenharmony_ci            Ref::Named(name) => {
41c67d6573Sopenharmony_ci                dst.push_str(
42c67d6573Sopenharmony_ci                    caps.name(name).map(|m| m.as_str()).unwrap_or(""),
43c67d6573Sopenharmony_ci                );
44c67d6573Sopenharmony_ci            }
45c67d6573Sopenharmony_ci        }
46c67d6573Sopenharmony_ci    }
47c67d6573Sopenharmony_ci    dst.push_str(replacement);
48c67d6573Sopenharmony_ci}
49c67d6573Sopenharmony_ci
50c67d6573Sopenharmony_cipub fn expand_bytes(
51c67d6573Sopenharmony_ci    caps: &re_bytes::Captures<'_>,
52c67d6573Sopenharmony_ci    mut replacement: &[u8],
53c67d6573Sopenharmony_ci    dst: &mut Vec<u8>,
54c67d6573Sopenharmony_ci) {
55c67d6573Sopenharmony_ci    while !replacement.is_empty() {
56c67d6573Sopenharmony_ci        match find_byte(b'$', replacement) {
57c67d6573Sopenharmony_ci            None => break,
58c67d6573Sopenharmony_ci            Some(i) => {
59c67d6573Sopenharmony_ci                dst.extend(&replacement[..i]);
60c67d6573Sopenharmony_ci                replacement = &replacement[i..];
61c67d6573Sopenharmony_ci            }
62c67d6573Sopenharmony_ci        }
63c67d6573Sopenharmony_ci        if replacement.get(1).map_or(false, |&b| b == b'$') {
64c67d6573Sopenharmony_ci            dst.push(b'$');
65c67d6573Sopenharmony_ci            replacement = &replacement[2..];
66c67d6573Sopenharmony_ci            continue;
67c67d6573Sopenharmony_ci        }
68c67d6573Sopenharmony_ci        debug_assert!(!replacement.is_empty());
69c67d6573Sopenharmony_ci        let cap_ref = match find_cap_ref(replacement) {
70c67d6573Sopenharmony_ci            Some(cap_ref) => cap_ref,
71c67d6573Sopenharmony_ci            None => {
72c67d6573Sopenharmony_ci                dst.push(b'$');
73c67d6573Sopenharmony_ci                replacement = &replacement[1..];
74c67d6573Sopenharmony_ci                continue;
75c67d6573Sopenharmony_ci            }
76c67d6573Sopenharmony_ci        };
77c67d6573Sopenharmony_ci        replacement = &replacement[cap_ref.end..];
78c67d6573Sopenharmony_ci        match cap_ref.cap {
79c67d6573Sopenharmony_ci            Ref::Number(i) => {
80c67d6573Sopenharmony_ci                dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b""));
81c67d6573Sopenharmony_ci            }
82c67d6573Sopenharmony_ci            Ref::Named(name) => {
83c67d6573Sopenharmony_ci                dst.extend(
84c67d6573Sopenharmony_ci                    caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""),
85c67d6573Sopenharmony_ci                );
86c67d6573Sopenharmony_ci            }
87c67d6573Sopenharmony_ci        }
88c67d6573Sopenharmony_ci    }
89c67d6573Sopenharmony_ci    dst.extend(replacement);
90c67d6573Sopenharmony_ci}
91c67d6573Sopenharmony_ci
92c67d6573Sopenharmony_ci/// `CaptureRef` represents a reference to a capture group inside some text.
93c67d6573Sopenharmony_ci/// The reference is either a capture group name or a number.
94c67d6573Sopenharmony_ci///
95c67d6573Sopenharmony_ci/// It is also tagged with the position in the text following the
96c67d6573Sopenharmony_ci/// capture reference.
97c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug, Eq, PartialEq)]
98c67d6573Sopenharmony_cistruct CaptureRef<'a> {
99c67d6573Sopenharmony_ci    cap: Ref<'a>,
100c67d6573Sopenharmony_ci    end: usize,
101c67d6573Sopenharmony_ci}
102c67d6573Sopenharmony_ci
/// Identifies a capture group mentioned in replacement text.
///
/// Examples of references: `$2`, `$foo`, `${foo}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Ref<'a> {
    /// The group is referred to by name; the name borrows from the
    /// replacement text.
    Named(&'a str),
    /// The group is referred to by its index.
    Number(usize),
}
111c67d6573Sopenharmony_ci
112c67d6573Sopenharmony_ciimpl<'a> From<&'a str> for Ref<'a> {
113c67d6573Sopenharmony_ci    fn from(x: &'a str) -> Ref<'a> {
114c67d6573Sopenharmony_ci        Ref::Named(x)
115c67d6573Sopenharmony_ci    }
116c67d6573Sopenharmony_ci}
117c67d6573Sopenharmony_ci
118c67d6573Sopenharmony_ciimpl From<usize> for Ref<'static> {
119c67d6573Sopenharmony_ci    fn from(x: usize) -> Ref<'static> {
120c67d6573Sopenharmony_ci        Ref::Number(x)
121c67d6573Sopenharmony_ci    }
122c67d6573Sopenharmony_ci}
123c67d6573Sopenharmony_ci
124c67d6573Sopenharmony_ci/// Parses a possible reference to a capture group name in the given text,
125c67d6573Sopenharmony_ci/// starting at the beginning of `replacement`.
126c67d6573Sopenharmony_ci///
127c67d6573Sopenharmony_ci/// If no such valid reference could be found, None is returned.
128c67d6573Sopenharmony_cifn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
129c67d6573Sopenharmony_ci    let mut i = 0;
130c67d6573Sopenharmony_ci    let rep: &[u8] = replacement;
131c67d6573Sopenharmony_ci    if rep.len() <= 1 || rep[0] != b'$' {
132c67d6573Sopenharmony_ci        return None;
133c67d6573Sopenharmony_ci    }
134c67d6573Sopenharmony_ci    i += 1;
135c67d6573Sopenharmony_ci    if rep[i] == b'{' {
136c67d6573Sopenharmony_ci        return find_cap_ref_braced(rep, i + 1);
137c67d6573Sopenharmony_ci    }
138c67d6573Sopenharmony_ci    let mut cap_end = i;
139c67d6573Sopenharmony_ci    while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) {
140c67d6573Sopenharmony_ci        cap_end += 1;
141c67d6573Sopenharmony_ci    }
142c67d6573Sopenharmony_ci    if cap_end == i {
143c67d6573Sopenharmony_ci        return None;
144c67d6573Sopenharmony_ci    }
145c67d6573Sopenharmony_ci    // We just verified that the range 0..cap_end is valid ASCII, so it must
146c67d6573Sopenharmony_ci    // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
147c67d6573Sopenharmony_ci    // check via an unchecked conversion or by parsing the number straight from
148c67d6573Sopenharmony_ci    // &[u8].
149c67d6573Sopenharmony_ci    let cap =
150c67d6573Sopenharmony_ci        str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
151c67d6573Sopenharmony_ci    Some(CaptureRef {
152c67d6573Sopenharmony_ci        cap: match cap.parse::<u32>() {
153c67d6573Sopenharmony_ci            Ok(i) => Ref::Number(i as usize),
154c67d6573Sopenharmony_ci            Err(_) => Ref::Named(cap),
155c67d6573Sopenharmony_ci        },
156c67d6573Sopenharmony_ci        end: cap_end,
157c67d6573Sopenharmony_ci    })
158c67d6573Sopenharmony_ci}
159c67d6573Sopenharmony_ci
160c67d6573Sopenharmony_cifn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
161c67d6573Sopenharmony_ci    let start = i;
162c67d6573Sopenharmony_ci    while rep.get(i).map_or(false, |&b| b != b'}') {
163c67d6573Sopenharmony_ci        i += 1;
164c67d6573Sopenharmony_ci    }
165c67d6573Sopenharmony_ci    if !rep.get(i).map_or(false, |&b| b == b'}') {
166c67d6573Sopenharmony_ci        return None;
167c67d6573Sopenharmony_ci    }
168c67d6573Sopenharmony_ci    // When looking at braced names, we don't put any restrictions on the name,
169c67d6573Sopenharmony_ci    // so it's possible it could be invalid UTF-8. But a capture group name
170c67d6573Sopenharmony_ci    // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
171c67d6573Sopenharmony_ci    // safely return None.
172c67d6573Sopenharmony_ci    let cap = match str::from_utf8(&rep[start..i]) {
173c67d6573Sopenharmony_ci        Err(_) => return None,
174c67d6573Sopenharmony_ci        Ok(cap) => cap,
175c67d6573Sopenharmony_ci    };
176c67d6573Sopenharmony_ci    Some(CaptureRef {
177c67d6573Sopenharmony_ci        cap: match cap.parse::<u32>() {
178c67d6573Sopenharmony_ci            Ok(i) => Ref::Number(i as usize),
179c67d6573Sopenharmony_ci            Err(_) => Ref::Named(cap),
180c67d6573Sopenharmony_ci        },
181c67d6573Sopenharmony_ci        end: i + 1,
182c67d6573Sopenharmony_ci    })
183c67d6573Sopenharmony_ci}
184c67d6573Sopenharmony_ci
/// Reports whether `b` may appear in an unbraced capture name, i.e. whether
/// it is an ASCII letter, an ASCII digit or an underscore.
fn is_valid_cap_letter(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric()
}
192c67d6573Sopenharmony_ci
#[cfg(test)]
mod tests {
    use super::{find_cap_ref, CaptureRef};

    // Generates a `#[test]` that parses `$text` with `find_cap_ref`.
    //
    // The two-argument form expects no reference to be found; the
    // three-argument form expects exactly `$capref`.
    macro_rules! find {
        ($name:ident, $text:expr) => {
            #[test]
            fn $name() {
                assert_eq!(None, find_cap_ref($text.as_bytes()));
            }
        };
        ($name:ident, $text:expr, $capref:expr) => {
            #[test]
            fn $name() {
                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
            }
        };
    }

    // Builds the expected `CaptureRef` from a group name or number and the
    // offset at which the reference ends.
    macro_rules! c {
        ($name_or_number:expr, $pos:expr) => {
            CaptureRef { cap: $name_or_number.into(), end: $pos }
        };
    }

    find!(find_cap_ref1, "$foo", c!("foo", 4));
    find!(find_cap_ref2, "${foo}", c!("foo", 6));
    find!(find_cap_ref3, "$0", c!(0, 2));
    find!(find_cap_ref4, "$5", c!(5, 2));
    find!(find_cap_ref5, "$10", c!(10, 3));
    // See https://github.com/rust-lang/regex/pull/585
    // for more on characters following numbers
    find!(find_cap_ref6, "$42a", c!("42a", 4));
    find!(find_cap_ref7, "${42}a", c!(42, 5));
    find!(find_cap_ref8, "${42");
    find!(find_cap_ref9, "${42 ");
    find!(find_cap_ref10, " $0 ");
    find!(find_cap_ref11, "$");
    find!(find_cap_ref12, " ");
    find!(find_cap_ref13, "");
    find!(find_cap_ref14, "$1-$2", c!(1, 2));
    find!(find_cap_ref15, "$1_$2", c!("1_", 3));
    find!(find_cap_ref16, "$x-$y", c!("x", 2));
    find!(find_cap_ref17, "$x_$y", c!("x_", 3));
    find!(find_cap_ref18, "${#}", c!("#", 4));
    find!(find_cap_ref19, "${Z[}", c!("Z[", 5));

    // A second `$` is not a name byte, so the parser itself finds no
    // reference here.
    find!(find_cap_ref20, "$$");
    // An opening brace with nothing after it has no closing brace.
    find!(find_cap_ref21, "${");
    // Braced names are unrestricted, so even the empty name is accepted.
    find!(find_cap_ref22, "${}", c!("", 3));
    find!(find_cap_ref23, "$_", c!("_", 2));
    find!(find_cap_ref24, "$-");
    // An unbraced name stops at the first byte that is not an ASCII letter,
    // digit or underscore, including the bytes of a multi-byte character.
    find!(find_cap_ref25, "$foo\u{e9}", c!("foo", 4));

    // The `find!` macro only accepts `&str` input, so the invalid UTF-8
    // case has to be written out by hand.
    #[test]
    fn find_cap_ref_braced_invalid_utf8() {
        assert_eq!(None, find_cap_ref(b"${\xFF}"));
    }
}
240