1c67d6573Sopenharmony_ciuse std::borrow::Cow; 2c67d6573Sopenharmony_ciuse std::collections::HashMap; 3c67d6573Sopenharmony_ciuse std::fmt; 4c67d6573Sopenharmony_ciuse std::iter::FusedIterator; 5c67d6573Sopenharmony_ciuse std::ops::{Index, Range}; 6c67d6573Sopenharmony_ciuse std::str::FromStr; 7c67d6573Sopenharmony_ciuse std::sync::Arc; 8c67d6573Sopenharmony_ci 9c67d6573Sopenharmony_ciuse crate::find_byte::find_byte; 10c67d6573Sopenharmony_ci 11c67d6573Sopenharmony_ciuse crate::error::Error; 12c67d6573Sopenharmony_ciuse crate::exec::{Exec, ExecNoSync}; 13c67d6573Sopenharmony_ciuse crate::expand::expand_bytes; 14c67d6573Sopenharmony_ciuse crate::re_builder::bytes::RegexBuilder; 15c67d6573Sopenharmony_ciuse crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; 16c67d6573Sopenharmony_ci 17c67d6573Sopenharmony_ci/// Match represents a single match of a regex in a haystack. 18c67d6573Sopenharmony_ci/// 19c67d6573Sopenharmony_ci/// The lifetime parameter `'t` refers to the lifetime of the matched text. 20c67d6573Sopenharmony_ci#[derive(Copy, Clone, Debug, Eq, PartialEq)] 21c67d6573Sopenharmony_cipub struct Match<'t> { 22c67d6573Sopenharmony_ci text: &'t [u8], 23c67d6573Sopenharmony_ci start: usize, 24c67d6573Sopenharmony_ci end: usize, 25c67d6573Sopenharmony_ci} 26c67d6573Sopenharmony_ci 27c67d6573Sopenharmony_ciimpl<'t> Match<'t> { 28c67d6573Sopenharmony_ci /// Returns the starting byte offset of the match in the haystack. 29c67d6573Sopenharmony_ci #[inline] 30c67d6573Sopenharmony_ci pub fn start(&self) -> usize { 31c67d6573Sopenharmony_ci self.start 32c67d6573Sopenharmony_ci } 33c67d6573Sopenharmony_ci 34c67d6573Sopenharmony_ci /// Returns the ending byte offset of the match in the haystack. 
35c67d6573Sopenharmony_ci #[inline] 36c67d6573Sopenharmony_ci pub fn end(&self) -> usize { 37c67d6573Sopenharmony_ci self.end 38c67d6573Sopenharmony_ci } 39c67d6573Sopenharmony_ci 40c67d6573Sopenharmony_ci /// Returns the range over the starting and ending byte offsets of the 41c67d6573Sopenharmony_ci /// match in the haystack. 42c67d6573Sopenharmony_ci #[inline] 43c67d6573Sopenharmony_ci pub fn range(&self) -> Range<usize> { 44c67d6573Sopenharmony_ci self.start..self.end 45c67d6573Sopenharmony_ci } 46c67d6573Sopenharmony_ci 47c67d6573Sopenharmony_ci /// Returns the matched text. 48c67d6573Sopenharmony_ci #[inline] 49c67d6573Sopenharmony_ci pub fn as_bytes(&self) -> &'t [u8] { 50c67d6573Sopenharmony_ci &self.text[self.range()] 51c67d6573Sopenharmony_ci } 52c67d6573Sopenharmony_ci 53c67d6573Sopenharmony_ci /// Creates a new match from the given haystack and byte offsets. 54c67d6573Sopenharmony_ci #[inline] 55c67d6573Sopenharmony_ci fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> { 56c67d6573Sopenharmony_ci Match { text: haystack, start, end } 57c67d6573Sopenharmony_ci } 58c67d6573Sopenharmony_ci} 59c67d6573Sopenharmony_ci 60c67d6573Sopenharmony_ciimpl<'t> From<Match<'t>> for Range<usize> { 61c67d6573Sopenharmony_ci fn from(m: Match<'t>) -> Range<usize> { 62c67d6573Sopenharmony_ci m.range() 63c67d6573Sopenharmony_ci } 64c67d6573Sopenharmony_ci} 65c67d6573Sopenharmony_ci 66c67d6573Sopenharmony_ci/// A compiled regular expression for matching arbitrary bytes. 67c67d6573Sopenharmony_ci/// 68c67d6573Sopenharmony_ci/// It can be used to search, split or replace text. All searching is done with 69c67d6573Sopenharmony_ci/// an implicit `.*?` at the beginning and end of an expression. To force an 70c67d6573Sopenharmony_ci/// expression to match the whole string (or a prefix or a suffix), you must 71c67d6573Sopenharmony_ci/// use an anchor like `^` or `$` (or `\A` and `\z`). 
72c67d6573Sopenharmony_ci/// 73c67d6573Sopenharmony_ci/// Like the `Regex` type in the parent module, matches with this regex return 74c67d6573Sopenharmony_ci/// byte offsets into the search text. **Unlike** the parent `Regex` type, 75c67d6573Sopenharmony_ci/// these byte offsets may not correspond to UTF-8 sequence boundaries since 76c67d6573Sopenharmony_ci/// the regexes in this module can match arbitrary bytes. 77c67d6573Sopenharmony_ci#[derive(Clone)] 78c67d6573Sopenharmony_cipub struct Regex(Exec); 79c67d6573Sopenharmony_ci 80c67d6573Sopenharmony_ciimpl fmt::Display for Regex { 81c67d6573Sopenharmony_ci /// Shows the original regular expression. 82c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 83c67d6573Sopenharmony_ci write!(f, "{}", self.as_str()) 84c67d6573Sopenharmony_ci } 85c67d6573Sopenharmony_ci} 86c67d6573Sopenharmony_ci 87c67d6573Sopenharmony_ciimpl fmt::Debug for Regex { 88c67d6573Sopenharmony_ci /// Shows the original regular expression. 89c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 90c67d6573Sopenharmony_ci fmt::Display::fmt(self, f) 91c67d6573Sopenharmony_ci } 92c67d6573Sopenharmony_ci} 93c67d6573Sopenharmony_ci 94c67d6573Sopenharmony_ci/// A constructor for Regex from an Exec. 95c67d6573Sopenharmony_ci/// 96c67d6573Sopenharmony_ci/// This is hidden because Exec isn't actually part of the public API. 
#[doc(hidden)]
impl From<Exec> for Regex {
    fn from(exec: Exec) -> Regex {
        Regex(exec)
    }
}

impl FromStr for Regex {
    type Err = Error;

    /// Attempts to parse a string into a regular expression
    fn from_str(s: &str) -> Result<Regex, Error> {
        Regex::new(s)
    }
}

/// Core regular expression methods.
impl Regex {
    /// Compiles a regular expression. Once compiled, it can be used repeatedly
    /// to search, split or replace text in a string.
    ///
    /// If an invalid expression is given, then an error is returned.
    pub fn new(re: &str) -> Result<Regex, Error> {
        RegexBuilder::new(re).build()
    }

    /// Returns true if and only if there is a match for the regex in the
    /// string given.
    ///
    /// It is recommended to use this method if all you need to do is test
    /// a match, since the underlying matching engine may be able to do less
    /// work.
    ///
    /// # Example
    ///
    /// Test if some text contains at least one word with exactly 13 ASCII word
    /// bytes:
    ///
    /// ```rust
    /// # use regex::bytes::Regex;
    /// # fn main() {
    /// let text = b"I categorically deny having triskaidekaphobia.";
    /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
    /// # }
    /// ```
    pub fn is_match(&self, text: &[u8]) -> bool {
        self.is_match_at(text, 0)
    }

    /// Returns the start and end byte range of the leftmost-first match in
    /// `text`. If no match exists, then `None` is returned.
    ///
    /// Note that this should only be used if you want to discover the position
    /// of the match. Testing the existence of a match is faster if you use
    /// `is_match`.
    ///
    /// # Example
    ///
    /// Find the start and end location of the first word with exactly 13
    /// ASCII word bytes:
    ///
    /// ```rust
    /// # use regex::bytes::Regex;
    /// # fn main() {
    /// let text = b"I categorically deny having triskaidekaphobia.";
    /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
    /// assert_eq!((mat.start(), mat.end()), (2, 15));
    /// # }
    /// ```
    pub fn find<'t>(&self, text: &'t [u8]) -> Option<Match<'t>> {
        self.find_at(text, 0)
    }

    /// Returns an iterator for each successive non-overlapping match in
    /// `text`, returning the start and end byte indices with respect to
    /// `text`.
173c67d6573Sopenharmony_ci /// 174c67d6573Sopenharmony_ci /// # Example 175c67d6573Sopenharmony_ci /// 176c67d6573Sopenharmony_ci /// Find the start and end location of every word with exactly 13 ASCII 177c67d6573Sopenharmony_ci /// word bytes: 178c67d6573Sopenharmony_ci /// 179c67d6573Sopenharmony_ci /// ```rust 180c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 181c67d6573Sopenharmony_ci /// # fn main() { 182c67d6573Sopenharmony_ci /// let text = b"Retroactively relinquishing remunerations is reprehensible."; 183c67d6573Sopenharmony_ci /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { 184c67d6573Sopenharmony_ci /// println!("{:?}", mat); 185c67d6573Sopenharmony_ci /// } 186c67d6573Sopenharmony_ci /// # } 187c67d6573Sopenharmony_ci /// ``` 188c67d6573Sopenharmony_ci pub fn find_iter<'r, 't>(&'r self, text: &'t [u8]) -> Matches<'r, 't> { 189c67d6573Sopenharmony_ci Matches(self.0.searcher().find_iter(text)) 190c67d6573Sopenharmony_ci } 191c67d6573Sopenharmony_ci 192c67d6573Sopenharmony_ci /// Returns the capture groups corresponding to the leftmost-first 193c67d6573Sopenharmony_ci /// match in `text`. Capture group `0` always corresponds to the entire 194c67d6573Sopenharmony_ci /// match. If no match is found, then `None` is returned. 195c67d6573Sopenharmony_ci /// 196c67d6573Sopenharmony_ci /// You should only use `captures` if you need access to the location of 197c67d6573Sopenharmony_ci /// capturing group matches. Otherwise, `find` is faster for discovering 198c67d6573Sopenharmony_ci /// the location of the overall match. 199c67d6573Sopenharmony_ci /// 200c67d6573Sopenharmony_ci /// # Examples 201c67d6573Sopenharmony_ci /// 202c67d6573Sopenharmony_ci /// Say you have some text with movie names and their release years, 203c67d6573Sopenharmony_ci /// like "'Citizen Kane' (1941)". 
It'd be nice if we could search for text 204c67d6573Sopenharmony_ci /// looking like that, while also extracting the movie name and its release 205c67d6573Sopenharmony_ci /// year separately. 206c67d6573Sopenharmony_ci /// 207c67d6573Sopenharmony_ci /// ```rust 208c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 209c67d6573Sopenharmony_ci /// # fn main() { 210c67d6573Sopenharmony_ci /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); 211c67d6573Sopenharmony_ci /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; 212c67d6573Sopenharmony_ci /// let caps = re.captures(text).unwrap(); 213c67d6573Sopenharmony_ci /// assert_eq!(caps.get(1).unwrap().as_bytes(), &b"Citizen Kane"[..]); 214c67d6573Sopenharmony_ci /// assert_eq!(caps.get(2).unwrap().as_bytes(), &b"1941"[..]); 215c67d6573Sopenharmony_ci /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); 216c67d6573Sopenharmony_ci /// // You can also access the groups by index using the Index notation. 217c67d6573Sopenharmony_ci /// // Note that this will panic on an invalid index. 218c67d6573Sopenharmony_ci /// assert_eq!(&caps[1], b"Citizen Kane"); 219c67d6573Sopenharmony_ci /// assert_eq!(&caps[2], b"1941"); 220c67d6573Sopenharmony_ci /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); 221c67d6573Sopenharmony_ci /// # } 222c67d6573Sopenharmony_ci /// ``` 223c67d6573Sopenharmony_ci /// 224c67d6573Sopenharmony_ci /// Note that the full match is at capture group `0`. Each subsequent 225c67d6573Sopenharmony_ci /// capture group is indexed by the order of its opening `(`. 
226c67d6573Sopenharmony_ci /// 227c67d6573Sopenharmony_ci /// We can make this example a bit clearer by using *named* capture groups: 228c67d6573Sopenharmony_ci /// 229c67d6573Sopenharmony_ci /// ```rust 230c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 231c67d6573Sopenharmony_ci /// # fn main() { 232c67d6573Sopenharmony_ci /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") 233c67d6573Sopenharmony_ci /// .unwrap(); 234c67d6573Sopenharmony_ci /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; 235c67d6573Sopenharmony_ci /// let caps = re.captures(text).unwrap(); 236c67d6573Sopenharmony_ci /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane"); 237c67d6573Sopenharmony_ci /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941"); 238c67d6573Sopenharmony_ci /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); 239c67d6573Sopenharmony_ci /// // You can also access the groups by name using the Index notation. 240c67d6573Sopenharmony_ci /// // Note that this will panic on an invalid group name. 241c67d6573Sopenharmony_ci /// assert_eq!(&caps["title"], b"Citizen Kane"); 242c67d6573Sopenharmony_ci /// assert_eq!(&caps["year"], b"1941"); 243c67d6573Sopenharmony_ci /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); 244c67d6573Sopenharmony_ci /// 245c67d6573Sopenharmony_ci /// # } 246c67d6573Sopenharmony_ci /// ``` 247c67d6573Sopenharmony_ci /// 248c67d6573Sopenharmony_ci /// Here we name the capture groups, which we can access with the `name` 249c67d6573Sopenharmony_ci /// method or the `Index` notation with a `&str`. Note that the named 250c67d6573Sopenharmony_ci /// capture groups are still accessible with `get` or the `Index` notation 251c67d6573Sopenharmony_ci /// with a `usize`. 252c67d6573Sopenharmony_ci /// 253c67d6573Sopenharmony_ci /// The `0`th capture group is always unnamed, so it must always be 254c67d6573Sopenharmony_ci /// accessed with `get(0)` or `[0]`. 
255c67d6573Sopenharmony_ci pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> { 256c67d6573Sopenharmony_ci let mut locs = self.capture_locations(); 257c67d6573Sopenharmony_ci self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { 258c67d6573Sopenharmony_ci text, 259c67d6573Sopenharmony_ci locs: locs.0, 260c67d6573Sopenharmony_ci named_groups: self.0.capture_name_idx().clone(), 261c67d6573Sopenharmony_ci }) 262c67d6573Sopenharmony_ci } 263c67d6573Sopenharmony_ci 264c67d6573Sopenharmony_ci /// Returns an iterator over all the non-overlapping capture groups matched 265c67d6573Sopenharmony_ci /// in `text`. This is operationally the same as `find_iter`, except it 266c67d6573Sopenharmony_ci /// yields information about capturing group matches. 267c67d6573Sopenharmony_ci /// 268c67d6573Sopenharmony_ci /// # Example 269c67d6573Sopenharmony_ci /// 270c67d6573Sopenharmony_ci /// We can use this to find all movie titles and their release years in 271c67d6573Sopenharmony_ci /// some text, where the movie is formatted like "'Title' (xxxx)": 272c67d6573Sopenharmony_ci /// 273c67d6573Sopenharmony_ci /// ```rust 274c67d6573Sopenharmony_ci /// # use std::str; use regex::bytes::Regex; 275c67d6573Sopenharmony_ci /// # fn main() { 276c67d6573Sopenharmony_ci /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") 277c67d6573Sopenharmony_ci /// .unwrap(); 278c67d6573Sopenharmony_ci /// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; 279c67d6573Sopenharmony_ci /// for caps in re.captures_iter(text) { 280c67d6573Sopenharmony_ci /// let title = str::from_utf8(&caps["title"]).unwrap(); 281c67d6573Sopenharmony_ci /// let year = str::from_utf8(&caps["year"]).unwrap(); 282c67d6573Sopenharmony_ci /// println!("Movie: {:?}, Released: {:?}", title, year); 283c67d6573Sopenharmony_ci /// } 284c67d6573Sopenharmony_ci /// // Output: 285c67d6573Sopenharmony_ci /// // Movie: Citizen Kane, Released: 1941 286c67d6573Sopenharmony_ci /// 
// Movie: The Wizard of Oz, Released: 1939 287c67d6573Sopenharmony_ci /// // Movie: M, Released: 1931 288c67d6573Sopenharmony_ci /// # } 289c67d6573Sopenharmony_ci /// ``` 290c67d6573Sopenharmony_ci pub fn captures_iter<'r, 't>( 291c67d6573Sopenharmony_ci &'r self, 292c67d6573Sopenharmony_ci text: &'t [u8], 293c67d6573Sopenharmony_ci ) -> CaptureMatches<'r, 't> { 294c67d6573Sopenharmony_ci CaptureMatches(self.0.searcher().captures_iter(text)) 295c67d6573Sopenharmony_ci } 296c67d6573Sopenharmony_ci 297c67d6573Sopenharmony_ci /// Returns an iterator of substrings of `text` delimited by a match of the 298c67d6573Sopenharmony_ci /// regular expression. Namely, each element of the iterator corresponds to 299c67d6573Sopenharmony_ci /// text that *isn't* matched by the regular expression. 300c67d6573Sopenharmony_ci /// 301c67d6573Sopenharmony_ci /// This method will *not* copy the text given. 302c67d6573Sopenharmony_ci /// 303c67d6573Sopenharmony_ci /// # Example 304c67d6573Sopenharmony_ci /// 305c67d6573Sopenharmony_ci /// To split a string delimited by arbitrary amounts of spaces or tabs: 306c67d6573Sopenharmony_ci /// 307c67d6573Sopenharmony_ci /// ```rust 308c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 309c67d6573Sopenharmony_ci /// # fn main() { 310c67d6573Sopenharmony_ci /// let re = Regex::new(r"[ \t]+").unwrap(); 311c67d6573Sopenharmony_ci /// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect(); 312c67d6573Sopenharmony_ci /// assert_eq!(fields, vec![ 313c67d6573Sopenharmony_ci /// &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..], 314c67d6573Sopenharmony_ci /// ]); 315c67d6573Sopenharmony_ci /// # } 316c67d6573Sopenharmony_ci /// ``` 317c67d6573Sopenharmony_ci pub fn split<'r, 't>(&'r self, text: &'t [u8]) -> Split<'r, 't> { 318c67d6573Sopenharmony_ci Split { finder: self.find_iter(text), last: 0 } 319c67d6573Sopenharmony_ci } 320c67d6573Sopenharmony_ci 321c67d6573Sopenharmony_ci /// Returns an iterator of at most `limit` substrings of 
`text` delimited 322c67d6573Sopenharmony_ci /// by a match of the regular expression. (A `limit` of `0` will return no 323c67d6573Sopenharmony_ci /// substrings.) Namely, each element of the iterator corresponds to text 324c67d6573Sopenharmony_ci /// that *isn't* matched by the regular expression. The remainder of the 325c67d6573Sopenharmony_ci /// string that is not split will be the last element in the iterator. 326c67d6573Sopenharmony_ci /// 327c67d6573Sopenharmony_ci /// This method will *not* copy the text given. 328c67d6573Sopenharmony_ci /// 329c67d6573Sopenharmony_ci /// # Example 330c67d6573Sopenharmony_ci /// 331c67d6573Sopenharmony_ci /// Get the first two words in some text: 332c67d6573Sopenharmony_ci /// 333c67d6573Sopenharmony_ci /// ```rust 334c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 335c67d6573Sopenharmony_ci /// # fn main() { 336c67d6573Sopenharmony_ci /// let re = Regex::new(r"\W+").unwrap(); 337c67d6573Sopenharmony_ci /// let fields: Vec<&[u8]> = re.splitn(b"Hey! How are you?", 3).collect(); 338c67d6573Sopenharmony_ci /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]); 339c67d6573Sopenharmony_ci /// # } 340c67d6573Sopenharmony_ci /// ``` 341c67d6573Sopenharmony_ci pub fn splitn<'r, 't>( 342c67d6573Sopenharmony_ci &'r self, 343c67d6573Sopenharmony_ci text: &'t [u8], 344c67d6573Sopenharmony_ci limit: usize, 345c67d6573Sopenharmony_ci ) -> SplitN<'r, 't> { 346c67d6573Sopenharmony_ci SplitN { splits: self.split(text), n: limit } 347c67d6573Sopenharmony_ci } 348c67d6573Sopenharmony_ci 349c67d6573Sopenharmony_ci /// Replaces the leftmost-first match with the replacement provided. The 350c67d6573Sopenharmony_ci /// replacement can be a regular byte string (where `$N` and `$name` are 351c67d6573Sopenharmony_ci /// expanded to match capture groups) or a function that takes the matches' 352c67d6573Sopenharmony_ci /// `Captures` and returns the replaced byte string. 
353c67d6573Sopenharmony_ci /// 354c67d6573Sopenharmony_ci /// If no match is found, then a copy of the byte string is returned 355c67d6573Sopenharmony_ci /// unchanged. 356c67d6573Sopenharmony_ci /// 357c67d6573Sopenharmony_ci /// # Replacement string syntax 358c67d6573Sopenharmony_ci /// 359c67d6573Sopenharmony_ci /// All instances of `$name` in the replacement text is replaced with the 360c67d6573Sopenharmony_ci /// corresponding capture group `name`. 361c67d6573Sopenharmony_ci /// 362c67d6573Sopenharmony_ci /// `name` may be an integer corresponding to the index of the 363c67d6573Sopenharmony_ci /// capture group (counted by order of opening parenthesis where `0` is the 364c67d6573Sopenharmony_ci /// entire match) or it can be a name (consisting of letters, digits or 365c67d6573Sopenharmony_ci /// underscores) corresponding to a named capture group. 366c67d6573Sopenharmony_ci /// 367c67d6573Sopenharmony_ci /// If `name` isn't a valid capture group (whether the name doesn't exist 368c67d6573Sopenharmony_ci /// or isn't a valid index), then it is replaced with the empty string. 369c67d6573Sopenharmony_ci /// 370c67d6573Sopenharmony_ci /// The longest possible name is used. e.g., `$1a` looks up the capture 371c67d6573Sopenharmony_ci /// group named `1a` and not the capture group at index `1`. To exert more 372c67d6573Sopenharmony_ci /// precise control over the name, use braces, e.g., `${1}a`. 373c67d6573Sopenharmony_ci /// 374c67d6573Sopenharmony_ci /// To write a literal `$` use `$$`. 375c67d6573Sopenharmony_ci /// 376c67d6573Sopenharmony_ci /// # Examples 377c67d6573Sopenharmony_ci /// 378c67d6573Sopenharmony_ci /// Note that this function is polymorphic with respect to the replacement. 
379c67d6573Sopenharmony_ci /// In typical usage, this can just be a normal byte string: 380c67d6573Sopenharmony_ci /// 381c67d6573Sopenharmony_ci /// ```rust 382c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 383c67d6573Sopenharmony_ci /// # fn main() { 384c67d6573Sopenharmony_ci /// let re = Regex::new("[^01]+").unwrap(); 385c67d6573Sopenharmony_ci /// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]); 386c67d6573Sopenharmony_ci /// # } 387c67d6573Sopenharmony_ci /// ``` 388c67d6573Sopenharmony_ci /// 389c67d6573Sopenharmony_ci /// But anything satisfying the `Replacer` trait will work. For example, a 390c67d6573Sopenharmony_ci /// closure of type `|&Captures| -> Vec<u8>` provides direct access to the 391c67d6573Sopenharmony_ci /// captures corresponding to a match. This allows one to access capturing 392c67d6573Sopenharmony_ci /// group matches easily: 393c67d6573Sopenharmony_ci /// 394c67d6573Sopenharmony_ci /// ```rust 395c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 396c67d6573Sopenharmony_ci /// # use regex::bytes::Captures; fn main() { 397c67d6573Sopenharmony_ci /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); 398c67d6573Sopenharmony_ci /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| { 399c67d6573Sopenharmony_ci /// let mut replacement = caps[2].to_owned(); 400c67d6573Sopenharmony_ci /// replacement.push(b' '); 401c67d6573Sopenharmony_ci /// replacement.extend(&caps[1]); 402c67d6573Sopenharmony_ci /// replacement 403c67d6573Sopenharmony_ci /// }); 404c67d6573Sopenharmony_ci /// assert_eq!(result, &b"Bruce Springsteen"[..]); 405c67d6573Sopenharmony_ci /// # } 406c67d6573Sopenharmony_ci /// ``` 407c67d6573Sopenharmony_ci /// 408c67d6573Sopenharmony_ci /// But this is a bit cumbersome to use all the time. Instead, a simple 409c67d6573Sopenharmony_ci /// syntax is supported that expands `$name` into the corresponding capture 410c67d6573Sopenharmony_ci /// group. 
Here's the last example, but using this expansion technique 411c67d6573Sopenharmony_ci /// with named capture groups: 412c67d6573Sopenharmony_ci /// 413c67d6573Sopenharmony_ci /// ```rust 414c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 415c67d6573Sopenharmony_ci /// # fn main() { 416c67d6573Sopenharmony_ci /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); 417c67d6573Sopenharmony_ci /// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]); 418c67d6573Sopenharmony_ci /// assert_eq!(result, &b"Bruce Springsteen"[..]); 419c67d6573Sopenharmony_ci /// # } 420c67d6573Sopenharmony_ci /// ``` 421c67d6573Sopenharmony_ci /// 422c67d6573Sopenharmony_ci /// Note that using `$2` instead of `$first` or `$1` instead of `$last` 423c67d6573Sopenharmony_ci /// would produce the same result. To write a literal `$` use `$$`. 424c67d6573Sopenharmony_ci /// 425c67d6573Sopenharmony_ci /// Sometimes the replacement string requires use of curly braces to 426c67d6573Sopenharmony_ci /// delineate a capture group replacement and surrounding literal text. 
427c67d6573Sopenharmony_ci /// For example, if we wanted to join two words together with an 428c67d6573Sopenharmony_ci /// underscore: 429c67d6573Sopenharmony_ci /// 430c67d6573Sopenharmony_ci /// ```rust 431c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 432c67d6573Sopenharmony_ci /// # fn main() { 433c67d6573Sopenharmony_ci /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); 434c67d6573Sopenharmony_ci /// let result = re.replace(b"deep fried", &b"${first}_$second"[..]); 435c67d6573Sopenharmony_ci /// assert_eq!(result, &b"deep_fried"[..]); 436c67d6573Sopenharmony_ci /// # } 437c67d6573Sopenharmony_ci /// ``` 438c67d6573Sopenharmony_ci /// 439c67d6573Sopenharmony_ci /// Without the curly braces, the capture group name `first_` would be 440c67d6573Sopenharmony_ci /// used, and since it doesn't exist, it would be replaced with the empty 441c67d6573Sopenharmony_ci /// string. 442c67d6573Sopenharmony_ci /// 443c67d6573Sopenharmony_ci /// Finally, sometimes you just want to replace a literal string with no 444c67d6573Sopenharmony_ci /// regard for capturing group expansion. 
This can be done by wrapping a 445c67d6573Sopenharmony_ci /// byte string with `NoExpand`: 446c67d6573Sopenharmony_ci /// 447c67d6573Sopenharmony_ci /// ```rust 448c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 449c67d6573Sopenharmony_ci /// # fn main() { 450c67d6573Sopenharmony_ci /// use regex::bytes::NoExpand; 451c67d6573Sopenharmony_ci /// 452c67d6573Sopenharmony_ci /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); 453c67d6573Sopenharmony_ci /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); 454c67d6573Sopenharmony_ci /// assert_eq!(result, &b"$2 $last"[..]); 455c67d6573Sopenharmony_ci /// # } 456c67d6573Sopenharmony_ci /// ``` 457c67d6573Sopenharmony_ci pub fn replace<'t, R: Replacer>( 458c67d6573Sopenharmony_ci &self, 459c67d6573Sopenharmony_ci text: &'t [u8], 460c67d6573Sopenharmony_ci rep: R, 461c67d6573Sopenharmony_ci ) -> Cow<'t, [u8]> { 462c67d6573Sopenharmony_ci self.replacen(text, 1, rep) 463c67d6573Sopenharmony_ci } 464c67d6573Sopenharmony_ci 465c67d6573Sopenharmony_ci /// Replaces all non-overlapping matches in `text` with the replacement 466c67d6573Sopenharmony_ci /// provided. This is the same as calling `replacen` with `limit` set to 467c67d6573Sopenharmony_ci /// `0`. 468c67d6573Sopenharmony_ci /// 469c67d6573Sopenharmony_ci /// See the documentation for `replace` for details on how to access 470c67d6573Sopenharmony_ci /// capturing group matches in the replacement text. 471c67d6573Sopenharmony_ci pub fn replace_all<'t, R: Replacer>( 472c67d6573Sopenharmony_ci &self, 473c67d6573Sopenharmony_ci text: &'t [u8], 474c67d6573Sopenharmony_ci rep: R, 475c67d6573Sopenharmony_ci ) -> Cow<'t, [u8]> { 476c67d6573Sopenharmony_ci self.replacen(text, 0, rep) 477c67d6573Sopenharmony_ci } 478c67d6573Sopenharmony_ci 479c67d6573Sopenharmony_ci /// Replaces at most `limit` non-overlapping matches in `text` with the 480c67d6573Sopenharmony_ci /// replacement provided. 
If `limit` is 0, then all non-overlapping matches 481c67d6573Sopenharmony_ci /// are replaced. 482c67d6573Sopenharmony_ci /// 483c67d6573Sopenharmony_ci /// See the documentation for `replace` for details on how to access 484c67d6573Sopenharmony_ci /// capturing group matches in the replacement text. 485c67d6573Sopenharmony_ci pub fn replacen<'t, R: Replacer>( 486c67d6573Sopenharmony_ci &self, 487c67d6573Sopenharmony_ci text: &'t [u8], 488c67d6573Sopenharmony_ci limit: usize, 489c67d6573Sopenharmony_ci mut rep: R, 490c67d6573Sopenharmony_ci ) -> Cow<'t, [u8]> { 491c67d6573Sopenharmony_ci if let Some(rep) = rep.no_expansion() { 492c67d6573Sopenharmony_ci let mut it = self.find_iter(text).enumerate().peekable(); 493c67d6573Sopenharmony_ci if it.peek().is_none() { 494c67d6573Sopenharmony_ci return Cow::Borrowed(text); 495c67d6573Sopenharmony_ci } 496c67d6573Sopenharmony_ci let mut new = Vec::with_capacity(text.len()); 497c67d6573Sopenharmony_ci let mut last_match = 0; 498c67d6573Sopenharmony_ci for (i, m) in it { 499c67d6573Sopenharmony_ci new.extend_from_slice(&text[last_match..m.start()]); 500c67d6573Sopenharmony_ci new.extend_from_slice(&rep); 501c67d6573Sopenharmony_ci last_match = m.end(); 502c67d6573Sopenharmony_ci if limit > 0 && i >= limit - 1 { 503c67d6573Sopenharmony_ci break; 504c67d6573Sopenharmony_ci } 505c67d6573Sopenharmony_ci } 506c67d6573Sopenharmony_ci new.extend_from_slice(&text[last_match..]); 507c67d6573Sopenharmony_ci return Cow::Owned(new); 508c67d6573Sopenharmony_ci } 509c67d6573Sopenharmony_ci 510c67d6573Sopenharmony_ci // The slower path, which we use if the replacement needs access to 511c67d6573Sopenharmony_ci // capture groups. 
512c67d6573Sopenharmony_ci let mut it = self.captures_iter(text).enumerate().peekable(); 513c67d6573Sopenharmony_ci if it.peek().is_none() { 514c67d6573Sopenharmony_ci return Cow::Borrowed(text); 515c67d6573Sopenharmony_ci } 516c67d6573Sopenharmony_ci let mut new = Vec::with_capacity(text.len()); 517c67d6573Sopenharmony_ci let mut last_match = 0; 518c67d6573Sopenharmony_ci for (i, cap) in it { 519c67d6573Sopenharmony_ci // unwrap on 0 is OK because captures only reports matches 520c67d6573Sopenharmony_ci let m = cap.get(0).unwrap(); 521c67d6573Sopenharmony_ci new.extend_from_slice(&text[last_match..m.start()]); 522c67d6573Sopenharmony_ci rep.replace_append(&cap, &mut new); 523c67d6573Sopenharmony_ci last_match = m.end(); 524c67d6573Sopenharmony_ci if limit > 0 && i >= limit - 1 { 525c67d6573Sopenharmony_ci break; 526c67d6573Sopenharmony_ci } 527c67d6573Sopenharmony_ci } 528c67d6573Sopenharmony_ci new.extend_from_slice(&text[last_match..]); 529c67d6573Sopenharmony_ci Cow::Owned(new) 530c67d6573Sopenharmony_ci } 531c67d6573Sopenharmony_ci} 532c67d6573Sopenharmony_ci 533c67d6573Sopenharmony_ci/// Advanced or "lower level" search methods. 534c67d6573Sopenharmony_ciimpl Regex { 535c67d6573Sopenharmony_ci /// Returns the end location of a match in the text given. 536c67d6573Sopenharmony_ci /// 537c67d6573Sopenharmony_ci /// This method may have the same performance characteristics as 538c67d6573Sopenharmony_ci /// `is_match`, except it provides an end location for a match. In 539c67d6573Sopenharmony_ci /// particular, the location returned *may be shorter* than the proper end 540c67d6573Sopenharmony_ci /// of the leftmost-first match. 541c67d6573Sopenharmony_ci /// 542c67d6573Sopenharmony_ci /// # Example 543c67d6573Sopenharmony_ci /// 544c67d6573Sopenharmony_ci /// Typically, `a+` would match the entire first sequence of `a` in some 545c67d6573Sopenharmony_ci /// text, but `shortest_match` can give up as soon as it sees the first 546c67d6573Sopenharmony_ci /// `a`. 
547c67d6573Sopenharmony_ci /// 548c67d6573Sopenharmony_ci /// ```rust 549c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 550c67d6573Sopenharmony_ci /// # fn main() { 551c67d6573Sopenharmony_ci /// let text = b"aaaaa"; 552c67d6573Sopenharmony_ci /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); 553c67d6573Sopenharmony_ci /// assert_eq!(pos, Some(1)); 554c67d6573Sopenharmony_ci /// # } 555c67d6573Sopenharmony_ci /// ``` 556c67d6573Sopenharmony_ci pub fn shortest_match(&self, text: &[u8]) -> Option<usize> { 557c67d6573Sopenharmony_ci self.shortest_match_at(text, 0) 558c67d6573Sopenharmony_ci } 559c67d6573Sopenharmony_ci 560c67d6573Sopenharmony_ci /// Returns the same as shortest_match, but starts the search at the given 561c67d6573Sopenharmony_ci /// offset. 562c67d6573Sopenharmony_ci /// 563c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 564c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 565c67d6573Sopenharmony_ci /// match when `start == 0`. 566c67d6573Sopenharmony_ci pub fn shortest_match_at( 567c67d6573Sopenharmony_ci &self, 568c67d6573Sopenharmony_ci text: &[u8], 569c67d6573Sopenharmony_ci start: usize, 570c67d6573Sopenharmony_ci ) -> Option<usize> { 571c67d6573Sopenharmony_ci self.0.searcher().shortest_match_at(text, start) 572c67d6573Sopenharmony_ci } 573c67d6573Sopenharmony_ci 574c67d6573Sopenharmony_ci /// Returns the same as is_match, but starts the search at the given 575c67d6573Sopenharmony_ci /// offset. 576c67d6573Sopenharmony_ci /// 577c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 578c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 579c67d6573Sopenharmony_ci /// match when `start == 0`. 
580c67d6573Sopenharmony_ci pub fn is_match_at(&self, text: &[u8], start: usize) -> bool { 581c67d6573Sopenharmony_ci self.0.searcher().is_match_at(text, start) 582c67d6573Sopenharmony_ci } 583c67d6573Sopenharmony_ci 584c67d6573Sopenharmony_ci /// Returns the same as find, but starts the search at the given 585c67d6573Sopenharmony_ci /// offset. 586c67d6573Sopenharmony_ci /// 587c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 588c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 589c67d6573Sopenharmony_ci /// match when `start == 0`. 590c67d6573Sopenharmony_ci pub fn find_at<'t>( 591c67d6573Sopenharmony_ci &self, 592c67d6573Sopenharmony_ci text: &'t [u8], 593c67d6573Sopenharmony_ci start: usize, 594c67d6573Sopenharmony_ci ) -> Option<Match<'t>> { 595c67d6573Sopenharmony_ci self.0 596c67d6573Sopenharmony_ci .searcher() 597c67d6573Sopenharmony_ci .find_at(text, start) 598c67d6573Sopenharmony_ci .map(|(s, e)| Match::new(text, s, e)) 599c67d6573Sopenharmony_ci } 600c67d6573Sopenharmony_ci 601c67d6573Sopenharmony_ci /// This is like `captures`, but uses 602c67d6573Sopenharmony_ci /// [`CaptureLocations`](struct.CaptureLocations.html) 603c67d6573Sopenharmony_ci /// instead of 604c67d6573Sopenharmony_ci /// [`Captures`](struct.Captures.html) in order to amortize allocations. 605c67d6573Sopenharmony_ci /// 606c67d6573Sopenharmony_ci /// To create a `CaptureLocations` value, use the 607c67d6573Sopenharmony_ci /// `Regex::capture_locations` method. 608c67d6573Sopenharmony_ci /// 609c67d6573Sopenharmony_ci /// This returns the overall match if this was successful, which is always 610c67d6573Sopenharmony_ci /// equivalence to the `0`th capture group. 
611c67d6573Sopenharmony_ci pub fn captures_read<'t>( 612c67d6573Sopenharmony_ci &self, 613c67d6573Sopenharmony_ci locs: &mut CaptureLocations, 614c67d6573Sopenharmony_ci text: &'t [u8], 615c67d6573Sopenharmony_ci ) -> Option<Match<'t>> { 616c67d6573Sopenharmony_ci self.captures_read_at(locs, text, 0) 617c67d6573Sopenharmony_ci } 618c67d6573Sopenharmony_ci 619c67d6573Sopenharmony_ci /// Returns the same as `captures_read`, but starts the search at the given 620c67d6573Sopenharmony_ci /// offset and populates the capture locations given. 621c67d6573Sopenharmony_ci /// 622c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 623c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 624c67d6573Sopenharmony_ci /// match when `start == 0`. 625c67d6573Sopenharmony_ci pub fn captures_read_at<'t>( 626c67d6573Sopenharmony_ci &self, 627c67d6573Sopenharmony_ci locs: &mut CaptureLocations, 628c67d6573Sopenharmony_ci text: &'t [u8], 629c67d6573Sopenharmony_ci start: usize, 630c67d6573Sopenharmony_ci ) -> Option<Match<'t>> { 631c67d6573Sopenharmony_ci self.0 632c67d6573Sopenharmony_ci .searcher() 633c67d6573Sopenharmony_ci .captures_read_at(&mut locs.0, text, start) 634c67d6573Sopenharmony_ci .map(|(s, e)| Match::new(text, s, e)) 635c67d6573Sopenharmony_ci } 636c67d6573Sopenharmony_ci 637c67d6573Sopenharmony_ci /// An undocumented alias for `captures_read_at`. 638c67d6573Sopenharmony_ci /// 639c67d6573Sopenharmony_ci /// The `regex-capi` crate previously used this routine, so to avoid 640c67d6573Sopenharmony_ci /// breaking that crate, we continue to provide the name as an undocumented 641c67d6573Sopenharmony_ci /// alias. 
642c67d6573Sopenharmony_ci #[doc(hidden)] 643c67d6573Sopenharmony_ci pub fn read_captures_at<'t>( 644c67d6573Sopenharmony_ci &self, 645c67d6573Sopenharmony_ci locs: &mut CaptureLocations, 646c67d6573Sopenharmony_ci text: &'t [u8], 647c67d6573Sopenharmony_ci start: usize, 648c67d6573Sopenharmony_ci ) -> Option<Match<'t>> { 649c67d6573Sopenharmony_ci self.captures_read_at(locs, text, start) 650c67d6573Sopenharmony_ci } 651c67d6573Sopenharmony_ci} 652c67d6573Sopenharmony_ci 653c67d6573Sopenharmony_ci/// Auxiliary methods. 654c67d6573Sopenharmony_ciimpl Regex { 655c67d6573Sopenharmony_ci /// Returns the original string of this regex. 656c67d6573Sopenharmony_ci pub fn as_str(&self) -> &str { 657c67d6573Sopenharmony_ci &self.0.regex_strings()[0] 658c67d6573Sopenharmony_ci } 659c67d6573Sopenharmony_ci 660c67d6573Sopenharmony_ci /// Returns an iterator over the capture names. 661c67d6573Sopenharmony_ci pub fn capture_names(&self) -> CaptureNames<'_> { 662c67d6573Sopenharmony_ci CaptureNames(self.0.capture_names().iter()) 663c67d6573Sopenharmony_ci } 664c67d6573Sopenharmony_ci 665c67d6573Sopenharmony_ci /// Returns the number of captures. 666c67d6573Sopenharmony_ci pub fn captures_len(&self) -> usize { 667c67d6573Sopenharmony_ci self.0.capture_names().len() 668c67d6573Sopenharmony_ci } 669c67d6573Sopenharmony_ci 670c67d6573Sopenharmony_ci /// Returns an empty set of capture locations that can be reused in 671c67d6573Sopenharmony_ci /// multiple calls to `captures_read` or `captures_read_at`. 672c67d6573Sopenharmony_ci pub fn capture_locations(&self) -> CaptureLocations { 673c67d6573Sopenharmony_ci CaptureLocations(self.0.searcher().locations()) 674c67d6573Sopenharmony_ci } 675c67d6573Sopenharmony_ci 676c67d6573Sopenharmony_ci /// An alias for `capture_locations` to preserve backward compatibility. 
677c67d6573Sopenharmony_ci /// 678c67d6573Sopenharmony_ci /// The `regex-capi` crate uses this method, so to avoid breaking that 679c67d6573Sopenharmony_ci /// crate, we continue to export it as an undocumented API. 680c67d6573Sopenharmony_ci #[doc(hidden)] 681c67d6573Sopenharmony_ci pub fn locations(&self) -> CaptureLocations { 682c67d6573Sopenharmony_ci CaptureLocations(self.0.searcher().locations()) 683c67d6573Sopenharmony_ci } 684c67d6573Sopenharmony_ci} 685c67d6573Sopenharmony_ci 686c67d6573Sopenharmony_ci/// An iterator over all non-overlapping matches for a particular string. 687c67d6573Sopenharmony_ci/// 688c67d6573Sopenharmony_ci/// The iterator yields a tuple of integers corresponding to the start and end 689c67d6573Sopenharmony_ci/// of the match. The indices are byte offsets. The iterator stops when no more 690c67d6573Sopenharmony_ci/// matches can be found. 691c67d6573Sopenharmony_ci/// 692c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 693c67d6573Sopenharmony_ci/// lifetime of the matched byte string. 694c67d6573Sopenharmony_ci#[derive(Debug)] 695c67d6573Sopenharmony_cipub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>); 696c67d6573Sopenharmony_ci 697c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for Matches<'r, 't> { 698c67d6573Sopenharmony_ci type Item = Match<'t>; 699c67d6573Sopenharmony_ci 700c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Match<'t>> { 701c67d6573Sopenharmony_ci let text = self.0.text(); 702c67d6573Sopenharmony_ci self.0.next().map(|(s, e)| Match::new(text, s, e)) 703c67d6573Sopenharmony_ci } 704c67d6573Sopenharmony_ci} 705c67d6573Sopenharmony_ci 706c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for Matches<'r, 't> {} 707c67d6573Sopenharmony_ci 708c67d6573Sopenharmony_ci/// An iterator that yields all non-overlapping capture groups matching a 709c67d6573Sopenharmony_ci/// particular regular expression. 
710c67d6573Sopenharmony_ci/// 711c67d6573Sopenharmony_ci/// The iterator stops when no more matches can be found. 712c67d6573Sopenharmony_ci/// 713c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 714c67d6573Sopenharmony_ci/// lifetime of the matched byte string. 715c67d6573Sopenharmony_ci#[derive(Debug)] 716c67d6573Sopenharmony_cipub struct CaptureMatches<'r, 't>( 717c67d6573Sopenharmony_ci re_trait::CaptureMatches<'t, ExecNoSync<'r>>, 718c67d6573Sopenharmony_ci); 719c67d6573Sopenharmony_ci 720c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for CaptureMatches<'r, 't> { 721c67d6573Sopenharmony_ci type Item = Captures<'t>; 722c67d6573Sopenharmony_ci 723c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Captures<'t>> { 724c67d6573Sopenharmony_ci self.0.next().map(|locs| Captures { 725c67d6573Sopenharmony_ci text: self.0.text(), 726c67d6573Sopenharmony_ci locs, 727c67d6573Sopenharmony_ci named_groups: self.0.regex().capture_name_idx().clone(), 728c67d6573Sopenharmony_ci }) 729c67d6573Sopenharmony_ci } 730c67d6573Sopenharmony_ci} 731c67d6573Sopenharmony_ci 732c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} 733c67d6573Sopenharmony_ci 734c67d6573Sopenharmony_ci/// Yields all substrings delimited by a regular expression match. 735c67d6573Sopenharmony_ci/// 736c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 737c67d6573Sopenharmony_ci/// lifetime of the byte string being split. 
738c67d6573Sopenharmony_ci#[derive(Debug)] 739c67d6573Sopenharmony_cipub struct Split<'r, 't> { 740c67d6573Sopenharmony_ci finder: Matches<'r, 't>, 741c67d6573Sopenharmony_ci last: usize, 742c67d6573Sopenharmony_ci} 743c67d6573Sopenharmony_ci 744c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for Split<'r, 't> { 745c67d6573Sopenharmony_ci type Item = &'t [u8]; 746c67d6573Sopenharmony_ci 747c67d6573Sopenharmony_ci fn next(&mut self) -> Option<&'t [u8]> { 748c67d6573Sopenharmony_ci let text = self.finder.0.text(); 749c67d6573Sopenharmony_ci match self.finder.next() { 750c67d6573Sopenharmony_ci None => { 751c67d6573Sopenharmony_ci if self.last > text.len() { 752c67d6573Sopenharmony_ci None 753c67d6573Sopenharmony_ci } else { 754c67d6573Sopenharmony_ci let s = &text[self.last..]; 755c67d6573Sopenharmony_ci self.last = text.len() + 1; // Next call will return None 756c67d6573Sopenharmony_ci Some(s) 757c67d6573Sopenharmony_ci } 758c67d6573Sopenharmony_ci } 759c67d6573Sopenharmony_ci Some(m) => { 760c67d6573Sopenharmony_ci let matched = &text[self.last..m.start()]; 761c67d6573Sopenharmony_ci self.last = m.end(); 762c67d6573Sopenharmony_ci Some(matched) 763c67d6573Sopenharmony_ci } 764c67d6573Sopenharmony_ci } 765c67d6573Sopenharmony_ci } 766c67d6573Sopenharmony_ci} 767c67d6573Sopenharmony_ci 768c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for Split<'r, 't> {} 769c67d6573Sopenharmony_ci 770c67d6573Sopenharmony_ci/// Yields at most `N` substrings delimited by a regular expression match. 771c67d6573Sopenharmony_ci/// 772c67d6573Sopenharmony_ci/// The last substring will be whatever remains after splitting. 773c67d6573Sopenharmony_ci/// 774c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 775c67d6573Sopenharmony_ci/// lifetime of the byte string being split. 
776c67d6573Sopenharmony_ci#[derive(Debug)] 777c67d6573Sopenharmony_cipub struct SplitN<'r, 't> { 778c67d6573Sopenharmony_ci splits: Split<'r, 't>, 779c67d6573Sopenharmony_ci n: usize, 780c67d6573Sopenharmony_ci} 781c67d6573Sopenharmony_ci 782c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for SplitN<'r, 't> { 783c67d6573Sopenharmony_ci type Item = &'t [u8]; 784c67d6573Sopenharmony_ci 785c67d6573Sopenharmony_ci fn next(&mut self) -> Option<&'t [u8]> { 786c67d6573Sopenharmony_ci if self.n == 0 { 787c67d6573Sopenharmony_ci return None; 788c67d6573Sopenharmony_ci } 789c67d6573Sopenharmony_ci 790c67d6573Sopenharmony_ci self.n -= 1; 791c67d6573Sopenharmony_ci if self.n > 0 { 792c67d6573Sopenharmony_ci return self.splits.next(); 793c67d6573Sopenharmony_ci } 794c67d6573Sopenharmony_ci 795c67d6573Sopenharmony_ci let text = self.splits.finder.0.text(); 796c67d6573Sopenharmony_ci if self.splits.last > text.len() { 797c67d6573Sopenharmony_ci // We've already returned all substrings. 798c67d6573Sopenharmony_ci None 799c67d6573Sopenharmony_ci } else { 800c67d6573Sopenharmony_ci // self.n == 0, so future calls will return None immediately 801c67d6573Sopenharmony_ci Some(&text[self.splits.last..]) 802c67d6573Sopenharmony_ci } 803c67d6573Sopenharmony_ci } 804c67d6573Sopenharmony_ci 805c67d6573Sopenharmony_ci fn size_hint(&self) -> (usize, Option<usize>) { 806c67d6573Sopenharmony_ci (0, Some(self.n)) 807c67d6573Sopenharmony_ci } 808c67d6573Sopenharmony_ci} 809c67d6573Sopenharmony_ci 810c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for SplitN<'r, 't> {} 811c67d6573Sopenharmony_ci 812c67d6573Sopenharmony_ci/// An iterator over the names of all possible captures. 813c67d6573Sopenharmony_ci/// 814c67d6573Sopenharmony_ci/// `None` indicates an unnamed capture; the first element (capture 0, the 815c67d6573Sopenharmony_ci/// whole matched region) is always unnamed. 816c67d6573Sopenharmony_ci/// 817c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression. 
818c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 819c67d6573Sopenharmony_cipub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>); 820c67d6573Sopenharmony_ci 821c67d6573Sopenharmony_ciimpl<'r> Iterator for CaptureNames<'r> { 822c67d6573Sopenharmony_ci type Item = Option<&'r str>; 823c67d6573Sopenharmony_ci 824c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Option<&'r str>> { 825c67d6573Sopenharmony_ci self.0 826c67d6573Sopenharmony_ci .next() 827c67d6573Sopenharmony_ci .as_ref() 828c67d6573Sopenharmony_ci .map(|slot| slot.as_ref().map(|name| name.as_ref())) 829c67d6573Sopenharmony_ci } 830c67d6573Sopenharmony_ci 831c67d6573Sopenharmony_ci fn size_hint(&self) -> (usize, Option<usize>) { 832c67d6573Sopenharmony_ci self.0.size_hint() 833c67d6573Sopenharmony_ci } 834c67d6573Sopenharmony_ci 835c67d6573Sopenharmony_ci fn count(self) -> usize { 836c67d6573Sopenharmony_ci self.0.count() 837c67d6573Sopenharmony_ci } 838c67d6573Sopenharmony_ci} 839c67d6573Sopenharmony_ci 840c67d6573Sopenharmony_ciimpl<'r> ExactSizeIterator for CaptureNames<'r> {} 841c67d6573Sopenharmony_ci 842c67d6573Sopenharmony_ciimpl<'r> FusedIterator for CaptureNames<'r> {} 843c67d6573Sopenharmony_ci 844c67d6573Sopenharmony_ci/// CaptureLocations is a low level representation of the raw offsets of each 845c67d6573Sopenharmony_ci/// submatch. 846c67d6573Sopenharmony_ci/// 847c67d6573Sopenharmony_ci/// You can think of this as a lower level 848c67d6573Sopenharmony_ci/// [`Captures`](struct.Captures.html), where this type does not support 849c67d6573Sopenharmony_ci/// named capturing groups directly and it does not borrow the text that these 850c67d6573Sopenharmony_ci/// offsets were matched on. 
///
/// Primarily, this type is useful when using the lower level `Regex` APIs
/// such as `captures_read`, which permits amortizing the allocation in which
/// capture match locations are stored.
///
/// In order to build a value of this type, you'll need to call the
/// `capture_locations` method on the `Regex` being used to execute the search.
/// The value returned can then be reused in subsequent searches.
#[derive(Clone, Debug)]
pub struct CaptureLocations(re_trait::Locations);

/// A type alias for `CaptureLocations` for backwards compatibility.
///
/// Previously, we exported `CaptureLocations` as `Locations` in an
/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`),
/// we continue re-exporting the same undocumented API.
#[doc(hidden)]
pub type Locations = CaptureLocations;

impl CaptureLocations {
    /// Returns the start and end positions of the Nth capture group. Returns
    /// `None` if `i` is not a valid capture group or if the capture group did
    /// not match anything. The positions returned are *always* byte indices
    /// with respect to the original string matched.
    #[inline]
    pub fn get(&self, i: usize) -> Option<(usize, usize)> {
        self.0.pos(i)
    }

    /// Returns the total number of capture groups (even if they didn't match).
    ///
    /// This is always at least `1` since every regex has at least `1`
    /// capturing group that corresponds to the entire match.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// An alias for the `get` method for backwards compatibility.
    ///
    /// Previously, we exported `get` as `pos` in an undocumented API. To
    /// prevent breaking that code (e.g., in `regex-capi`), we continue
    /// re-exporting the same undocumented API.
    #[doc(hidden)]
    #[inline]
    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
        self.get(i)
    }
}

/// Captures represents a group of captured byte strings for a single match.
///
/// The 0th capture always corresponds to the entire match. Each subsequent
/// index corresponds to the next capture group in the regex. If a capture
/// group is named, then the matched byte string is *also* available via the
/// `name` method. (Note that the 0th capture is always unnamed and so must be
/// accessed with the `get` method.)
908c67d6573Sopenharmony_ci/// 909c67d6573Sopenharmony_ci/// Positions returned from a capture group are always byte indices. 910c67d6573Sopenharmony_ci/// 911c67d6573Sopenharmony_ci/// `'t` is the lifetime of the matched text. 912c67d6573Sopenharmony_cipub struct Captures<'t> { 913c67d6573Sopenharmony_ci text: &'t [u8], 914c67d6573Sopenharmony_ci locs: re_trait::Locations, 915c67d6573Sopenharmony_ci named_groups: Arc<HashMap<String, usize>>, 916c67d6573Sopenharmony_ci} 917c67d6573Sopenharmony_ci 918c67d6573Sopenharmony_ciimpl<'t> Captures<'t> { 919c67d6573Sopenharmony_ci /// Returns the match associated with the capture group at index `i`. If 920c67d6573Sopenharmony_ci /// `i` does not correspond to a capture group, or if the capture group 921c67d6573Sopenharmony_ci /// did not participate in the match, then `None` is returned. 922c67d6573Sopenharmony_ci /// 923c67d6573Sopenharmony_ci /// # Examples 924c67d6573Sopenharmony_ci /// 925c67d6573Sopenharmony_ci /// Get the text of the match with a default of an empty string if this 926c67d6573Sopenharmony_ci /// group didn't participate in the match: 927c67d6573Sopenharmony_ci /// 928c67d6573Sopenharmony_ci /// ```rust 929c67d6573Sopenharmony_ci /// # use regex::bytes::Regex; 930c67d6573Sopenharmony_ci /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); 931c67d6573Sopenharmony_ci /// let caps = re.captures(b"abc123").unwrap(); 932c67d6573Sopenharmony_ci /// 933c67d6573Sopenharmony_ci /// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); 934c67d6573Sopenharmony_ci /// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); 935c67d6573Sopenharmony_ci /// assert_eq!(text1, &b"123"[..]); 936c67d6573Sopenharmony_ci /// assert_eq!(text2, &b""[..]); 937c67d6573Sopenharmony_ci /// ``` 938c67d6573Sopenharmony_ci pub fn get(&self, i: usize) -> Option<Match<'t>> { 939c67d6573Sopenharmony_ci self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) 940c67d6573Sopenharmony_ci } 941c67d6573Sopenharmony_ci 
942c67d6573Sopenharmony_ci /// Returns the match for the capture group named `name`. If `name` isn't a 943c67d6573Sopenharmony_ci /// valid capture group or didn't match anything, then `None` is returned. 944c67d6573Sopenharmony_ci pub fn name(&self, name: &str) -> Option<Match<'t>> { 945c67d6573Sopenharmony_ci self.named_groups.get(name).and_then(|&i| self.get(i)) 946c67d6573Sopenharmony_ci } 947c67d6573Sopenharmony_ci 948c67d6573Sopenharmony_ci /// An iterator that yields all capturing matches in the order in which 949c67d6573Sopenharmony_ci /// they appear in the regex. If a particular capture group didn't 950c67d6573Sopenharmony_ci /// participate in the match, then `None` is yielded for that capture. 951c67d6573Sopenharmony_ci /// 952c67d6573Sopenharmony_ci /// The first match always corresponds to the overall match of the regex. 953c67d6573Sopenharmony_ci pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { 954c67d6573Sopenharmony_ci SubCaptureMatches { caps: self, it: self.locs.iter() } 955c67d6573Sopenharmony_ci } 956c67d6573Sopenharmony_ci 957c67d6573Sopenharmony_ci /// Expands all instances of `$name` in `replacement` to the corresponding 958c67d6573Sopenharmony_ci /// capture group `name`, and writes them to the `dst` buffer given. 959c67d6573Sopenharmony_ci /// 960c67d6573Sopenharmony_ci /// `name` may be an integer corresponding to the index of the capture 961c67d6573Sopenharmony_ci /// group (counted by order of opening parenthesis where `0` is the 962c67d6573Sopenharmony_ci /// entire match) or it can be a name (consisting of letters, digits or 963c67d6573Sopenharmony_ci /// underscores) corresponding to a named capture group. 964c67d6573Sopenharmony_ci /// 965c67d6573Sopenharmony_ci /// If `name` isn't a valid capture group (whether the name doesn't exist 966c67d6573Sopenharmony_ci /// or isn't a valid index), then it is replaced with the empty string. 
967c67d6573Sopenharmony_ci /// 968c67d6573Sopenharmony_ci /// The longest possible name consisting of the characters `[_0-9A-Za-z]` 969c67d6573Sopenharmony_ci /// is used. e.g., `$1a` looks up the capture group named `1a` and not the 970c67d6573Sopenharmony_ci /// capture group at index `1`. To exert more precise control over the 971c67d6573Sopenharmony_ci /// name, or to refer to a capture group name that uses characters outside 972c67d6573Sopenharmony_ci /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When 973c67d6573Sopenharmony_ci /// using braces, any sequence of valid UTF-8 bytes is permitted. If the 974c67d6573Sopenharmony_ci /// sequence does not refer to a capture group name in the corresponding 975c67d6573Sopenharmony_ci /// regex, then it is replaced with an empty string. 976c67d6573Sopenharmony_ci /// 977c67d6573Sopenharmony_ci /// To write a literal `$` use `$$`. 978c67d6573Sopenharmony_ci pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) { 979c67d6573Sopenharmony_ci expand_bytes(self, replacement, dst) 980c67d6573Sopenharmony_ci } 981c67d6573Sopenharmony_ci 982c67d6573Sopenharmony_ci /// Returns the total number of capture groups (even if they didn't match). 983c67d6573Sopenharmony_ci /// 984c67d6573Sopenharmony_ci /// This is always at least `1`, since every regex has at least one capture 985c67d6573Sopenharmony_ci /// group that corresponds to the full match. 
986c67d6573Sopenharmony_ci #[inline] 987c67d6573Sopenharmony_ci pub fn len(&self) -> usize { 988c67d6573Sopenharmony_ci self.locs.len() 989c67d6573Sopenharmony_ci } 990c67d6573Sopenharmony_ci} 991c67d6573Sopenharmony_ci 992c67d6573Sopenharmony_ciimpl<'t> fmt::Debug for Captures<'t> { 993c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 994c67d6573Sopenharmony_ci f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() 995c67d6573Sopenharmony_ci } 996c67d6573Sopenharmony_ci} 997c67d6573Sopenharmony_ci 998c67d6573Sopenharmony_cistruct CapturesDebug<'c, 't>(&'c Captures<'t>); 999c67d6573Sopenharmony_ci 1000c67d6573Sopenharmony_ciimpl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { 1001c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 1002c67d6573Sopenharmony_ci fn escape_bytes(bytes: &[u8]) -> String { 1003c67d6573Sopenharmony_ci let mut s = String::new(); 1004c67d6573Sopenharmony_ci for &b in bytes { 1005c67d6573Sopenharmony_ci s.push_str(&escape_byte(b)); 1006c67d6573Sopenharmony_ci } 1007c67d6573Sopenharmony_ci s 1008c67d6573Sopenharmony_ci } 1009c67d6573Sopenharmony_ci 1010c67d6573Sopenharmony_ci fn escape_byte(byte: u8) -> String { 1011c67d6573Sopenharmony_ci use std::ascii::escape_default; 1012c67d6573Sopenharmony_ci 1013c67d6573Sopenharmony_ci let escaped: Vec<u8> = escape_default(byte).collect(); 1014c67d6573Sopenharmony_ci String::from_utf8_lossy(&escaped).into_owned() 1015c67d6573Sopenharmony_ci } 1016c67d6573Sopenharmony_ci 1017c67d6573Sopenharmony_ci // We'd like to show something nice here, even if it means an 1018c67d6573Sopenharmony_ci // allocation to build a reverse index. 
1019c67d6573Sopenharmony_ci let slot_to_name: HashMap<&usize, &String> = 1020c67d6573Sopenharmony_ci self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); 1021c67d6573Sopenharmony_ci let mut map = f.debug_map(); 1022c67d6573Sopenharmony_ci for (slot, m) in self.0.locs.iter().enumerate() { 1023c67d6573Sopenharmony_ci let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e])); 1024c67d6573Sopenharmony_ci if let Some(name) = slot_to_name.get(&slot) { 1025c67d6573Sopenharmony_ci map.entry(&name, &m); 1026c67d6573Sopenharmony_ci } else { 1027c67d6573Sopenharmony_ci map.entry(&slot, &m); 1028c67d6573Sopenharmony_ci } 1029c67d6573Sopenharmony_ci } 1030c67d6573Sopenharmony_ci map.finish() 1031c67d6573Sopenharmony_ci } 1032c67d6573Sopenharmony_ci} 1033c67d6573Sopenharmony_ci 1034c67d6573Sopenharmony_ci/// Get a group by index. 1035c67d6573Sopenharmony_ci/// 1036c67d6573Sopenharmony_ci/// `'t` is the lifetime of the matched text. 1037c67d6573Sopenharmony_ci/// 1038c67d6573Sopenharmony_ci/// The text can't outlive the `Captures` object if this method is 1039c67d6573Sopenharmony_ci/// used, because of how `Index` is defined (normally `a[i]` is part 1040c67d6573Sopenharmony_ci/// of `a` and can't outlive it); to do that, use `get()` instead. 1041c67d6573Sopenharmony_ci/// 1042c67d6573Sopenharmony_ci/// # Panics 1043c67d6573Sopenharmony_ci/// 1044c67d6573Sopenharmony_ci/// If there is no group at the given index. 1045c67d6573Sopenharmony_ciimpl<'t> Index<usize> for Captures<'t> { 1046c67d6573Sopenharmony_ci type Output = [u8]; 1047c67d6573Sopenharmony_ci 1048c67d6573Sopenharmony_ci fn index(&self, i: usize) -> &[u8] { 1049c67d6573Sopenharmony_ci self.get(i) 1050c67d6573Sopenharmony_ci .map(|m| m.as_bytes()) 1051c67d6573Sopenharmony_ci .unwrap_or_else(|| panic!("no group at index '{}'", i)) 1052c67d6573Sopenharmony_ci } 1053c67d6573Sopenharmony_ci} 1054c67d6573Sopenharmony_ci 1055c67d6573Sopenharmony_ci/// Get a group by name. 
1056c67d6573Sopenharmony_ci/// 1057c67d6573Sopenharmony_ci/// `'t` is the lifetime of the matched text and `'i` is the lifetime 1058c67d6573Sopenharmony_ci/// of the group name (the index). 1059c67d6573Sopenharmony_ci/// 1060c67d6573Sopenharmony_ci/// The text can't outlive the `Captures` object if this method is 1061c67d6573Sopenharmony_ci/// used, because of how `Index` is defined (normally `a[i]` is part 1062c67d6573Sopenharmony_ci/// of `a` and can't outlive it); to do that, use `name` instead. 1063c67d6573Sopenharmony_ci/// 1064c67d6573Sopenharmony_ci/// # Panics 1065c67d6573Sopenharmony_ci/// 1066c67d6573Sopenharmony_ci/// If there is no group named by the given value. 1067c67d6573Sopenharmony_ciimpl<'t, 'i> Index<&'i str> for Captures<'t> { 1068c67d6573Sopenharmony_ci type Output = [u8]; 1069c67d6573Sopenharmony_ci 1070c67d6573Sopenharmony_ci fn index<'a>(&'a self, name: &'i str) -> &'a [u8] { 1071c67d6573Sopenharmony_ci self.name(name) 1072c67d6573Sopenharmony_ci .map(|m| m.as_bytes()) 1073c67d6573Sopenharmony_ci .unwrap_or_else(|| panic!("no group named '{}'", name)) 1074c67d6573Sopenharmony_ci } 1075c67d6573Sopenharmony_ci} 1076c67d6573Sopenharmony_ci 1077c67d6573Sopenharmony_ci/// An iterator that yields all capturing matches in the order in which they 1078c67d6573Sopenharmony_ci/// appear in the regex. 1079c67d6573Sopenharmony_ci/// 1080c67d6573Sopenharmony_ci/// If a particular capture group didn't participate in the match, then `None` 1081c67d6573Sopenharmony_ci/// is yielded for that capture. The first match always corresponds to the 1082c67d6573Sopenharmony_ci/// overall match of the regex. 1083c67d6573Sopenharmony_ci/// 1084c67d6573Sopenharmony_ci/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and 1085c67d6573Sopenharmony_ci/// the lifetime `'t` corresponds to the originally matched text. 
1086c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 1087c67d6573Sopenharmony_cipub struct SubCaptureMatches<'c, 't> { 1088c67d6573Sopenharmony_ci caps: &'c Captures<'t>, 1089c67d6573Sopenharmony_ci it: SubCapturesPosIter<'c>, 1090c67d6573Sopenharmony_ci} 1091c67d6573Sopenharmony_ci 1092c67d6573Sopenharmony_ciimpl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { 1093c67d6573Sopenharmony_ci type Item = Option<Match<'t>>; 1094c67d6573Sopenharmony_ci 1095c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Option<Match<'t>>> { 1096c67d6573Sopenharmony_ci self.it 1097c67d6573Sopenharmony_ci .next() 1098c67d6573Sopenharmony_ci .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) 1099c67d6573Sopenharmony_ci } 1100c67d6573Sopenharmony_ci} 1101c67d6573Sopenharmony_ci 1102c67d6573Sopenharmony_ciimpl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} 1103c67d6573Sopenharmony_ci 1104c67d6573Sopenharmony_ci/// Replacer describes types that can be used to replace matches in a byte 1105c67d6573Sopenharmony_ci/// string. 1106c67d6573Sopenharmony_ci/// 1107c67d6573Sopenharmony_ci/// In general, users of this crate shouldn't need to implement this trait, 1108c67d6573Sopenharmony_ci/// since implementations are already provided for `&[u8]` along with other 1109c67d6573Sopenharmony_ci/// variants of bytes types and `FnMut(&Captures) -> Vec<u8>` (or any 1110c67d6573Sopenharmony_ci/// `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`), which covers most use cases. 1111c67d6573Sopenharmony_cipub trait Replacer { 1112c67d6573Sopenharmony_ci /// Appends text to `dst` to replace the current match. 1113c67d6573Sopenharmony_ci /// 1114c67d6573Sopenharmony_ci /// The current match is represented by `caps`, which is guaranteed to 1115c67d6573Sopenharmony_ci /// have a match at capture group `0`. 1116c67d6573Sopenharmony_ci /// 1117c67d6573Sopenharmony_ci /// For example, a no-op replacement would be 1118c67d6573Sopenharmony_ci /// `dst.extend(&caps[0])`. 
1119c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); 1120c67d6573Sopenharmony_ci 1121c67d6573Sopenharmony_ci /// Return a fixed unchanging replacement byte string. 1122c67d6573Sopenharmony_ci /// 1123c67d6573Sopenharmony_ci /// When doing replacements, if access to `Captures` is not needed (e.g., 1124c67d6573Sopenharmony_ci /// the replacement byte string does not need `$` expansion), then it can 1125c67d6573Sopenharmony_ci /// be beneficial to avoid finding sub-captures. 1126c67d6573Sopenharmony_ci /// 1127c67d6573Sopenharmony_ci /// In general, this is called once for every call to `replacen`. 1128c67d6573Sopenharmony_ci fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { 1129c67d6573Sopenharmony_ci None 1130c67d6573Sopenharmony_ci } 1131c67d6573Sopenharmony_ci 1132c67d6573Sopenharmony_ci /// Return a `Replacer` that borrows and wraps this `Replacer`. 1133c67d6573Sopenharmony_ci /// 1134c67d6573Sopenharmony_ci /// This is useful when you want to take a generic `Replacer` (which might 1135c67d6573Sopenharmony_ci /// not be cloneable) and use it without consuming it, so it can be used 1136c67d6573Sopenharmony_ci /// more than once. 
1137c67d6573Sopenharmony_ci /// 1138c67d6573Sopenharmony_ci /// # Example 1139c67d6573Sopenharmony_ci /// 1140c67d6573Sopenharmony_ci /// ``` 1141c67d6573Sopenharmony_ci /// use regex::bytes::{Regex, Replacer}; 1142c67d6573Sopenharmony_ci /// 1143c67d6573Sopenharmony_ci /// fn replace_all_twice<R: Replacer>( 1144c67d6573Sopenharmony_ci /// re: Regex, 1145c67d6573Sopenharmony_ci /// src: &[u8], 1146c67d6573Sopenharmony_ci /// mut rep: R, 1147c67d6573Sopenharmony_ci /// ) -> Vec<u8> { 1148c67d6573Sopenharmony_ci /// let dst = re.replace_all(src, rep.by_ref()); 1149c67d6573Sopenharmony_ci /// let dst = re.replace_all(&dst, rep.by_ref()); 1150c67d6573Sopenharmony_ci /// dst.into_owned() 1151c67d6573Sopenharmony_ci /// } 1152c67d6573Sopenharmony_ci /// ``` 1153c67d6573Sopenharmony_ci fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { 1154c67d6573Sopenharmony_ci ReplacerRef(self) 1155c67d6573Sopenharmony_ci } 1156c67d6573Sopenharmony_ci} 1157c67d6573Sopenharmony_ci 1158c67d6573Sopenharmony_ci/// By-reference adaptor for a `Replacer` 1159c67d6573Sopenharmony_ci/// 1160c67d6573Sopenharmony_ci/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). 
1161c67d6573Sopenharmony_ci#[derive(Debug)] 1162c67d6573Sopenharmony_cipub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); 1163c67d6573Sopenharmony_ci 1164c67d6573Sopenharmony_ciimpl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { 1165c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { 1166c67d6573Sopenharmony_ci self.0.replace_append(caps, dst) 1167c67d6573Sopenharmony_ci } 1168c67d6573Sopenharmony_ci fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { 1169c67d6573Sopenharmony_ci self.0.no_expansion() 1170c67d6573Sopenharmony_ci } 1171c67d6573Sopenharmony_ci} 1172c67d6573Sopenharmony_ci 1173c67d6573Sopenharmony_ciimpl<'a> Replacer for &'a [u8] { 1174c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { 1175c67d6573Sopenharmony_ci caps.expand(*self, dst); 1176c67d6573Sopenharmony_ci } 1177c67d6573Sopenharmony_ci 1178c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { 1179c67d6573Sopenharmony_ci no_expansion(self) 1180c67d6573Sopenharmony_ci } 1181c67d6573Sopenharmony_ci} 1182c67d6573Sopenharmony_ci 1183c67d6573Sopenharmony_ciimpl<'a> Replacer for &'a Vec<u8> { 1184c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { 1185c67d6573Sopenharmony_ci caps.expand(*self, dst); 1186c67d6573Sopenharmony_ci } 1187c67d6573Sopenharmony_ci 1188c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { 1189c67d6573Sopenharmony_ci no_expansion(self) 1190c67d6573Sopenharmony_ci } 1191c67d6573Sopenharmony_ci} 1192c67d6573Sopenharmony_ci 1193c67d6573Sopenharmony_ciimpl Replacer for Vec<u8> { 1194c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { 1195c67d6573Sopenharmony_ci caps.expand(self, dst); 1196c67d6573Sopenharmony_ci } 1197c67d6573Sopenharmony_ci 1198c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { 
1199c67d6573Sopenharmony_ci no_expansion(self) 1200c67d6573Sopenharmony_ci } 1201c67d6573Sopenharmony_ci} 1202c67d6573Sopenharmony_ci 1203c67d6573Sopenharmony_ciimpl<'a> Replacer for Cow<'a, [u8]> { 1204c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { 1205c67d6573Sopenharmony_ci caps.expand(self.as_ref(), dst); 1206c67d6573Sopenharmony_ci } 1207c67d6573Sopenharmony_ci 1208c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { 1209c67d6573Sopenharmony_ci no_expansion(self) 1210c67d6573Sopenharmony_ci } 1211c67d6573Sopenharmony_ci} 1212c67d6573Sopenharmony_ci 1213c67d6573Sopenharmony_ciimpl<'a> Replacer for &'a Cow<'a, [u8]> { 1214c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { 1215c67d6573Sopenharmony_ci caps.expand(self.as_ref(), dst); 1216c67d6573Sopenharmony_ci } 1217c67d6573Sopenharmony_ci 1218c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { 1219c67d6573Sopenharmony_ci no_expansion(self) 1220c67d6573Sopenharmony_ci } 1221c67d6573Sopenharmony_ci} 1222c67d6573Sopenharmony_ci 1223c67d6573Sopenharmony_cifn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> { 1224c67d6573Sopenharmony_ci let s = t.as_ref(); 1225c67d6573Sopenharmony_ci match find_byte(b'$', s) { 1226c67d6573Sopenharmony_ci Some(_) => None, 1227c67d6573Sopenharmony_ci None => Some(Cow::Borrowed(s)), 1228c67d6573Sopenharmony_ci } 1229c67d6573Sopenharmony_ci} 1230c67d6573Sopenharmony_ci 1231c67d6573Sopenharmony_ciimpl<F, T> Replacer for F 1232c67d6573Sopenharmony_ciwhere 1233c67d6573Sopenharmony_ci F: FnMut(&Captures<'_>) -> T, 1234c67d6573Sopenharmony_ci T: AsRef<[u8]>, 1235c67d6573Sopenharmony_ci{ 1236c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { 1237c67d6573Sopenharmony_ci dst.extend_from_slice((*self)(caps).as_ref()); 1238c67d6573Sopenharmony_ci } 1239c67d6573Sopenharmony_ci} 
1240c67d6573Sopenharmony_ci 1241c67d6573Sopenharmony_ci/// `NoExpand` indicates literal byte string replacement. 1242c67d6573Sopenharmony_ci/// 1243c67d6573Sopenharmony_ci/// It can be used with `replace` and `replace_all` to do a literal byte string 1244c67d6573Sopenharmony_ci/// replacement without expanding `$name` to their corresponding capture 1245c67d6573Sopenharmony_ci/// groups. This can be both convenient (to avoid escaping `$`, for example) 1246c67d6573Sopenharmony_ci/// and performant (since capture groups don't need to be found). 1247c67d6573Sopenharmony_ci/// 1248c67d6573Sopenharmony_ci/// `'t` is the lifetime of the literal text. 1249c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 1250c67d6573Sopenharmony_cipub struct NoExpand<'t>(pub &'t [u8]); 1251c67d6573Sopenharmony_ci 1252c67d6573Sopenharmony_ciimpl<'t> Replacer for NoExpand<'t> { 1253c67d6573Sopenharmony_ci fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) { 1254c67d6573Sopenharmony_ci dst.extend_from_slice(self.0); 1255c67d6573Sopenharmony_ci } 1256c67d6573Sopenharmony_ci 1257c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { 1258c67d6573Sopenharmony_ci Some(Cow::Borrowed(self.0)) 1259c67d6573Sopenharmony_ci } 1260c67d6573Sopenharmony_ci} 1261