1c67d6573Sopenharmony_ciuse std::borrow::Cow; 2c67d6573Sopenharmony_ciuse std::collections::HashMap; 3c67d6573Sopenharmony_ciuse std::fmt; 4c67d6573Sopenharmony_ciuse std::iter::FusedIterator; 5c67d6573Sopenharmony_ciuse std::ops::{Index, Range}; 6c67d6573Sopenharmony_ciuse std::str::FromStr; 7c67d6573Sopenharmony_ciuse std::sync::Arc; 8c67d6573Sopenharmony_ci 9c67d6573Sopenharmony_ciuse crate::find_byte::find_byte; 10c67d6573Sopenharmony_ci 11c67d6573Sopenharmony_ciuse crate::error::Error; 12c67d6573Sopenharmony_ciuse crate::exec::{Exec, ExecNoSyncStr}; 13c67d6573Sopenharmony_ciuse crate::expand::expand_str; 14c67d6573Sopenharmony_ciuse crate::re_builder::unicode::RegexBuilder; 15c67d6573Sopenharmony_ciuse crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; 16c67d6573Sopenharmony_ci 17c67d6573Sopenharmony_ci/// Escapes all regular expression meta characters in `text`. 18c67d6573Sopenharmony_ci/// 19c67d6573Sopenharmony_ci/// The string returned may be safely used as a literal in a regular 20c67d6573Sopenharmony_ci/// expression. 21c67d6573Sopenharmony_cipub fn escape(text: &str) -> String { 22c67d6573Sopenharmony_ci regex_syntax::escape(text) 23c67d6573Sopenharmony_ci} 24c67d6573Sopenharmony_ci 25c67d6573Sopenharmony_ci/// Match represents a single match of a regex in a haystack. 26c67d6573Sopenharmony_ci/// 27c67d6573Sopenharmony_ci/// The lifetime parameter `'t` refers to the lifetime of the matched text. 28c67d6573Sopenharmony_ci#[derive(Copy, Clone, Debug, Eq, PartialEq)] 29c67d6573Sopenharmony_cipub struct Match<'t> { 30c67d6573Sopenharmony_ci text: &'t str, 31c67d6573Sopenharmony_ci start: usize, 32c67d6573Sopenharmony_ci end: usize, 33c67d6573Sopenharmony_ci} 34c67d6573Sopenharmony_ci 35c67d6573Sopenharmony_ciimpl<'t> Match<'t> { 36c67d6573Sopenharmony_ci /// Returns the starting byte offset of the match in the haystack. 
37c67d6573Sopenharmony_ci #[inline] 38c67d6573Sopenharmony_ci pub fn start(&self) -> usize { 39c67d6573Sopenharmony_ci self.start 40c67d6573Sopenharmony_ci } 41c67d6573Sopenharmony_ci 42c67d6573Sopenharmony_ci /// Returns the ending byte offset of the match in the haystack. 43c67d6573Sopenharmony_ci #[inline] 44c67d6573Sopenharmony_ci pub fn end(&self) -> usize { 45c67d6573Sopenharmony_ci self.end 46c67d6573Sopenharmony_ci } 47c67d6573Sopenharmony_ci 48c67d6573Sopenharmony_ci /// Returns the range over the starting and ending byte offsets of the 49c67d6573Sopenharmony_ci /// match in the haystack. 50c67d6573Sopenharmony_ci #[inline] 51c67d6573Sopenharmony_ci pub fn range(&self) -> Range<usize> { 52c67d6573Sopenharmony_ci self.start..self.end 53c67d6573Sopenharmony_ci } 54c67d6573Sopenharmony_ci 55c67d6573Sopenharmony_ci /// Returns the matched text. 56c67d6573Sopenharmony_ci #[inline] 57c67d6573Sopenharmony_ci pub fn as_str(&self) -> &'t str { 58c67d6573Sopenharmony_ci &self.text[self.range()] 59c67d6573Sopenharmony_ci } 60c67d6573Sopenharmony_ci 61c67d6573Sopenharmony_ci /// Creates a new match from the given haystack and byte offsets. 
62c67d6573Sopenharmony_ci #[inline] 63c67d6573Sopenharmony_ci fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> { 64c67d6573Sopenharmony_ci Match { text: haystack, start, end } 65c67d6573Sopenharmony_ci } 66c67d6573Sopenharmony_ci} 67c67d6573Sopenharmony_ci 68c67d6573Sopenharmony_ciimpl<'t> From<Match<'t>> for &'t str { 69c67d6573Sopenharmony_ci fn from(m: Match<'t>) -> &'t str { 70c67d6573Sopenharmony_ci m.as_str() 71c67d6573Sopenharmony_ci } 72c67d6573Sopenharmony_ci} 73c67d6573Sopenharmony_ci 74c67d6573Sopenharmony_ciimpl<'t> From<Match<'t>> for Range<usize> { 75c67d6573Sopenharmony_ci fn from(m: Match<'t>) -> Range<usize> { 76c67d6573Sopenharmony_ci m.range() 77c67d6573Sopenharmony_ci } 78c67d6573Sopenharmony_ci} 79c67d6573Sopenharmony_ci 80c67d6573Sopenharmony_ci/// A compiled regular expression for matching Unicode strings. 81c67d6573Sopenharmony_ci/// 82c67d6573Sopenharmony_ci/// It is represented as either a sequence of bytecode instructions (dynamic) 83c67d6573Sopenharmony_ci/// or as a specialized Rust function (native). It can be used to search, split 84c67d6573Sopenharmony_ci/// or replace text. All searching is done with an implicit `.*?` at the 85c67d6573Sopenharmony_ci/// beginning and end of an expression. To force an expression to match the 86c67d6573Sopenharmony_ci/// whole string (or a prefix or a suffix), you must use an anchor like `^` or 87c67d6573Sopenharmony_ci/// `$` (or `\A` and `\z`). 88c67d6573Sopenharmony_ci/// 89c67d6573Sopenharmony_ci/// While this crate will handle Unicode strings (whether in the regular 90c67d6573Sopenharmony_ci/// expression or in the search text), all positions returned are **byte 91c67d6573Sopenharmony_ci/// indices**. Every byte index is guaranteed to be at a Unicode code point 92c67d6573Sopenharmony_ci/// boundary. 
93c67d6573Sopenharmony_ci/// 94c67d6573Sopenharmony_ci/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a 95c67d6573Sopenharmony_ci/// compiled regular expression and text to search, respectively. 96c67d6573Sopenharmony_ci/// 97c67d6573Sopenharmony_ci/// The only methods that allocate new strings are the string replacement 98c67d6573Sopenharmony_ci/// methods. All other methods (searching and splitting) return borrowed 99c67d6573Sopenharmony_ci/// pointers into the string given. 100c67d6573Sopenharmony_ci/// 101c67d6573Sopenharmony_ci/// # Examples 102c67d6573Sopenharmony_ci/// 103c67d6573Sopenharmony_ci/// Find the location of a US phone number: 104c67d6573Sopenharmony_ci/// 105c67d6573Sopenharmony_ci/// ```rust 106c67d6573Sopenharmony_ci/// # use regex::Regex; 107c67d6573Sopenharmony_ci/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); 108c67d6573Sopenharmony_ci/// let mat = re.find("phone: 111-222-3333").unwrap(); 109c67d6573Sopenharmony_ci/// assert_eq!((mat.start(), mat.end()), (7, 19)); 110c67d6573Sopenharmony_ci/// ``` 111c67d6573Sopenharmony_ci/// 112c67d6573Sopenharmony_ci/// # Using the `std::str::pattern` methods with `Regex` 113c67d6573Sopenharmony_ci/// 114c67d6573Sopenharmony_ci/// > **Note**: This section requires that this crate is compiled with the 115c67d6573Sopenharmony_ci/// > `pattern` Cargo feature enabled, which **requires nightly Rust**. 116c67d6573Sopenharmony_ci/// 117c67d6573Sopenharmony_ci/// Since `Regex` implements `Pattern`, you can use regexes with methods 118c67d6573Sopenharmony_ci/// defined on `&str`. For example, `is_match`, `find`, `find_iter` 119c67d6573Sopenharmony_ci/// and `split` can be replaced with `str::contains`, `str::find`, 120c67d6573Sopenharmony_ci/// `str::match_indices` and `str::split`. 
121c67d6573Sopenharmony_ci/// 122c67d6573Sopenharmony_ci/// Here are some examples: 123c67d6573Sopenharmony_ci/// 124c67d6573Sopenharmony_ci/// ```rust,ignore 125c67d6573Sopenharmony_ci/// # use regex::Regex; 126c67d6573Sopenharmony_ci/// let re = Regex::new(r"\d+").unwrap(); 127c67d6573Sopenharmony_ci/// let haystack = "a111b222c"; 128c67d6573Sopenharmony_ci/// 129c67d6573Sopenharmony_ci/// assert!(haystack.contains(&re)); 130c67d6573Sopenharmony_ci/// assert_eq!(haystack.find(&re), Some(1)); 131c67d6573Sopenharmony_ci/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(), 132c67d6573Sopenharmony_ci/// vec![(1, "111"), (5, "222")]); 133c67d6573Sopenharmony_ci/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); 134c67d6573Sopenharmony_ci/// ``` 135c67d6573Sopenharmony_ci#[derive(Clone)] 136c67d6573Sopenharmony_cipub struct Regex(Exec); 137c67d6573Sopenharmony_ci 138c67d6573Sopenharmony_ciimpl fmt::Display for Regex { 139c67d6573Sopenharmony_ci /// Shows the original regular expression. 140c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 141c67d6573Sopenharmony_ci write!(f, "{}", self.as_str()) 142c67d6573Sopenharmony_ci } 143c67d6573Sopenharmony_ci} 144c67d6573Sopenharmony_ci 145c67d6573Sopenharmony_ciimpl fmt::Debug for Regex { 146c67d6573Sopenharmony_ci /// Shows the original regular expression. 
147c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 148c67d6573Sopenharmony_ci fmt::Display::fmt(self, f) 149c67d6573Sopenharmony_ci } 150c67d6573Sopenharmony_ci} 151c67d6573Sopenharmony_ci 152c67d6573Sopenharmony_ci#[doc(hidden)] 153c67d6573Sopenharmony_ciimpl From<Exec> for Regex { 154c67d6573Sopenharmony_ci fn from(exec: Exec) -> Regex { 155c67d6573Sopenharmony_ci Regex(exec) 156c67d6573Sopenharmony_ci } 157c67d6573Sopenharmony_ci} 158c67d6573Sopenharmony_ci 159c67d6573Sopenharmony_ciimpl FromStr for Regex { 160c67d6573Sopenharmony_ci type Err = Error; 161c67d6573Sopenharmony_ci 162c67d6573Sopenharmony_ci /// Attempts to parse a string into a regular expression 163c67d6573Sopenharmony_ci fn from_str(s: &str) -> Result<Regex, Error> { 164c67d6573Sopenharmony_ci Regex::new(s) 165c67d6573Sopenharmony_ci } 166c67d6573Sopenharmony_ci} 167c67d6573Sopenharmony_ci 168c67d6573Sopenharmony_ci/// Core regular expression methods. 169c67d6573Sopenharmony_ciimpl Regex { 170c67d6573Sopenharmony_ci /// Compiles a regular expression. Once compiled, it can be used repeatedly 171c67d6573Sopenharmony_ci /// to search, split or replace text in a string. 172c67d6573Sopenharmony_ci /// 173c67d6573Sopenharmony_ci /// If an invalid expression is given, then an error is returned. 174c67d6573Sopenharmony_ci pub fn new(re: &str) -> Result<Regex, Error> { 175c67d6573Sopenharmony_ci RegexBuilder::new(re).build() 176c67d6573Sopenharmony_ci } 177c67d6573Sopenharmony_ci 178c67d6573Sopenharmony_ci /// Returns true if and only if there is a match for the regex in the 179c67d6573Sopenharmony_ci /// string given. 180c67d6573Sopenharmony_ci /// 181c67d6573Sopenharmony_ci /// It is recommended to use this method if all you need to do is test 182c67d6573Sopenharmony_ci /// a match, since the underlying matching engine may be able to do less 183c67d6573Sopenharmony_ci /// work. 
184c67d6573Sopenharmony_ci /// 185c67d6573Sopenharmony_ci /// # Example 186c67d6573Sopenharmony_ci /// 187c67d6573Sopenharmony_ci /// Test if some text contains at least one word with exactly 13 188c67d6573Sopenharmony_ci /// Unicode word characters: 189c67d6573Sopenharmony_ci /// 190c67d6573Sopenharmony_ci /// ```rust 191c67d6573Sopenharmony_ci /// # use regex::Regex; 192c67d6573Sopenharmony_ci /// # fn main() { 193c67d6573Sopenharmony_ci /// let text = "I categorically deny having triskaidekaphobia."; 194c67d6573Sopenharmony_ci /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); 195c67d6573Sopenharmony_ci /// # } 196c67d6573Sopenharmony_ci /// ``` 197c67d6573Sopenharmony_ci pub fn is_match(&self, text: &str) -> bool { 198c67d6573Sopenharmony_ci self.is_match_at(text, 0) 199c67d6573Sopenharmony_ci } 200c67d6573Sopenharmony_ci 201c67d6573Sopenharmony_ci /// Returns the start and end byte range of the leftmost-first match in 202c67d6573Sopenharmony_ci /// `text`. If no match exists, then `None` is returned. 203c67d6573Sopenharmony_ci /// 204c67d6573Sopenharmony_ci /// Note that this should only be used if you want to discover the position 205c67d6573Sopenharmony_ci /// of the match. Testing the existence of a match is faster if you use 206c67d6573Sopenharmony_ci /// `is_match`. 
207c67d6573Sopenharmony_ci /// 208c67d6573Sopenharmony_ci /// # Example 209c67d6573Sopenharmony_ci /// 210c67d6573Sopenharmony_ci /// Find the start and end location of the first word with exactly 13 211c67d6573Sopenharmony_ci /// Unicode word characters: 212c67d6573Sopenharmony_ci /// 213c67d6573Sopenharmony_ci /// ```rust 214c67d6573Sopenharmony_ci /// # use regex::Regex; 215c67d6573Sopenharmony_ci /// # fn main() { 216c67d6573Sopenharmony_ci /// let text = "I categorically deny having triskaidekaphobia."; 217c67d6573Sopenharmony_ci /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); 218c67d6573Sopenharmony_ci /// assert_eq!(mat.start(), 2); 219c67d6573Sopenharmony_ci /// assert_eq!(mat.end(), 15); 220c67d6573Sopenharmony_ci /// # } 221c67d6573Sopenharmony_ci /// ``` 222c67d6573Sopenharmony_ci pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> { 223c67d6573Sopenharmony_ci self.find_at(text, 0) 224c67d6573Sopenharmony_ci } 225c67d6573Sopenharmony_ci 226c67d6573Sopenharmony_ci /// Returns an iterator for each successive non-overlapping match in 227c67d6573Sopenharmony_ci /// `text`, returning the start and end byte indices with respect to 228c67d6573Sopenharmony_ci /// `text`. 
229c67d6573Sopenharmony_ci /// 230c67d6573Sopenharmony_ci /// # Example 231c67d6573Sopenharmony_ci /// 232c67d6573Sopenharmony_ci /// Find the start and end location of every word with exactly 13 Unicode 233c67d6573Sopenharmony_ci /// word characters: 234c67d6573Sopenharmony_ci /// 235c67d6573Sopenharmony_ci /// ```rust 236c67d6573Sopenharmony_ci /// # use regex::Regex; 237c67d6573Sopenharmony_ci /// # fn main() { 238c67d6573Sopenharmony_ci /// let text = "Retroactively relinquishing remunerations is reprehensible."; 239c67d6573Sopenharmony_ci /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { 240c67d6573Sopenharmony_ci /// println!("{:?}", mat); 241c67d6573Sopenharmony_ci /// } 242c67d6573Sopenharmony_ci /// # } 243c67d6573Sopenharmony_ci /// ``` 244c67d6573Sopenharmony_ci pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> { 245c67d6573Sopenharmony_ci Matches(self.0.searcher_str().find_iter(text)) 246c67d6573Sopenharmony_ci } 247c67d6573Sopenharmony_ci 248c67d6573Sopenharmony_ci /// Returns the capture groups corresponding to the leftmost-first 249c67d6573Sopenharmony_ci /// match in `text`. Capture group `0` always corresponds to the entire 250c67d6573Sopenharmony_ci /// match. If no match is found, then `None` is returned. 251c67d6573Sopenharmony_ci /// 252c67d6573Sopenharmony_ci /// You should only use `captures` if you need access to the location of 253c67d6573Sopenharmony_ci /// capturing group matches. Otherwise, `find` is faster for discovering 254c67d6573Sopenharmony_ci /// the location of the overall match. 255c67d6573Sopenharmony_ci /// 256c67d6573Sopenharmony_ci /// # Examples 257c67d6573Sopenharmony_ci /// 258c67d6573Sopenharmony_ci /// Say you have some text with movie names and their release years, 259c67d6573Sopenharmony_ci /// like "'Citizen Kane' (1941)". 
It'd be nice if we could search for text 260c67d6573Sopenharmony_ci /// looking like that, while also extracting the movie name and its release 261c67d6573Sopenharmony_ci /// year separately. 262c67d6573Sopenharmony_ci /// 263c67d6573Sopenharmony_ci /// ```rust 264c67d6573Sopenharmony_ci /// # use regex::Regex; 265c67d6573Sopenharmony_ci /// # fn main() { 266c67d6573Sopenharmony_ci /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); 267c67d6573Sopenharmony_ci /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; 268c67d6573Sopenharmony_ci /// let caps = re.captures(text).unwrap(); 269c67d6573Sopenharmony_ci /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane"); 270c67d6573Sopenharmony_ci /// assert_eq!(caps.get(2).unwrap().as_str(), "1941"); 271c67d6573Sopenharmony_ci /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); 272c67d6573Sopenharmony_ci /// // You can also access the groups by index using the Index notation. 273c67d6573Sopenharmony_ci /// // Note that this will panic on an invalid index. 274c67d6573Sopenharmony_ci /// assert_eq!(&caps[1], "Citizen Kane"); 275c67d6573Sopenharmony_ci /// assert_eq!(&caps[2], "1941"); 276c67d6573Sopenharmony_ci /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); 277c67d6573Sopenharmony_ci /// # } 278c67d6573Sopenharmony_ci /// ``` 279c67d6573Sopenharmony_ci /// 280c67d6573Sopenharmony_ci /// Note that the full match is at capture group `0`. Each subsequent 281c67d6573Sopenharmony_ci /// capture group is indexed by the order of its opening `(`. 
282c67d6573Sopenharmony_ci /// 283c67d6573Sopenharmony_ci /// We can make this example a bit clearer by using *named* capture groups: 284c67d6573Sopenharmony_ci /// 285c67d6573Sopenharmony_ci /// ```rust 286c67d6573Sopenharmony_ci /// # use regex::Regex; 287c67d6573Sopenharmony_ci /// # fn main() { 288c67d6573Sopenharmony_ci /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") 289c67d6573Sopenharmony_ci /// .unwrap(); 290c67d6573Sopenharmony_ci /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; 291c67d6573Sopenharmony_ci /// let caps = re.captures(text).unwrap(); 292c67d6573Sopenharmony_ci /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane"); 293c67d6573Sopenharmony_ci /// assert_eq!(caps.name("year").unwrap().as_str(), "1941"); 294c67d6573Sopenharmony_ci /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); 295c67d6573Sopenharmony_ci /// // You can also access the groups by name using the Index notation. 296c67d6573Sopenharmony_ci /// // Note that this will panic on an invalid group name. 297c67d6573Sopenharmony_ci /// assert_eq!(&caps["title"], "Citizen Kane"); 298c67d6573Sopenharmony_ci /// assert_eq!(&caps["year"], "1941"); 299c67d6573Sopenharmony_ci /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); 300c67d6573Sopenharmony_ci /// 301c67d6573Sopenharmony_ci /// # } 302c67d6573Sopenharmony_ci /// ``` 303c67d6573Sopenharmony_ci /// 304c67d6573Sopenharmony_ci /// Here we name the capture groups, which we can access with the `name` 305c67d6573Sopenharmony_ci /// method or the `Index` notation with a `&str`. Note that the named 306c67d6573Sopenharmony_ci /// capture groups are still accessible with `get` or the `Index` notation 307c67d6573Sopenharmony_ci /// with a `usize`. 308c67d6573Sopenharmony_ci /// 309c67d6573Sopenharmony_ci /// The `0`th capture group is always unnamed, so it must always be 310c67d6573Sopenharmony_ci /// accessed with `get(0)` or `[0]`. 
311c67d6573Sopenharmony_ci pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> { 312c67d6573Sopenharmony_ci let mut locs = self.capture_locations(); 313c67d6573Sopenharmony_ci self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { 314c67d6573Sopenharmony_ci text, 315c67d6573Sopenharmony_ci locs: locs.0, 316c67d6573Sopenharmony_ci named_groups: self.0.capture_name_idx().clone(), 317c67d6573Sopenharmony_ci }) 318c67d6573Sopenharmony_ci } 319c67d6573Sopenharmony_ci 320c67d6573Sopenharmony_ci /// Returns an iterator over all the non-overlapping capture groups matched 321c67d6573Sopenharmony_ci /// in `text`. This is operationally the same as `find_iter`, except it 322c67d6573Sopenharmony_ci /// yields information about capturing group matches. 323c67d6573Sopenharmony_ci /// 324c67d6573Sopenharmony_ci /// # Example 325c67d6573Sopenharmony_ci /// 326c67d6573Sopenharmony_ci /// We can use this to find all movie titles and their release years in 327c67d6573Sopenharmony_ci /// some text, where the movie is formatted like "'Title' (xxxx)": 328c67d6573Sopenharmony_ci /// 329c67d6573Sopenharmony_ci /// ```rust 330c67d6573Sopenharmony_ci /// # use regex::Regex; 331c67d6573Sopenharmony_ci /// # fn main() { 332c67d6573Sopenharmony_ci /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") 333c67d6573Sopenharmony_ci /// .unwrap(); 334c67d6573Sopenharmony_ci /// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; 335c67d6573Sopenharmony_ci /// for caps in re.captures_iter(text) { 336c67d6573Sopenharmony_ci /// println!("Movie: {:?}, Released: {:?}", 337c67d6573Sopenharmony_ci /// &caps["title"], &caps["year"]); 338c67d6573Sopenharmony_ci /// } 339c67d6573Sopenharmony_ci /// // Output: 340c67d6573Sopenharmony_ci /// // Movie: Citizen Kane, Released: 1941 341c67d6573Sopenharmony_ci /// // Movie: The Wizard of Oz, Released: 1939 342c67d6573Sopenharmony_ci /// // Movie: M, Released: 1931 343c67d6573Sopenharmony_ci /// # } 
344c67d6573Sopenharmony_ci /// ``` 345c67d6573Sopenharmony_ci pub fn captures_iter<'r, 't>( 346c67d6573Sopenharmony_ci &'r self, 347c67d6573Sopenharmony_ci text: &'t str, 348c67d6573Sopenharmony_ci ) -> CaptureMatches<'r, 't> { 349c67d6573Sopenharmony_ci CaptureMatches(self.0.searcher_str().captures_iter(text)) 350c67d6573Sopenharmony_ci } 351c67d6573Sopenharmony_ci 352c67d6573Sopenharmony_ci /// Returns an iterator of substrings of `text` delimited by a match of the 353c67d6573Sopenharmony_ci /// regular expression. Namely, each element of the iterator corresponds to 354c67d6573Sopenharmony_ci /// text that *isn't* matched by the regular expression. 355c67d6573Sopenharmony_ci /// 356c67d6573Sopenharmony_ci /// This method will *not* copy the text given. 357c67d6573Sopenharmony_ci /// 358c67d6573Sopenharmony_ci /// # Example 359c67d6573Sopenharmony_ci /// 360c67d6573Sopenharmony_ci /// To split a string delimited by arbitrary amounts of spaces or tabs: 361c67d6573Sopenharmony_ci /// 362c67d6573Sopenharmony_ci /// ```rust 363c67d6573Sopenharmony_ci /// # use regex::Regex; 364c67d6573Sopenharmony_ci /// # fn main() { 365c67d6573Sopenharmony_ci /// let re = Regex::new(r"[ \t]+").unwrap(); 366c67d6573Sopenharmony_ci /// let fields: Vec<&str> = re.split("a b \t c\td e").collect(); 367c67d6573Sopenharmony_ci /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]); 368c67d6573Sopenharmony_ci /// # } 369c67d6573Sopenharmony_ci /// ``` 370c67d6573Sopenharmony_ci pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> { 371c67d6573Sopenharmony_ci Split { finder: self.find_iter(text), last: 0 } 372c67d6573Sopenharmony_ci } 373c67d6573Sopenharmony_ci 374c67d6573Sopenharmony_ci /// Returns an iterator of at most `limit` substrings of `text` delimited 375c67d6573Sopenharmony_ci /// by a match of the regular expression. (A `limit` of `0` will return no 376c67d6573Sopenharmony_ci /// substrings.) 
Namely, each element of the iterator corresponds to text 377c67d6573Sopenharmony_ci /// that *isn't* matched by the regular expression. The remainder of the 378c67d6573Sopenharmony_ci /// string that is not split will be the last element in the iterator. 379c67d6573Sopenharmony_ci /// 380c67d6573Sopenharmony_ci /// This method will *not* copy the text given. 381c67d6573Sopenharmony_ci /// 382c67d6573Sopenharmony_ci /// # Example 383c67d6573Sopenharmony_ci /// 384c67d6573Sopenharmony_ci /// Get the first two words in some text: 385c67d6573Sopenharmony_ci /// 386c67d6573Sopenharmony_ci /// ```rust 387c67d6573Sopenharmony_ci /// # use regex::Regex; 388c67d6573Sopenharmony_ci /// # fn main() { 389c67d6573Sopenharmony_ci /// let re = Regex::new(r"\W+").unwrap(); 390c67d6573Sopenharmony_ci /// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect(); 391c67d6573Sopenharmony_ci /// assert_eq!(fields, vec!("Hey", "How", "are you?")); 392c67d6573Sopenharmony_ci /// # } 393c67d6573Sopenharmony_ci /// ``` 394c67d6573Sopenharmony_ci pub fn splitn<'r, 't>( 395c67d6573Sopenharmony_ci &'r self, 396c67d6573Sopenharmony_ci text: &'t str, 397c67d6573Sopenharmony_ci limit: usize, 398c67d6573Sopenharmony_ci ) -> SplitN<'r, 't> { 399c67d6573Sopenharmony_ci SplitN { splits: self.split(text), n: limit } 400c67d6573Sopenharmony_ci } 401c67d6573Sopenharmony_ci 402c67d6573Sopenharmony_ci /// Replaces the leftmost-first match with the replacement provided. 403c67d6573Sopenharmony_ci /// The replacement can be a regular string (where `$N` and `$name` are 404c67d6573Sopenharmony_ci /// expanded to match capture groups) or a function that takes the matches' 405c67d6573Sopenharmony_ci /// `Captures` and returns the replaced string. 406c67d6573Sopenharmony_ci /// 407c67d6573Sopenharmony_ci /// If no match is found, then a copy of the string is returned unchanged. 
408c67d6573Sopenharmony_ci /// 409c67d6573Sopenharmony_ci /// # Replacement string syntax 410c67d6573Sopenharmony_ci /// 411c67d6573Sopenharmony_ci /// All instances of `$name` in the replacement text is replaced with the 412c67d6573Sopenharmony_ci /// corresponding capture group `name`. 413c67d6573Sopenharmony_ci /// 414c67d6573Sopenharmony_ci /// `name` may be an integer corresponding to the index of the 415c67d6573Sopenharmony_ci /// capture group (counted by order of opening parenthesis where `0` is the 416c67d6573Sopenharmony_ci /// entire match) or it can be a name (consisting of letters, digits or 417c67d6573Sopenharmony_ci /// underscores) corresponding to a named capture group. 418c67d6573Sopenharmony_ci /// 419c67d6573Sopenharmony_ci /// If `name` isn't a valid capture group (whether the name doesn't exist 420c67d6573Sopenharmony_ci /// or isn't a valid index), then it is replaced with the empty string. 421c67d6573Sopenharmony_ci /// 422c67d6573Sopenharmony_ci /// The longest possible name is used. e.g., `$1a` looks up the capture 423c67d6573Sopenharmony_ci /// group named `1a` and not the capture group at index `1`. To exert more 424c67d6573Sopenharmony_ci /// precise control over the name, use braces, e.g., `${1}a`. 425c67d6573Sopenharmony_ci /// 426c67d6573Sopenharmony_ci /// To write a literal `$` use `$$`. 427c67d6573Sopenharmony_ci /// 428c67d6573Sopenharmony_ci /// # Examples 429c67d6573Sopenharmony_ci /// 430c67d6573Sopenharmony_ci /// Note that this function is polymorphic with respect to the replacement. 
431c67d6573Sopenharmony_ci /// In typical usage, this can just be a normal string: 432c67d6573Sopenharmony_ci /// 433c67d6573Sopenharmony_ci /// ```rust 434c67d6573Sopenharmony_ci /// # use regex::Regex; 435c67d6573Sopenharmony_ci /// # fn main() { 436c67d6573Sopenharmony_ci /// let re = Regex::new("[^01]+").unwrap(); 437c67d6573Sopenharmony_ci /// assert_eq!(re.replace("1078910", ""), "1010"); 438c67d6573Sopenharmony_ci /// # } 439c67d6573Sopenharmony_ci /// ``` 440c67d6573Sopenharmony_ci /// 441c67d6573Sopenharmony_ci /// But anything satisfying the `Replacer` trait will work. For example, 442c67d6573Sopenharmony_ci /// a closure of type `|&Captures| -> String` provides direct access to the 443c67d6573Sopenharmony_ci /// captures corresponding to a match. This allows one to access 444c67d6573Sopenharmony_ci /// capturing group matches easily: 445c67d6573Sopenharmony_ci /// 446c67d6573Sopenharmony_ci /// ```rust 447c67d6573Sopenharmony_ci /// # use regex::Regex; 448c67d6573Sopenharmony_ci /// # use regex::Captures; fn main() { 449c67d6573Sopenharmony_ci /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); 450c67d6573Sopenharmony_ci /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { 451c67d6573Sopenharmony_ci /// format!("{} {}", &caps[2], &caps[1]) 452c67d6573Sopenharmony_ci /// }); 453c67d6573Sopenharmony_ci /// assert_eq!(result, "Bruce Springsteen"); 454c67d6573Sopenharmony_ci /// # } 455c67d6573Sopenharmony_ci /// ``` 456c67d6573Sopenharmony_ci /// 457c67d6573Sopenharmony_ci /// But this is a bit cumbersome to use all the time. Instead, a simple 458c67d6573Sopenharmony_ci /// syntax is supported that expands `$name` into the corresponding capture 459c67d6573Sopenharmony_ci /// group. 
Here's the last example, but using this expansion technique 460c67d6573Sopenharmony_ci /// with named capture groups: 461c67d6573Sopenharmony_ci /// 462c67d6573Sopenharmony_ci /// ```rust 463c67d6573Sopenharmony_ci /// # use regex::Regex; 464c67d6573Sopenharmony_ci /// # fn main() { 465c67d6573Sopenharmony_ci /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); 466c67d6573Sopenharmony_ci /// let result = re.replace("Springsteen, Bruce", "$first $last"); 467c67d6573Sopenharmony_ci /// assert_eq!(result, "Bruce Springsteen"); 468c67d6573Sopenharmony_ci /// # } 469c67d6573Sopenharmony_ci /// ``` 470c67d6573Sopenharmony_ci /// 471c67d6573Sopenharmony_ci /// Note that using `$2` instead of `$first` or `$1` instead of `$last` 472c67d6573Sopenharmony_ci /// would produce the same result. To write a literal `$` use `$$`. 473c67d6573Sopenharmony_ci /// 474c67d6573Sopenharmony_ci /// Sometimes the replacement string requires use of curly braces to 475c67d6573Sopenharmony_ci /// delineate a capture group replacement and surrounding literal text. 476c67d6573Sopenharmony_ci /// For example, if we wanted to join two words together with an 477c67d6573Sopenharmony_ci /// underscore: 478c67d6573Sopenharmony_ci /// 479c67d6573Sopenharmony_ci /// ```rust 480c67d6573Sopenharmony_ci /// # use regex::Regex; 481c67d6573Sopenharmony_ci /// # fn main() { 482c67d6573Sopenharmony_ci /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); 483c67d6573Sopenharmony_ci /// let result = re.replace("deep fried", "${first}_$second"); 484c67d6573Sopenharmony_ci /// assert_eq!(result, "deep_fried"); 485c67d6573Sopenharmony_ci /// # } 486c67d6573Sopenharmony_ci /// ``` 487c67d6573Sopenharmony_ci /// 488c67d6573Sopenharmony_ci /// Without the curly braces, the capture group name `first_` would be 489c67d6573Sopenharmony_ci /// used, and since it doesn't exist, it would be replaced with the empty 490c67d6573Sopenharmony_ci /// string. 
491c67d6573Sopenharmony_ci /// 492c67d6573Sopenharmony_ci /// Finally, sometimes you just want to replace a literal string with no 493c67d6573Sopenharmony_ci /// regard for capturing group expansion. This can be done by wrapping a 494c67d6573Sopenharmony_ci /// byte string with `NoExpand`: 495c67d6573Sopenharmony_ci /// 496c67d6573Sopenharmony_ci /// ```rust 497c67d6573Sopenharmony_ci /// # use regex::Regex; 498c67d6573Sopenharmony_ci /// # fn main() { 499c67d6573Sopenharmony_ci /// use regex::NoExpand; 500c67d6573Sopenharmony_ci /// 501c67d6573Sopenharmony_ci /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); 502c67d6573Sopenharmony_ci /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); 503c67d6573Sopenharmony_ci /// assert_eq!(result, "$2 $last"); 504c67d6573Sopenharmony_ci /// # } 505c67d6573Sopenharmony_ci /// ``` 506c67d6573Sopenharmony_ci pub fn replace<'t, R: Replacer>( 507c67d6573Sopenharmony_ci &self, 508c67d6573Sopenharmony_ci text: &'t str, 509c67d6573Sopenharmony_ci rep: R, 510c67d6573Sopenharmony_ci ) -> Cow<'t, str> { 511c67d6573Sopenharmony_ci self.replacen(text, 1, rep) 512c67d6573Sopenharmony_ci } 513c67d6573Sopenharmony_ci 514c67d6573Sopenharmony_ci /// Replaces all non-overlapping matches in `text` with the replacement 515c67d6573Sopenharmony_ci /// provided. This is the same as calling `replacen` with `limit` set to 516c67d6573Sopenharmony_ci /// `0`. 517c67d6573Sopenharmony_ci /// 518c67d6573Sopenharmony_ci /// See the documentation for `replace` for details on how to access 519c67d6573Sopenharmony_ci /// capturing group matches in the replacement string. 
520c67d6573Sopenharmony_ci pub fn replace_all<'t, R: Replacer>( 521c67d6573Sopenharmony_ci &self, 522c67d6573Sopenharmony_ci text: &'t str, 523c67d6573Sopenharmony_ci rep: R, 524c67d6573Sopenharmony_ci ) -> Cow<'t, str> { 525c67d6573Sopenharmony_ci self.replacen(text, 0, rep) 526c67d6573Sopenharmony_ci } 527c67d6573Sopenharmony_ci 528c67d6573Sopenharmony_ci /// Replaces at most `limit` non-overlapping matches in `text` with the 529c67d6573Sopenharmony_ci /// replacement provided. If `limit` is 0, then all non-overlapping matches 530c67d6573Sopenharmony_ci /// are replaced. 531c67d6573Sopenharmony_ci /// 532c67d6573Sopenharmony_ci /// See the documentation for `replace` for details on how to access 533c67d6573Sopenharmony_ci /// capturing group matches in the replacement string. 534c67d6573Sopenharmony_ci pub fn replacen<'t, R: Replacer>( 535c67d6573Sopenharmony_ci &self, 536c67d6573Sopenharmony_ci text: &'t str, 537c67d6573Sopenharmony_ci limit: usize, 538c67d6573Sopenharmony_ci mut rep: R, 539c67d6573Sopenharmony_ci ) -> Cow<'t, str> { 540c67d6573Sopenharmony_ci // If we know that the replacement doesn't have any capture expansions, 541c67d6573Sopenharmony_ci // then we can use the fast path. The fast path can make a tremendous 542c67d6573Sopenharmony_ci // difference: 543c67d6573Sopenharmony_ci // 544c67d6573Sopenharmony_ci // 1) We use `find_iter` instead of `captures_iter`. Not asking for 545c67d6573Sopenharmony_ci // captures generally makes the regex engines faster. 546c67d6573Sopenharmony_ci // 2) We don't need to look up all of the capture groups and do 547c67d6573Sopenharmony_ci // replacements inside the replacement string. We just push it 548c67d6573Sopenharmony_ci // at each match and be done with it. 
549c67d6573Sopenharmony_ci if let Some(rep) = rep.no_expansion() { 550c67d6573Sopenharmony_ci let mut it = self.find_iter(text).enumerate().peekable(); 551c67d6573Sopenharmony_ci if it.peek().is_none() { 552c67d6573Sopenharmony_ci return Cow::Borrowed(text); 553c67d6573Sopenharmony_ci } 554c67d6573Sopenharmony_ci let mut new = String::with_capacity(text.len()); 555c67d6573Sopenharmony_ci let mut last_match = 0; 556c67d6573Sopenharmony_ci for (i, m) in it { 557c67d6573Sopenharmony_ci new.push_str(&text[last_match..m.start()]); 558c67d6573Sopenharmony_ci new.push_str(&rep); 559c67d6573Sopenharmony_ci last_match = m.end(); 560c67d6573Sopenharmony_ci if limit > 0 && i >= limit - 1 { 561c67d6573Sopenharmony_ci break; 562c67d6573Sopenharmony_ci } 563c67d6573Sopenharmony_ci } 564c67d6573Sopenharmony_ci new.push_str(&text[last_match..]); 565c67d6573Sopenharmony_ci return Cow::Owned(new); 566c67d6573Sopenharmony_ci } 567c67d6573Sopenharmony_ci 568c67d6573Sopenharmony_ci // The slower path, which we use if the replacement needs access to 569c67d6573Sopenharmony_ci // capture groups. 
570c67d6573Sopenharmony_ci let mut it = self.captures_iter(text).enumerate().peekable(); 571c67d6573Sopenharmony_ci if it.peek().is_none() { 572c67d6573Sopenharmony_ci return Cow::Borrowed(text); 573c67d6573Sopenharmony_ci } 574c67d6573Sopenharmony_ci let mut new = String::with_capacity(text.len()); 575c67d6573Sopenharmony_ci let mut last_match = 0; 576c67d6573Sopenharmony_ci for (i, cap) in it { 577c67d6573Sopenharmony_ci // unwrap on 0 is OK because captures only reports matches 578c67d6573Sopenharmony_ci let m = cap.get(0).unwrap(); 579c67d6573Sopenharmony_ci new.push_str(&text[last_match..m.start()]); 580c67d6573Sopenharmony_ci rep.replace_append(&cap, &mut new); 581c67d6573Sopenharmony_ci last_match = m.end(); 582c67d6573Sopenharmony_ci if limit > 0 && i >= limit - 1 { 583c67d6573Sopenharmony_ci break; 584c67d6573Sopenharmony_ci } 585c67d6573Sopenharmony_ci } 586c67d6573Sopenharmony_ci new.push_str(&text[last_match..]); 587c67d6573Sopenharmony_ci Cow::Owned(new) 588c67d6573Sopenharmony_ci } 589c67d6573Sopenharmony_ci} 590c67d6573Sopenharmony_ci 591c67d6573Sopenharmony_ci/// Advanced or "lower level" search methods. 592c67d6573Sopenharmony_ciimpl Regex { 593c67d6573Sopenharmony_ci /// Returns the end location of a match in the text given. 594c67d6573Sopenharmony_ci /// 595c67d6573Sopenharmony_ci /// This method may have the same performance characteristics as 596c67d6573Sopenharmony_ci /// `is_match`, except it provides an end location for a match. In 597c67d6573Sopenharmony_ci /// particular, the location returned *may be shorter* than the proper end 598c67d6573Sopenharmony_ci /// of the leftmost-first match. 599c67d6573Sopenharmony_ci /// 600c67d6573Sopenharmony_ci /// # Example 601c67d6573Sopenharmony_ci /// 602c67d6573Sopenharmony_ci /// Typically, `a+` would match the entire first sequence of `a` in some 603c67d6573Sopenharmony_ci /// text, but `shortest_match` can give up as soon as it sees the first 604c67d6573Sopenharmony_ci /// `a`. 
605c67d6573Sopenharmony_ci /// 606c67d6573Sopenharmony_ci /// ```rust 607c67d6573Sopenharmony_ci /// # use regex::Regex; 608c67d6573Sopenharmony_ci /// # fn main() { 609c67d6573Sopenharmony_ci /// let text = "aaaaa"; 610c67d6573Sopenharmony_ci /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); 611c67d6573Sopenharmony_ci /// assert_eq!(pos, Some(1)); 612c67d6573Sopenharmony_ci /// # } 613c67d6573Sopenharmony_ci /// ``` 614c67d6573Sopenharmony_ci pub fn shortest_match(&self, text: &str) -> Option<usize> { 615c67d6573Sopenharmony_ci self.shortest_match_at(text, 0) 616c67d6573Sopenharmony_ci } 617c67d6573Sopenharmony_ci 618c67d6573Sopenharmony_ci /// Returns the same as shortest_match, but starts the search at the given 619c67d6573Sopenharmony_ci /// offset. 620c67d6573Sopenharmony_ci /// 621c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 622c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 623c67d6573Sopenharmony_ci /// match when `start == 0`. 624c67d6573Sopenharmony_ci pub fn shortest_match_at( 625c67d6573Sopenharmony_ci &self, 626c67d6573Sopenharmony_ci text: &str, 627c67d6573Sopenharmony_ci start: usize, 628c67d6573Sopenharmony_ci ) -> Option<usize> { 629c67d6573Sopenharmony_ci self.0.searcher_str().shortest_match_at(text, start) 630c67d6573Sopenharmony_ci } 631c67d6573Sopenharmony_ci 632c67d6573Sopenharmony_ci /// Returns the same as is_match, but starts the search at the given 633c67d6573Sopenharmony_ci /// offset. 634c67d6573Sopenharmony_ci /// 635c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 636c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 637c67d6573Sopenharmony_ci /// match when `start == 0`. 
638c67d6573Sopenharmony_ci pub fn is_match_at(&self, text: &str, start: usize) -> bool { 639c67d6573Sopenharmony_ci self.0.searcher_str().is_match_at(text, start) 640c67d6573Sopenharmony_ci } 641c67d6573Sopenharmony_ci 642c67d6573Sopenharmony_ci /// Returns the same as find, but starts the search at the given 643c67d6573Sopenharmony_ci /// offset. 644c67d6573Sopenharmony_ci /// 645c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 646c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 647c67d6573Sopenharmony_ci /// match when `start == 0`. 648c67d6573Sopenharmony_ci pub fn find_at<'t>( 649c67d6573Sopenharmony_ci &self, 650c67d6573Sopenharmony_ci text: &'t str, 651c67d6573Sopenharmony_ci start: usize, 652c67d6573Sopenharmony_ci ) -> Option<Match<'t>> { 653c67d6573Sopenharmony_ci self.0 654c67d6573Sopenharmony_ci .searcher_str() 655c67d6573Sopenharmony_ci .find_at(text, start) 656c67d6573Sopenharmony_ci .map(|(s, e)| Match::new(text, s, e)) 657c67d6573Sopenharmony_ci } 658c67d6573Sopenharmony_ci 659c67d6573Sopenharmony_ci /// This is like `captures`, but uses 660c67d6573Sopenharmony_ci /// [`CaptureLocations`](struct.CaptureLocations.html) 661c67d6573Sopenharmony_ci /// instead of 662c67d6573Sopenharmony_ci /// [`Captures`](struct.Captures.html) in order to amortize allocations. 663c67d6573Sopenharmony_ci /// 664c67d6573Sopenharmony_ci /// To create a `CaptureLocations` value, use the 665c67d6573Sopenharmony_ci /// `Regex::capture_locations` method. 666c67d6573Sopenharmony_ci /// 667c67d6573Sopenharmony_ci /// This returns the overall match if this was successful, which is always 668c67d6573Sopenharmony_ci /// equivalence to the `0`th capture group. 
669c67d6573Sopenharmony_ci pub fn captures_read<'t>( 670c67d6573Sopenharmony_ci &self, 671c67d6573Sopenharmony_ci locs: &mut CaptureLocations, 672c67d6573Sopenharmony_ci text: &'t str, 673c67d6573Sopenharmony_ci ) -> Option<Match<'t>> { 674c67d6573Sopenharmony_ci self.captures_read_at(locs, text, 0) 675c67d6573Sopenharmony_ci } 676c67d6573Sopenharmony_ci 677c67d6573Sopenharmony_ci /// Returns the same as captures, but starts the search at the given 678c67d6573Sopenharmony_ci /// offset and populates the capture locations given. 679c67d6573Sopenharmony_ci /// 680c67d6573Sopenharmony_ci /// The significance of the starting point is that it takes the surrounding 681c67d6573Sopenharmony_ci /// context into consideration. For example, the `\A` anchor can only 682c67d6573Sopenharmony_ci /// match when `start == 0`. 683c67d6573Sopenharmony_ci pub fn captures_read_at<'t>( 684c67d6573Sopenharmony_ci &self, 685c67d6573Sopenharmony_ci locs: &mut CaptureLocations, 686c67d6573Sopenharmony_ci text: &'t str, 687c67d6573Sopenharmony_ci start: usize, 688c67d6573Sopenharmony_ci ) -> Option<Match<'t>> { 689c67d6573Sopenharmony_ci self.0 690c67d6573Sopenharmony_ci .searcher_str() 691c67d6573Sopenharmony_ci .captures_read_at(&mut locs.0, text, start) 692c67d6573Sopenharmony_ci .map(|(s, e)| Match::new(text, s, e)) 693c67d6573Sopenharmony_ci } 694c67d6573Sopenharmony_ci 695c67d6573Sopenharmony_ci /// An undocumented alias for `captures_read_at`. 696c67d6573Sopenharmony_ci /// 697c67d6573Sopenharmony_ci /// The `regex-capi` crate previously used this routine, so to avoid 698c67d6573Sopenharmony_ci /// breaking that crate, we continue to provide the name as an undocumented 699c67d6573Sopenharmony_ci /// alias. 
#[doc(hidden)]
pub fn read_captures_at<'t>(
    &self,
    locs: &mut CaptureLocations,
    text: &'t str,
    start: usize,
) -> Option<Match<'t>> {
    self.captures_read_at(locs, text, start)
}
}

/// Auxiliary methods.
impl Regex {
    /// Returns the original string of this regex.
    pub fn as_str(&self) -> &str {
        &self.0.regex_strings()[0]
    }

    /// Returns an iterator over the capture names.
    pub fn capture_names(&self) -> CaptureNames<'_> {
        CaptureNames(self.0.capture_names().iter())
    }

    /// Returns the number of captures.
    pub fn captures_len(&self) -> usize {
        self.0.capture_names().len()
    }

    /// Returns an empty set of capture locations that can be reused in
    /// multiple calls to `captures_read` or `captures_read_at`.
    pub fn capture_locations(&self) -> CaptureLocations {
        CaptureLocations(self.0.searcher_str().locations())
    }

    /// An alias for `capture_locations` to preserve backward compatibility.
    ///
    /// The `regex-capi` crate uses this method, so to avoid breaking that
    /// crate, we continue to export it as an undocumented API.
    #[doc(hidden)]
    pub fn locations(&self) -> CaptureLocations {
        // Delegate so the alias can never drift from the real method.
        self.capture_locations()
    }
}

/// An iterator over the names of all possible captures.
///
/// `None` indicates an unnamed capture; the first element (capture 0, the
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    fn next(&mut self) -> Option<Option<&'r str>> {
        // Each slot is an `Option<String>`; expose it as `Option<&str>`.
        self.0.next().map(|slot| slot.as_ref().map(String::as_str))
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }

    fn count(self) -> usize {
        self.0.count()
    }
}

impl<'r> ExactSizeIterator for CaptureNames<'r> {}
774c67d6573Sopenharmony_ciimpl<'r> FusedIterator for CaptureNames<'r> {} 775c67d6573Sopenharmony_ci 776c67d6573Sopenharmony_ci/// Yields all substrings delimited by a regular expression match. 777c67d6573Sopenharmony_ci/// 778c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 779c67d6573Sopenharmony_ci/// lifetime of the string being split. 780c67d6573Sopenharmony_ci#[derive(Debug)] 781c67d6573Sopenharmony_cipub struct Split<'r, 't> { 782c67d6573Sopenharmony_ci finder: Matches<'r, 't>, 783c67d6573Sopenharmony_ci last: usize, 784c67d6573Sopenharmony_ci} 785c67d6573Sopenharmony_ci 786c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for Split<'r, 't> { 787c67d6573Sopenharmony_ci type Item = &'t str; 788c67d6573Sopenharmony_ci 789c67d6573Sopenharmony_ci fn next(&mut self) -> Option<&'t str> { 790c67d6573Sopenharmony_ci let text = self.finder.0.text(); 791c67d6573Sopenharmony_ci match self.finder.next() { 792c67d6573Sopenharmony_ci None => { 793c67d6573Sopenharmony_ci if self.last > text.len() { 794c67d6573Sopenharmony_ci None 795c67d6573Sopenharmony_ci } else { 796c67d6573Sopenharmony_ci let s = &text[self.last..]; 797c67d6573Sopenharmony_ci self.last = text.len() + 1; // Next call will return None 798c67d6573Sopenharmony_ci Some(s) 799c67d6573Sopenharmony_ci } 800c67d6573Sopenharmony_ci } 801c67d6573Sopenharmony_ci Some(m) => { 802c67d6573Sopenharmony_ci let matched = &text[self.last..m.start()]; 803c67d6573Sopenharmony_ci self.last = m.end(); 804c67d6573Sopenharmony_ci Some(matched) 805c67d6573Sopenharmony_ci } 806c67d6573Sopenharmony_ci } 807c67d6573Sopenharmony_ci } 808c67d6573Sopenharmony_ci} 809c67d6573Sopenharmony_ci 810c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for Split<'r, 't> {} 811c67d6573Sopenharmony_ci 812c67d6573Sopenharmony_ci/// Yields at most `N` substrings delimited by a regular expression match. 
813c67d6573Sopenharmony_ci/// 814c67d6573Sopenharmony_ci/// The last substring will be whatever remains after splitting. 815c67d6573Sopenharmony_ci/// 816c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 817c67d6573Sopenharmony_ci/// lifetime of the string being split. 818c67d6573Sopenharmony_ci#[derive(Debug)] 819c67d6573Sopenharmony_cipub struct SplitN<'r, 't> { 820c67d6573Sopenharmony_ci splits: Split<'r, 't>, 821c67d6573Sopenharmony_ci n: usize, 822c67d6573Sopenharmony_ci} 823c67d6573Sopenharmony_ci 824c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for SplitN<'r, 't> { 825c67d6573Sopenharmony_ci type Item = &'t str; 826c67d6573Sopenharmony_ci 827c67d6573Sopenharmony_ci fn next(&mut self) -> Option<&'t str> { 828c67d6573Sopenharmony_ci if self.n == 0 { 829c67d6573Sopenharmony_ci return None; 830c67d6573Sopenharmony_ci } 831c67d6573Sopenharmony_ci 832c67d6573Sopenharmony_ci self.n -= 1; 833c67d6573Sopenharmony_ci if self.n > 0 { 834c67d6573Sopenharmony_ci return self.splits.next(); 835c67d6573Sopenharmony_ci } 836c67d6573Sopenharmony_ci 837c67d6573Sopenharmony_ci let text = self.splits.finder.0.text(); 838c67d6573Sopenharmony_ci if self.splits.last > text.len() { 839c67d6573Sopenharmony_ci // We've already returned all substrings. 
840c67d6573Sopenharmony_ci None 841c67d6573Sopenharmony_ci } else { 842c67d6573Sopenharmony_ci // self.n == 0, so future calls will return None immediately 843c67d6573Sopenharmony_ci Some(&text[self.splits.last..]) 844c67d6573Sopenharmony_ci } 845c67d6573Sopenharmony_ci } 846c67d6573Sopenharmony_ci 847c67d6573Sopenharmony_ci fn size_hint(&self) -> (usize, Option<usize>) { 848c67d6573Sopenharmony_ci (0, Some(self.n)) 849c67d6573Sopenharmony_ci } 850c67d6573Sopenharmony_ci} 851c67d6573Sopenharmony_ci 852c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for SplitN<'r, 't> {} 853c67d6573Sopenharmony_ci 854c67d6573Sopenharmony_ci/// CaptureLocations is a low level representation of the raw offsets of each 855c67d6573Sopenharmony_ci/// submatch. 856c67d6573Sopenharmony_ci/// 857c67d6573Sopenharmony_ci/// You can think of this as a lower level 858c67d6573Sopenharmony_ci/// [`Captures`](struct.Captures.html), where this type does not support 859c67d6573Sopenharmony_ci/// named capturing groups directly and it does not borrow the text that these 860c67d6573Sopenharmony_ci/// offsets were matched on. 861c67d6573Sopenharmony_ci/// 862c67d6573Sopenharmony_ci/// Primarily, this type is useful when using the lower level `Regex` APIs 863c67d6573Sopenharmony_ci/// such as `read_captures`, which permits amortizing the allocation in which 864c67d6573Sopenharmony_ci/// capture match locations are stored. 865c67d6573Sopenharmony_ci/// 866c67d6573Sopenharmony_ci/// In order to build a value of this type, you'll need to call the 867c67d6573Sopenharmony_ci/// `capture_locations` method on the `Regex` being used to execute the search. 868c67d6573Sopenharmony_ci/// The value returned can then be reused in subsequent searches. 869c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 870c67d6573Sopenharmony_cipub struct CaptureLocations(re_trait::Locations); 871c67d6573Sopenharmony_ci 872c67d6573Sopenharmony_ci/// A type alias for `CaptureLocations` for backwards compatibility. 
873c67d6573Sopenharmony_ci/// 874c67d6573Sopenharmony_ci/// Previously, we exported `CaptureLocations` as `Locations` in an 875c67d6573Sopenharmony_ci/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), 876c67d6573Sopenharmony_ci/// we continue re-exporting the same undocumented API. 877c67d6573Sopenharmony_ci#[doc(hidden)] 878c67d6573Sopenharmony_cipub type Locations = CaptureLocations; 879c67d6573Sopenharmony_ci 880c67d6573Sopenharmony_ciimpl CaptureLocations { 881c67d6573Sopenharmony_ci /// Returns the start and end positions of the Nth capture group. Returns 882c67d6573Sopenharmony_ci /// `None` if `i` is not a valid capture group or if the capture group did 883c67d6573Sopenharmony_ci /// not match anything. The positions returned are *always* byte indices 884c67d6573Sopenharmony_ci /// with respect to the original string matched. 885c67d6573Sopenharmony_ci #[inline] 886c67d6573Sopenharmony_ci pub fn get(&self, i: usize) -> Option<(usize, usize)> { 887c67d6573Sopenharmony_ci self.0.pos(i) 888c67d6573Sopenharmony_ci } 889c67d6573Sopenharmony_ci 890c67d6573Sopenharmony_ci /// Returns the total number of capture groups (even if they didn't match). 891c67d6573Sopenharmony_ci /// 892c67d6573Sopenharmony_ci /// This is always at least `1` since every regex has at least `1` 893c67d6573Sopenharmony_ci /// capturing group that corresponds to the entire match. 894c67d6573Sopenharmony_ci #[inline] 895c67d6573Sopenharmony_ci pub fn len(&self) -> usize { 896c67d6573Sopenharmony_ci self.0.len() 897c67d6573Sopenharmony_ci } 898c67d6573Sopenharmony_ci 899c67d6573Sopenharmony_ci /// An alias for the `get` method for backwards compatibility. 900c67d6573Sopenharmony_ci /// 901c67d6573Sopenharmony_ci /// Previously, we exported `get` as `pos` in an undocumented API. To 902c67d6573Sopenharmony_ci /// prevent breaking that code (e.g., in `regex-capi`), we continue 903c67d6573Sopenharmony_ci /// re-exporting the same undocumented API. 
#[doc(hidden)]
#[inline]
pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
    self.get(i)
}
}

/// Captures represents a group of captured strings for a single match.
///
/// The 0th capture always corresponds to the entire match. Each subsequent
/// index corresponds to the next capture group in the regex. If a capture
/// group is named, then the matched string is *also* available via the `name`
/// method. (Note that the 0th capture is always unnamed and so must be
/// accessed with the `get` method.)
///
/// Positions returned from a capture group are always byte indices.
///
/// `'t` is the lifetime of the matched text.
pub struct Captures<'t> {
    // The haystack the capture offsets refer to.
    text: &'t str,
    // Byte offsets of every capture group for this match.
    locs: re_trait::Locations,
    // Maps a capture group name to its index.
    named_groups: Arc<HashMap<String, usize>>,
}

impl<'t> Captures<'t> {
    /// Returns the match associated with the capture group at index `i`. If
    /// `i` does not correspond to a capture group, or if the capture group
    /// did not participate in the match, then `None` is returned.
932c67d6573Sopenharmony_ci /// 933c67d6573Sopenharmony_ci /// # Examples 934c67d6573Sopenharmony_ci /// 935c67d6573Sopenharmony_ci /// Get the text of the match with a default of an empty string if this 936c67d6573Sopenharmony_ci /// group didn't participate in the match: 937c67d6573Sopenharmony_ci /// 938c67d6573Sopenharmony_ci /// ```rust 939c67d6573Sopenharmony_ci /// # use regex::Regex; 940c67d6573Sopenharmony_ci /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); 941c67d6573Sopenharmony_ci /// let caps = re.captures("abc123").unwrap(); 942c67d6573Sopenharmony_ci /// 943c67d6573Sopenharmony_ci /// let text1 = caps.get(1).map_or("", |m| m.as_str()); 944c67d6573Sopenharmony_ci /// let text2 = caps.get(2).map_or("", |m| m.as_str()); 945c67d6573Sopenharmony_ci /// assert_eq!(text1, "123"); 946c67d6573Sopenharmony_ci /// assert_eq!(text2, ""); 947c67d6573Sopenharmony_ci /// ``` 948c67d6573Sopenharmony_ci pub fn get(&self, i: usize) -> Option<Match<'t>> { 949c67d6573Sopenharmony_ci self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) 950c67d6573Sopenharmony_ci } 951c67d6573Sopenharmony_ci 952c67d6573Sopenharmony_ci /// Returns the match for the capture group named `name`. If `name` isn't a 953c67d6573Sopenharmony_ci /// valid capture group or didn't match anything, then `None` is returned. 954c67d6573Sopenharmony_ci pub fn name(&self, name: &str) -> Option<Match<'t>> { 955c67d6573Sopenharmony_ci self.named_groups.get(name).and_then(|&i| self.get(i)) 956c67d6573Sopenharmony_ci } 957c67d6573Sopenharmony_ci 958c67d6573Sopenharmony_ci /// An iterator that yields all capturing matches in the order in which 959c67d6573Sopenharmony_ci /// they appear in the regex. If a particular capture group didn't 960c67d6573Sopenharmony_ci /// participate in the match, then `None` is yielded for that capture. 961c67d6573Sopenharmony_ci /// 962c67d6573Sopenharmony_ci /// The first match always corresponds to the overall match of the regex. 
963c67d6573Sopenharmony_ci pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { 964c67d6573Sopenharmony_ci SubCaptureMatches { caps: self, it: self.locs.iter() } 965c67d6573Sopenharmony_ci } 966c67d6573Sopenharmony_ci 967c67d6573Sopenharmony_ci /// Expands all instances of `$name` in `replacement` to the corresponding 968c67d6573Sopenharmony_ci /// capture group `name`, and writes them to the `dst` buffer given. 969c67d6573Sopenharmony_ci /// 970c67d6573Sopenharmony_ci /// `name` may be an integer corresponding to the index of the capture 971c67d6573Sopenharmony_ci /// group (counted by order of opening parenthesis where `0` is the 972c67d6573Sopenharmony_ci /// entire match) or it can be a name (consisting of letters, digits or 973c67d6573Sopenharmony_ci /// underscores) corresponding to a named capture group. 974c67d6573Sopenharmony_ci /// 975c67d6573Sopenharmony_ci /// If `name` isn't a valid capture group (whether the name doesn't exist 976c67d6573Sopenharmony_ci /// or isn't a valid index), then it is replaced with the empty string. 977c67d6573Sopenharmony_ci /// 978c67d6573Sopenharmony_ci /// The longest possible name consisting of the characters `[_0-9A-Za-z]` 979c67d6573Sopenharmony_ci /// is used. e.g., `$1a` looks up the capture group named `1a` and not the 980c67d6573Sopenharmony_ci /// capture group at index `1`. To exert more precise control over the 981c67d6573Sopenharmony_ci /// name, or to refer to a capture group name that uses characters outside 982c67d6573Sopenharmony_ci /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When 983c67d6573Sopenharmony_ci /// using braces, any sequence of characters is permitted. If the sequence 984c67d6573Sopenharmony_ci /// does not refer to a capture group name in the corresponding regex, then 985c67d6573Sopenharmony_ci /// it is replaced with an empty string. 986c67d6573Sopenharmony_ci /// 987c67d6573Sopenharmony_ci /// To write a literal `$` use `$$`. 
pub fn expand(&self, replacement: &str, dst: &mut String) {
    expand_str(self, replacement, dst)
}

/// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1`, since every regex has at least one capture
/// group that corresponds to the full match.
#[inline]
pub fn len(&self) -> usize {
    self.locs.len()
}
}

impl<'t> fmt::Debug for Captures<'t> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
    }
}

// Helper that renders the capture groups of a `Captures` as a debug map.
struct CapturesDebug<'c, 't>(&'c Captures<'t>);

impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // We'd like to show something nice here, even if it means an
        // allocation to build a reverse index.
        let slot_to_name: HashMap<&usize, &String> =
            self.0.named_groups.iter().map(|(a, b)| (b, a)).collect();
        let mut map = f.debug_map();
        for (slot, m) in self.0.locs.iter().enumerate() {
            let m = m.map(|(s, e)| &self.0.text[s..e]);
            // Key named groups by name and unnamed groups by index.
            if let Some(name) = slot_to_name.get(&slot) {
                map.entry(&name, &m);
            } else {
                map.entry(&slot, &m);
            }
        }
        map.finish()
    }
}

/// Get a group by index.
///
/// `'t` is the lifetime of the matched text.
///
/// The text can't outlive the `Captures` object if this method is
/// used, because of how `Index` is defined (normally `a[i]` is part
/// of `a` and can't outlive it); to do that, use `get()` instead.
///
/// # Panics
///
/// If there is no group at the given index.
impl<'t> Index<usize> for Captures<'t> {
    type Output = str;

    fn index(&self, i: usize) -> &str {
        self.get(i)
            .map(|m| m.as_str())
            .unwrap_or_else(|| panic!("no group at index '{}'", i))
    }
}

/// Get a group by name.
1051c67d6573Sopenharmony_ci/// 1052c67d6573Sopenharmony_ci/// `'t` is the lifetime of the matched text and `'i` is the lifetime 1053c67d6573Sopenharmony_ci/// of the group name (the index). 1054c67d6573Sopenharmony_ci/// 1055c67d6573Sopenharmony_ci/// The text can't outlive the `Captures` object if this method is 1056c67d6573Sopenharmony_ci/// used, because of how `Index` is defined (normally `a[i]` is part 1057c67d6573Sopenharmony_ci/// of `a` and can't outlive it); to do that, use `name` instead. 1058c67d6573Sopenharmony_ci/// 1059c67d6573Sopenharmony_ci/// # Panics 1060c67d6573Sopenharmony_ci/// 1061c67d6573Sopenharmony_ci/// If there is no group named by the given value. 1062c67d6573Sopenharmony_ciimpl<'t, 'i> Index<&'i str> for Captures<'t> { 1063c67d6573Sopenharmony_ci type Output = str; 1064c67d6573Sopenharmony_ci 1065c67d6573Sopenharmony_ci fn index<'a>(&'a self, name: &'i str) -> &'a str { 1066c67d6573Sopenharmony_ci self.name(name) 1067c67d6573Sopenharmony_ci .map(|m| m.as_str()) 1068c67d6573Sopenharmony_ci .unwrap_or_else(|| panic!("no group named '{}'", name)) 1069c67d6573Sopenharmony_ci } 1070c67d6573Sopenharmony_ci} 1071c67d6573Sopenharmony_ci 1072c67d6573Sopenharmony_ci/// An iterator that yields all capturing matches in the order in which they 1073c67d6573Sopenharmony_ci/// appear in the regex. 1074c67d6573Sopenharmony_ci/// 1075c67d6573Sopenharmony_ci/// If a particular capture group didn't participate in the match, then `None` 1076c67d6573Sopenharmony_ci/// is yielded for that capture. The first match always corresponds to the 1077c67d6573Sopenharmony_ci/// overall match of the regex. 1078c67d6573Sopenharmony_ci/// 1079c67d6573Sopenharmony_ci/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and 1080c67d6573Sopenharmony_ci/// the lifetime `'t` corresponds to the originally matched text. 
1081c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 1082c67d6573Sopenharmony_cipub struct SubCaptureMatches<'c, 't> { 1083c67d6573Sopenharmony_ci caps: &'c Captures<'t>, 1084c67d6573Sopenharmony_ci it: SubCapturesPosIter<'c>, 1085c67d6573Sopenharmony_ci} 1086c67d6573Sopenharmony_ci 1087c67d6573Sopenharmony_ciimpl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { 1088c67d6573Sopenharmony_ci type Item = Option<Match<'t>>; 1089c67d6573Sopenharmony_ci 1090c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Option<Match<'t>>> { 1091c67d6573Sopenharmony_ci self.it 1092c67d6573Sopenharmony_ci .next() 1093c67d6573Sopenharmony_ci .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) 1094c67d6573Sopenharmony_ci } 1095c67d6573Sopenharmony_ci 1096c67d6573Sopenharmony_ci fn size_hint(&self) -> (usize, Option<usize>) { 1097c67d6573Sopenharmony_ci self.it.size_hint() 1098c67d6573Sopenharmony_ci } 1099c67d6573Sopenharmony_ci 1100c67d6573Sopenharmony_ci fn count(self) -> usize { 1101c67d6573Sopenharmony_ci self.it.count() 1102c67d6573Sopenharmony_ci } 1103c67d6573Sopenharmony_ci} 1104c67d6573Sopenharmony_ci 1105c67d6573Sopenharmony_ciimpl<'c, 't> ExactSizeIterator for SubCaptureMatches<'c, 't> {} 1106c67d6573Sopenharmony_ci 1107c67d6573Sopenharmony_ciimpl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} 1108c67d6573Sopenharmony_ci 1109c67d6573Sopenharmony_ci/// An iterator that yields all non-overlapping capture groups matching a 1110c67d6573Sopenharmony_ci/// particular regular expression. 1111c67d6573Sopenharmony_ci/// 1112c67d6573Sopenharmony_ci/// The iterator stops when no more matches can be found. 1113c67d6573Sopenharmony_ci/// 1114c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 1115c67d6573Sopenharmony_ci/// lifetime of the matched string. 
1116c67d6573Sopenharmony_ci#[derive(Debug)] 1117c67d6573Sopenharmony_cipub struct CaptureMatches<'r, 't>( 1118c67d6573Sopenharmony_ci re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>, 1119c67d6573Sopenharmony_ci); 1120c67d6573Sopenharmony_ci 1121c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for CaptureMatches<'r, 't> { 1122c67d6573Sopenharmony_ci type Item = Captures<'t>; 1123c67d6573Sopenharmony_ci 1124c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Captures<'t>> { 1125c67d6573Sopenharmony_ci self.0.next().map(|locs| Captures { 1126c67d6573Sopenharmony_ci text: self.0.text(), 1127c67d6573Sopenharmony_ci locs, 1128c67d6573Sopenharmony_ci named_groups: self.0.regex().capture_name_idx().clone(), 1129c67d6573Sopenharmony_ci }) 1130c67d6573Sopenharmony_ci } 1131c67d6573Sopenharmony_ci} 1132c67d6573Sopenharmony_ci 1133c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} 1134c67d6573Sopenharmony_ci 1135c67d6573Sopenharmony_ci/// An iterator over all non-overlapping matches for a particular string. 1136c67d6573Sopenharmony_ci/// 1137c67d6573Sopenharmony_ci/// The iterator yields a `Match` value. The iterator stops when no more 1138c67d6573Sopenharmony_ci/// matches can be found. 1139c67d6573Sopenharmony_ci/// 1140c67d6573Sopenharmony_ci/// `'r` is the lifetime of the compiled regular expression and `'t` is the 1141c67d6573Sopenharmony_ci/// lifetime of the matched string. 
1142c67d6573Sopenharmony_ci#[derive(Debug)] 1143c67d6573Sopenharmony_cipub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>); 1144c67d6573Sopenharmony_ci 1145c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for Matches<'r, 't> { 1146c67d6573Sopenharmony_ci type Item = Match<'t>; 1147c67d6573Sopenharmony_ci 1148c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Match<'t>> { 1149c67d6573Sopenharmony_ci let text = self.0.text(); 1150c67d6573Sopenharmony_ci self.0.next().map(|(s, e)| Match::new(text, s, e)) 1151c67d6573Sopenharmony_ci } 1152c67d6573Sopenharmony_ci} 1153c67d6573Sopenharmony_ci 1154c67d6573Sopenharmony_ciimpl<'r, 't> FusedIterator for Matches<'r, 't> {} 1155c67d6573Sopenharmony_ci 1156c67d6573Sopenharmony_ci/// Replacer describes types that can be used to replace matches in a string. 1157c67d6573Sopenharmony_ci/// 1158c67d6573Sopenharmony_ci/// In general, users of this crate shouldn't need to implement this trait, 1159c67d6573Sopenharmony_ci/// since implementations are already provided for `&str` along with other 1160c67d6573Sopenharmony_ci/// variants of string types and `FnMut(&Captures) -> String` (or any 1161c67d6573Sopenharmony_ci/// `FnMut(&Captures) -> T` where `T: AsRef<str>`), which covers most use cases. 1162c67d6573Sopenharmony_cipub trait Replacer { 1163c67d6573Sopenharmony_ci /// Appends text to `dst` to replace the current match. 1164c67d6573Sopenharmony_ci /// 1165c67d6573Sopenharmony_ci /// The current match is represented by `caps`, which is guaranteed to 1166c67d6573Sopenharmony_ci /// have a match at capture group `0`. 1167c67d6573Sopenharmony_ci /// 1168c67d6573Sopenharmony_ci /// For example, a no-op replacement would be 1169c67d6573Sopenharmony_ci /// `dst.push_str(caps.get(0).unwrap().as_str())`. 1170c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); 1171c67d6573Sopenharmony_ci 1172c67d6573Sopenharmony_ci /// Return a fixed unchanging replacement string. 
1173c67d6573Sopenharmony_ci /// 1174c67d6573Sopenharmony_ci /// When doing replacements, if access to `Captures` is not needed (e.g., 1175c67d6573Sopenharmony_ci /// the replacement byte string does not need `$` expansion), then it can 1176c67d6573Sopenharmony_ci /// be beneficial to avoid finding sub-captures. 1177c67d6573Sopenharmony_ci /// 1178c67d6573Sopenharmony_ci /// In general, this is called once for every call to `replacen`. 1179c67d6573Sopenharmony_ci fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> { 1180c67d6573Sopenharmony_ci None 1181c67d6573Sopenharmony_ci } 1182c67d6573Sopenharmony_ci 1183c67d6573Sopenharmony_ci /// Return a `Replacer` that borrows and wraps this `Replacer`. 1184c67d6573Sopenharmony_ci /// 1185c67d6573Sopenharmony_ci /// This is useful when you want to take a generic `Replacer` (which might 1186c67d6573Sopenharmony_ci /// not be cloneable) and use it without consuming it, so it can be used 1187c67d6573Sopenharmony_ci /// more than once. 1188c67d6573Sopenharmony_ci /// 1189c67d6573Sopenharmony_ci /// # Example 1190c67d6573Sopenharmony_ci /// 1191c67d6573Sopenharmony_ci /// ``` 1192c67d6573Sopenharmony_ci /// use regex::{Regex, Replacer}; 1193c67d6573Sopenharmony_ci /// 1194c67d6573Sopenharmony_ci /// fn replace_all_twice<R: Replacer>( 1195c67d6573Sopenharmony_ci /// re: Regex, 1196c67d6573Sopenharmony_ci /// src: &str, 1197c67d6573Sopenharmony_ci /// mut rep: R, 1198c67d6573Sopenharmony_ci /// ) -> String { 1199c67d6573Sopenharmony_ci /// let dst = re.replace_all(src, rep.by_ref()); 1200c67d6573Sopenharmony_ci /// let dst = re.replace_all(&dst, rep.by_ref()); 1201c67d6573Sopenharmony_ci /// dst.into_owned() 1202c67d6573Sopenharmony_ci /// } 1203c67d6573Sopenharmony_ci /// ``` 1204c67d6573Sopenharmony_ci fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { 1205c67d6573Sopenharmony_ci ReplacerRef(self) 1206c67d6573Sopenharmony_ci } 1207c67d6573Sopenharmony_ci} 1208c67d6573Sopenharmony_ci 1209c67d6573Sopenharmony_ci/// 
By-reference adaptor for a `Replacer` 1210c67d6573Sopenharmony_ci/// 1211c67d6573Sopenharmony_ci/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). 1212c67d6573Sopenharmony_ci#[derive(Debug)] 1213c67d6573Sopenharmony_cipub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); 1214c67d6573Sopenharmony_ci 1215c67d6573Sopenharmony_ciimpl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { 1216c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { 1217c67d6573Sopenharmony_ci self.0.replace_append(caps, dst) 1218c67d6573Sopenharmony_ci } 1219c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, str>> { 1220c67d6573Sopenharmony_ci self.0.no_expansion() 1221c67d6573Sopenharmony_ci } 1222c67d6573Sopenharmony_ci} 1223c67d6573Sopenharmony_ci 1224c67d6573Sopenharmony_ciimpl<'a> Replacer for &'a str { 1225c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { 1226c67d6573Sopenharmony_ci caps.expand(*self, dst); 1227c67d6573Sopenharmony_ci } 1228c67d6573Sopenharmony_ci 1229c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, str>> { 1230c67d6573Sopenharmony_ci no_expansion(self) 1231c67d6573Sopenharmony_ci } 1232c67d6573Sopenharmony_ci} 1233c67d6573Sopenharmony_ci 1234c67d6573Sopenharmony_ciimpl<'a> Replacer for &'a String { 1235c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { 1236c67d6573Sopenharmony_ci self.as_str().replace_append(caps, dst) 1237c67d6573Sopenharmony_ci } 1238c67d6573Sopenharmony_ci 1239c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, str>> { 1240c67d6573Sopenharmony_ci no_expansion(self) 1241c67d6573Sopenharmony_ci } 1242c67d6573Sopenharmony_ci} 1243c67d6573Sopenharmony_ci 1244c67d6573Sopenharmony_ciimpl Replacer for String { 1245c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { 1246c67d6573Sopenharmony_ci 
self.as_str().replace_append(caps, dst) 1247c67d6573Sopenharmony_ci } 1248c67d6573Sopenharmony_ci 1249c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, str>> { 1250c67d6573Sopenharmony_ci no_expansion(self) 1251c67d6573Sopenharmony_ci } 1252c67d6573Sopenharmony_ci} 1253c67d6573Sopenharmony_ci 1254c67d6573Sopenharmony_ciimpl<'a> Replacer for Cow<'a, str> { 1255c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { 1256c67d6573Sopenharmony_ci self.as_ref().replace_append(caps, dst) 1257c67d6573Sopenharmony_ci } 1258c67d6573Sopenharmony_ci 1259c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, str>> { 1260c67d6573Sopenharmony_ci no_expansion(self) 1261c67d6573Sopenharmony_ci } 1262c67d6573Sopenharmony_ci} 1263c67d6573Sopenharmony_ci 1264c67d6573Sopenharmony_ciimpl<'a> Replacer for &'a Cow<'a, str> { 1265c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { 1266c67d6573Sopenharmony_ci self.as_ref().replace_append(caps, dst) 1267c67d6573Sopenharmony_ci } 1268c67d6573Sopenharmony_ci 1269c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, str>> { 1270c67d6573Sopenharmony_ci no_expansion(self) 1271c67d6573Sopenharmony_ci } 1272c67d6573Sopenharmony_ci} 1273c67d6573Sopenharmony_ci 1274c67d6573Sopenharmony_cifn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> { 1275c67d6573Sopenharmony_ci let s = t.as_ref(); 1276c67d6573Sopenharmony_ci match find_byte(b'$', s.as_bytes()) { 1277c67d6573Sopenharmony_ci Some(_) => None, 1278c67d6573Sopenharmony_ci None => Some(Cow::Borrowed(s)), 1279c67d6573Sopenharmony_ci } 1280c67d6573Sopenharmony_ci} 1281c67d6573Sopenharmony_ci 1282c67d6573Sopenharmony_ciimpl<F, T> Replacer for F 1283c67d6573Sopenharmony_ciwhere 1284c67d6573Sopenharmony_ci F: FnMut(&Captures<'_>) -> T, 1285c67d6573Sopenharmony_ci T: AsRef<str>, 1286c67d6573Sopenharmony_ci{ 1287c67d6573Sopenharmony_ci fn replace_append(&mut self, caps: 
&Captures<'_>, dst: &mut String) { 1288c67d6573Sopenharmony_ci dst.push_str((*self)(caps).as_ref()); 1289c67d6573Sopenharmony_ci } 1290c67d6573Sopenharmony_ci} 1291c67d6573Sopenharmony_ci 1292c67d6573Sopenharmony_ci/// `NoExpand` indicates literal string replacement. 1293c67d6573Sopenharmony_ci/// 1294c67d6573Sopenharmony_ci/// It can be used with `replace` and `replace_all` to do a literal string 1295c67d6573Sopenharmony_ci/// replacement without expanding `$name` to their corresponding capture 1296c67d6573Sopenharmony_ci/// groups. This can be both convenient (to avoid escaping `$`, for example) 1297c67d6573Sopenharmony_ci/// and performant (since capture groups don't need to be found). 1298c67d6573Sopenharmony_ci/// 1299c67d6573Sopenharmony_ci/// `'t` is the lifetime of the literal text. 1300c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 1301c67d6573Sopenharmony_cipub struct NoExpand<'t>(pub &'t str); 1302c67d6573Sopenharmony_ci 1303c67d6573Sopenharmony_ciimpl<'t> Replacer for NoExpand<'t> { 1304c67d6573Sopenharmony_ci fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { 1305c67d6573Sopenharmony_ci dst.push_str(self.0); 1306c67d6573Sopenharmony_ci } 1307c67d6573Sopenharmony_ci 1308c67d6573Sopenharmony_ci fn no_expansion(&mut self) -> Option<Cow<'_, str>> { 1309c67d6573Sopenharmony_ci Some(Cow::Borrowed(self.0)) 1310c67d6573Sopenharmony_ci } 1311c67d6573Sopenharmony_ci} 1312