1c67d6573Sopenharmony_ci#![allow(non_snake_case)]
2c67d6573Sopenharmony_ci
3c67d6573Sopenharmony_ciuse std::ffi::{CStr, CString};
4c67d6573Sopenharmony_ciuse std::fmt;
5c67d6573Sopenharmony_ciuse std::ptr;
6c67d6573Sopenharmony_ci
7c67d6573Sopenharmony_ciuse libc::{c_char, c_int, c_void};
8c67d6573Sopenharmony_ciuse libpcre_sys::{
9c67d6573Sopenharmony_ci    pcre, pcre_compile, pcre_exec, pcre_extra, pcre_free, pcre_free_study,
10c67d6573Sopenharmony_ci    pcre_study, PCRE_ERROR_NOMATCH, PCRE_NO_UTF8_CHECK, PCRE_UTF8,
11c67d6573Sopenharmony_ci};
12c67d6573Sopenharmony_ci
// PCRE option flags spelled out locally instead of being imported from
// libpcre-sys. NOTE(review): presumably the bindings do not export these two
// names; the values should match `pcre.h` -- confirm.

// Compile-time option; OR-ed with `PCRE_UTF8` when calling `pcre_compile`.
const PCRE_UCP: c_int = 0x20000000;
// Study-time option passed to `pcre_study` to request JIT compilation.
const PCRE_STUDY_JIT_COMPLETE: c_int = 0x0001;
15c67d6573Sopenharmony_ci
// We use libpcre-sys directly because the pcre crate has unavoidable
// performance problems in its core matching routines. (e.g., It always
// allocates an ovector.)

/// A compiled and studied PCRE pattern.
///
/// Owns both raw pointers below; they are released in the `Drop` impl.
pub struct Regex {
    /// Compiled pattern, as returned by `pcre_compile`.
    code: *mut pcre,
    /// Study data for `code`, as returned by `pcre_study`.
    extra: *mut pcre_extra,
}
23c67d6573Sopenharmony_ci
// SAFETY (assumed -- TODO confirm against the PCRE documentation): `Regex`
// exclusively owns its two raw pointers and nothing in this file mutates them
// after construction, so moving a `Regex` to another thread is presumed sound.
// Raw pointers are not `Send` by default, hence the explicit opt-in.
unsafe impl Send for Regex {}
25c67d6573Sopenharmony_ci
26c67d6573Sopenharmony_ciimpl Drop for Regex {
27c67d6573Sopenharmony_ci    fn drop(&mut self) {
28c67d6573Sopenharmony_ci        unsafe {
29c67d6573Sopenharmony_ci            pcre_free_study(self.extra);
30c67d6573Sopenharmony_ci            pcre_free(self.code as *mut c_void);
31c67d6573Sopenharmony_ci        }
32c67d6573Sopenharmony_ci    }
33c67d6573Sopenharmony_ci}
34c67d6573Sopenharmony_ci
/// An error reported while compiling or studying a pattern.
pub struct Error {
    /// Human-readable message copied from the string PCRE handed back.
    msg: String,
    /// Offset reported by `pcre_compile` for compile errors; `Regex::new`
    /// stores 0 here for study errors.
    offset: c_int,
}
39c67d6573Sopenharmony_ci
40c67d6573Sopenharmony_ciimpl Regex {
41c67d6573Sopenharmony_ci    pub fn new(pattern: &str) -> Result<Regex, Error> {
42c67d6573Sopenharmony_ci        let pattern = CString::new(pattern.to_owned()).unwrap();
43c67d6573Sopenharmony_ci        let mut errptr: *const c_char = ptr::null();
44c67d6573Sopenharmony_ci        let mut erroffset: c_int = 0;
45c67d6573Sopenharmony_ci        let code = unsafe {
46c67d6573Sopenharmony_ci            pcre_compile(
47c67d6573Sopenharmony_ci                pattern.as_ptr(),
48c67d6573Sopenharmony_ci                PCRE_UCP | PCRE_UTF8,
49c67d6573Sopenharmony_ci                &mut errptr,
50c67d6573Sopenharmony_ci                &mut erroffset,
51c67d6573Sopenharmony_ci                ptr::null(),
52c67d6573Sopenharmony_ci            )
53c67d6573Sopenharmony_ci        };
54c67d6573Sopenharmony_ci        if code.is_null() {
55c67d6573Sopenharmony_ci            let msg =
56c67d6573Sopenharmony_ci                unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() };
57c67d6573Sopenharmony_ci            return Err(Error { msg: msg, offset: erroffset });
58c67d6573Sopenharmony_ci        }
59c67d6573Sopenharmony_ci
60c67d6573Sopenharmony_ci        let extra =
61c67d6573Sopenharmony_ci            unsafe { pcre_study(code, PCRE_STUDY_JIT_COMPLETE, &mut errptr) };
62c67d6573Sopenharmony_ci        if extra.is_null() {
63c67d6573Sopenharmony_ci            if errptr.is_null() {
64c67d6573Sopenharmony_ci                panic!("unexpected error. Maybe JIT support isn't enabled?");
65c67d6573Sopenharmony_ci            }
66c67d6573Sopenharmony_ci            let msg =
67c67d6573Sopenharmony_ci                unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() };
68c67d6573Sopenharmony_ci            return Err(Error { msg: msg, offset: 0 });
69c67d6573Sopenharmony_ci        }
70c67d6573Sopenharmony_ci        Ok(Regex { code: code, extra: extra })
71c67d6573Sopenharmony_ci    }
72c67d6573Sopenharmony_ci
73c67d6573Sopenharmony_ci    pub fn is_match(&self, text: &str) -> bool {
74c67d6573Sopenharmony_ci        self.find_at(text, 0).is_some()
75c67d6573Sopenharmony_ci    }
76c67d6573Sopenharmony_ci
77c67d6573Sopenharmony_ci    pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
78c67d6573Sopenharmony_ci        FindMatches { re: self, text: text, last_match_end: 0 }
79c67d6573Sopenharmony_ci    }
80c67d6573Sopenharmony_ci
81c67d6573Sopenharmony_ci    fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
82c67d6573Sopenharmony_ci        const OVEC_SIZE: usize = 15 * 3; // hopefully enough for benchmarks?
83c67d6573Sopenharmony_ci        let mut ovec: [c_int; OVEC_SIZE] = [0; OVEC_SIZE];
84c67d6573Sopenharmony_ci        let err = unsafe {
85c67d6573Sopenharmony_ci            pcre_exec(
86c67d6573Sopenharmony_ci                self.code,
87c67d6573Sopenharmony_ci                self.extra,
88c67d6573Sopenharmony_ci                text.as_ptr() as *const i8,
89c67d6573Sopenharmony_ci                text.len() as c_int,
90c67d6573Sopenharmony_ci                start as c_int,
91c67d6573Sopenharmony_ci                PCRE_NO_UTF8_CHECK,
92c67d6573Sopenharmony_ci                ovec.as_mut_ptr(),
93c67d6573Sopenharmony_ci                OVEC_SIZE as c_int,
94c67d6573Sopenharmony_ci            )
95c67d6573Sopenharmony_ci        };
96c67d6573Sopenharmony_ci        if err == PCRE_ERROR_NOMATCH {
97c67d6573Sopenharmony_ci            None
98c67d6573Sopenharmony_ci        } else if err < 0 {
99c67d6573Sopenharmony_ci            panic!("unknown error code: {:?}", err)
100c67d6573Sopenharmony_ci        } else {
101c67d6573Sopenharmony_ci            Some((ovec[0] as usize, ovec[1] as usize))
102c67d6573Sopenharmony_ci        }
103c67d6573Sopenharmony_ci    }
104c67d6573Sopenharmony_ci}
105c67d6573Sopenharmony_ci
/// Iterator over the matches of a `Regex` in a piece of text.
///
/// Created by `Regex::find_iter`; yields `(start, end)` offset pairs.
pub struct FindMatches<'r, 't> {
    /// The pattern being searched for.
    re: &'r Regex,
    /// The text being searched.
    text: &'t str,
    /// Offset at which the next search begins; starts at 0.
    last_match_end: usize,
}
111c67d6573Sopenharmony_ci
112c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for FindMatches<'r, 't> {
113c67d6573Sopenharmony_ci    type Item = (usize, usize);
114c67d6573Sopenharmony_ci
115c67d6573Sopenharmony_ci    fn next(&mut self) -> Option<(usize, usize)> {
116c67d6573Sopenharmony_ci        match self.re.find_at(self.text, self.last_match_end) {
117c67d6573Sopenharmony_ci            None => None,
118c67d6573Sopenharmony_ci            Some((s, e)) => {
119c67d6573Sopenharmony_ci                self.last_match_end = e;
120c67d6573Sopenharmony_ci                Some((s, e))
121c67d6573Sopenharmony_ci            }
122c67d6573Sopenharmony_ci        }
123c67d6573Sopenharmony_ci    }
124c67d6573Sopenharmony_ci}
125c67d6573Sopenharmony_ci
126c67d6573Sopenharmony_ciimpl fmt::Debug for Error {
127c67d6573Sopenharmony_ci    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
128c67d6573Sopenharmony_ci        write!(f, "PCRE error at {:?}: {}", self.offset, self.msg)
129c67d6573Sopenharmony_ci    }
130c67d6573Sopenharmony_ci}
131