1c67d6573Sopenharmony_ci#![allow(non_snake_case)] 2c67d6573Sopenharmony_ci 3c67d6573Sopenharmony_ciuse std::ffi::{CStr, CString}; 4c67d6573Sopenharmony_ciuse std::fmt; 5c67d6573Sopenharmony_ciuse std::ptr; 6c67d6573Sopenharmony_ci 7c67d6573Sopenharmony_ciuse libc::{c_char, c_int, c_void}; 8c67d6573Sopenharmony_ciuse libpcre_sys::{ 9c67d6573Sopenharmony_ci pcre, pcre_compile, pcre_exec, pcre_extra, pcre_free, pcre_free_study, 10c67d6573Sopenharmony_ci pcre_study, PCRE_ERROR_NOMATCH, PCRE_NO_UTF8_CHECK, PCRE_UTF8, 11c67d6573Sopenharmony_ci}; 12c67d6573Sopenharmony_ci 13c67d6573Sopenharmony_ciconst PCRE_UCP: c_int = 0x20000000; 14c67d6573Sopenharmony_ciconst PCRE_STUDY_JIT_COMPLETE: c_int = 0x0001; 15c67d6573Sopenharmony_ci 16c67d6573Sopenharmony_ci// We use libpcre-sys directly because the pcre crate has unavoidable 17c67d6573Sopenharmony_ci// performance problems in its core matching routines. (e.g., It always 18c67d6573Sopenharmony_ci// allocates an ovector.) 19c67d6573Sopenharmony_cipub struct Regex { 20c67d6573Sopenharmony_ci code: *mut pcre, 21c67d6573Sopenharmony_ci extra: *mut pcre_extra, 22c67d6573Sopenharmony_ci} 23c67d6573Sopenharmony_ci 24c67d6573Sopenharmony_ciunsafe impl Send for Regex {} 25c67d6573Sopenharmony_ci 26c67d6573Sopenharmony_ciimpl Drop for Regex { 27c67d6573Sopenharmony_ci fn drop(&mut self) { 28c67d6573Sopenharmony_ci unsafe { 29c67d6573Sopenharmony_ci pcre_free_study(self.extra); 30c67d6573Sopenharmony_ci pcre_free(self.code as *mut c_void); 31c67d6573Sopenharmony_ci } 32c67d6573Sopenharmony_ci } 33c67d6573Sopenharmony_ci} 34c67d6573Sopenharmony_ci 35c67d6573Sopenharmony_cipub struct Error { 36c67d6573Sopenharmony_ci msg: String, 37c67d6573Sopenharmony_ci offset: c_int, 38c67d6573Sopenharmony_ci} 39c67d6573Sopenharmony_ci 40c67d6573Sopenharmony_ciimpl Regex { 41c67d6573Sopenharmony_ci pub fn new(pattern: &str) -> Result<Regex, Error> { 42c67d6573Sopenharmony_ci let pattern = CString::new(pattern.to_owned()).unwrap(); 43c67d6573Sopenharmony_ci let mut errptr: *const c_char = ptr::null(); 44c67d6573Sopenharmony_ci let mut erroffset: c_int = 0; 45c67d6573Sopenharmony_ci let code = unsafe { 46c67d6573Sopenharmony_ci pcre_compile( 47c67d6573Sopenharmony_ci pattern.as_ptr(), 48c67d6573Sopenharmony_ci PCRE_UCP | PCRE_UTF8, 49c67d6573Sopenharmony_ci &mut errptr, 50c67d6573Sopenharmony_ci &mut erroffset, 51c67d6573Sopenharmony_ci ptr::null(), 52c67d6573Sopenharmony_ci ) 53c67d6573Sopenharmony_ci }; 54c67d6573Sopenharmony_ci if code.is_null() { 55c67d6573Sopenharmony_ci let msg = 56c67d6573Sopenharmony_ci unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() }; 57c67d6573Sopenharmony_ci return Err(Error { msg: msg, offset: erroffset }); 58c67d6573Sopenharmony_ci } 59c67d6573Sopenharmony_ci 60c67d6573Sopenharmony_ci let extra = 61c67d6573Sopenharmony_ci unsafe { pcre_study(code, PCRE_STUDY_JIT_COMPLETE, &mut errptr) }; 62c67d6573Sopenharmony_ci if extra.is_null() { 63c67d6573Sopenharmony_ci if errptr.is_null() { 64c67d6573Sopenharmony_ci panic!("unexpected error. Maybe JIT support isn't enabled?"); 65c67d6573Sopenharmony_ci } 66c67d6573Sopenharmony_ci let msg = 67c67d6573Sopenharmony_ci unsafe { CStr::from_ptr(errptr).to_str().unwrap().to_owned() }; 68c67d6573Sopenharmony_ci return Err(Error { msg: msg, offset: 0 }); 69c67d6573Sopenharmony_ci } 70c67d6573Sopenharmony_ci Ok(Regex { code: code, extra: extra }) 71c67d6573Sopenharmony_ci } 72c67d6573Sopenharmony_ci 73c67d6573Sopenharmony_ci pub fn is_match(&self, text: &str) -> bool { 74c67d6573Sopenharmony_ci self.find_at(text, 0).is_some() 75c67d6573Sopenharmony_ci } 76c67d6573Sopenharmony_ci 77c67d6573Sopenharmony_ci pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> { 78c67d6573Sopenharmony_ci FindMatches { re: self, text: text, last_match_end: 0 } 79c67d6573Sopenharmony_ci } 80c67d6573Sopenharmony_ci 81c67d6573Sopenharmony_ci fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> { 82c67d6573Sopenharmony_ci const OVEC_SIZE: usize = 15 * 3; // hopefully enough for benchmarks? 83c67d6573Sopenharmony_ci let mut ovec: [c_int; OVEC_SIZE] = [0; OVEC_SIZE]; 84c67d6573Sopenharmony_ci let err = unsafe { 85c67d6573Sopenharmony_ci pcre_exec( 86c67d6573Sopenharmony_ci self.code, 87c67d6573Sopenharmony_ci self.extra, 88c67d6573Sopenharmony_ci text.as_ptr() as *const i8, 89c67d6573Sopenharmony_ci text.len() as c_int, 90c67d6573Sopenharmony_ci start as c_int, 91c67d6573Sopenharmony_ci PCRE_NO_UTF8_CHECK, 92c67d6573Sopenharmony_ci ovec.as_mut_ptr(), 93c67d6573Sopenharmony_ci OVEC_SIZE as c_int, 94c67d6573Sopenharmony_ci ) 95c67d6573Sopenharmony_ci }; 96c67d6573Sopenharmony_ci if err == PCRE_ERROR_NOMATCH { 97c67d6573Sopenharmony_ci None 98c67d6573Sopenharmony_ci } else if err < 0 { 99c67d6573Sopenharmony_ci panic!("unknown error code: {:?}", err) 100c67d6573Sopenharmony_ci } else { 101c67d6573Sopenharmony_ci Some((ovec[0] as usize, ovec[1] as usize)) 102c67d6573Sopenharmony_ci } 103c67d6573Sopenharmony_ci } 104c67d6573Sopenharmony_ci} 105c67d6573Sopenharmony_ci 106c67d6573Sopenharmony_cipub struct FindMatches<'r, 't> { 107c67d6573Sopenharmony_ci re: &'r Regex, 108c67d6573Sopenharmony_ci text: &'t str, 109c67d6573Sopenharmony_ci last_match_end: usize, 110c67d6573Sopenharmony_ci} 111c67d6573Sopenharmony_ci 112c67d6573Sopenharmony_ciimpl<'r, 't> Iterator for FindMatches<'r, 't> { 113c67d6573Sopenharmony_ci type Item = (usize, usize); 114c67d6573Sopenharmony_ci 115c67d6573Sopenharmony_ci fn next(&mut self) -> Option<(usize, usize)> { 116c67d6573Sopenharmony_ci match self.re.find_at(self.text, self.last_match_end) { 117c67d6573Sopenharmony_ci None => None, 118c67d6573Sopenharmony_ci Some((s, e)) => { 119c67d6573Sopenharmony_ci self.last_match_end = e; 120c67d6573Sopenharmony_ci Some((s, e)) 121c67d6573Sopenharmony_ci } 122c67d6573Sopenharmony_ci } 123c67d6573Sopenharmony_ci } 124c67d6573Sopenharmony_ci} 125c67d6573Sopenharmony_ci 126c67d6573Sopenharmony_ciimpl fmt::Debug for Error { 127c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 128c67d6573Sopenharmony_ci write!(f, "PCRE error at {:?}: {}", self.offset, self.msg) 129c67d6573Sopenharmony_ci } 130c67d6573Sopenharmony_ci} 131