1c67d6573Sopenharmony_ciuse std::collections::HashMap; 2c67d6573Sopenharmony_ciuse std::ffi::{CStr, CString}; 3c67d6573Sopenharmony_ciuse std::ops::Deref; 4c67d6573Sopenharmony_ciuse std::ptr; 5c67d6573Sopenharmony_ciuse std::slice; 6c67d6573Sopenharmony_ciuse std::str; 7c67d6573Sopenharmony_ci 8c67d6573Sopenharmony_ciuse libc::{c_char, size_t}; 9c67d6573Sopenharmony_ciuse regex::bytes; 10c67d6573Sopenharmony_ci 11c67d6573Sopenharmony_ciuse crate::error::{Error, ErrorKind}; 12c67d6573Sopenharmony_ci 13c67d6573Sopenharmony_ciconst RURE_FLAG_CASEI: u32 = 1 << 0; 14c67d6573Sopenharmony_ciconst RURE_FLAG_MULTI: u32 = 1 << 1; 15c67d6573Sopenharmony_ciconst RURE_FLAG_DOTNL: u32 = 1 << 2; 16c67d6573Sopenharmony_ciconst RURE_FLAG_SWAP_GREED: u32 = 1 << 3; 17c67d6573Sopenharmony_ciconst RURE_FLAG_SPACE: u32 = 1 << 4; 18c67d6573Sopenharmony_ciconst RURE_FLAG_UNICODE: u32 = 1 << 5; 19c67d6573Sopenharmony_ciconst RURE_DEFAULT_FLAGS: u32 = RURE_FLAG_UNICODE; 20c67d6573Sopenharmony_ci 21c67d6573Sopenharmony_cipub struct Regex { 22c67d6573Sopenharmony_ci re: bytes::Regex, 23c67d6573Sopenharmony_ci capture_names: HashMap<String, i32>, 24c67d6573Sopenharmony_ci} 25c67d6573Sopenharmony_ci 26c67d6573Sopenharmony_cipub struct Options { 27c67d6573Sopenharmony_ci size_limit: usize, 28c67d6573Sopenharmony_ci dfa_size_limit: usize, 29c67d6573Sopenharmony_ci} 30c67d6573Sopenharmony_ci 31c67d6573Sopenharmony_ci// The `RegexSet` is not exposed with option support or matching at an 32c67d6573Sopenharmony_ci// arbitrary position with a crate just yet. To circumvent this, we use 33c67d6573Sopenharmony_ci// the `Exec` structure directly. 34c67d6573Sopenharmony_cipub struct RegexSet { 35c67d6573Sopenharmony_ci re: bytes::RegexSet, 36c67d6573Sopenharmony_ci} 37c67d6573Sopenharmony_ci 38c67d6573Sopenharmony_ci#[repr(C)] 39c67d6573Sopenharmony_cipub struct rure_match { 40c67d6573Sopenharmony_ci pub start: size_t, 41c67d6573Sopenharmony_ci pub end: size_t, 42c67d6573Sopenharmony_ci} 43c67d6573Sopenharmony_ci 44c67d6573Sopenharmony_cipub struct Captures(bytes::Locations); 45c67d6573Sopenharmony_ci 46c67d6573Sopenharmony_cipub struct Iter { 47c67d6573Sopenharmony_ci re: *const Regex, 48c67d6573Sopenharmony_ci last_end: usize, 49c67d6573Sopenharmony_ci last_match: Option<usize>, 50c67d6573Sopenharmony_ci} 51c67d6573Sopenharmony_ci 52c67d6573Sopenharmony_cipub struct IterCaptureNames { 53c67d6573Sopenharmony_ci capture_names: bytes::CaptureNames<'static>, 54c67d6573Sopenharmony_ci name_ptrs: Vec<*mut c_char>, 55c67d6573Sopenharmony_ci} 56c67d6573Sopenharmony_ci 57c67d6573Sopenharmony_ciimpl Deref for Regex { 58c67d6573Sopenharmony_ci type Target = bytes::Regex; 59c67d6573Sopenharmony_ci fn deref(&self) -> &bytes::Regex { 60c67d6573Sopenharmony_ci &self.re 61c67d6573Sopenharmony_ci } 62c67d6573Sopenharmony_ci} 63c67d6573Sopenharmony_ci 64c67d6573Sopenharmony_ciimpl Deref for RegexSet { 65c67d6573Sopenharmony_ci type Target = bytes::RegexSet; 66c67d6573Sopenharmony_ci fn deref(&self) -> &bytes::RegexSet { 67c67d6573Sopenharmony_ci &self.re 68c67d6573Sopenharmony_ci } 69c67d6573Sopenharmony_ci} 70c67d6573Sopenharmony_ci 71c67d6573Sopenharmony_ciimpl Default for Options { 72c67d6573Sopenharmony_ci fn default() -> Options { 73c67d6573Sopenharmony_ci Options { size_limit: 10 * (1 << 20), dfa_size_limit: 2 * (1 << 20) } 74c67d6573Sopenharmony_ci } 75c67d6573Sopenharmony_ci} 76c67d6573Sopenharmony_ci 77c67d6573Sopenharmony_ciffi_fn! { 78c67d6573Sopenharmony_ci fn rure_compile_must(pattern: *const c_char) -> *const Regex { 79c67d6573Sopenharmony_ci let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() }; 80c67d6573Sopenharmony_ci let pat = pattern as *const u8; 81c67d6573Sopenharmony_ci let mut err = Error::new(ErrorKind::None); 82c67d6573Sopenharmony_ci let re = rure_compile( 83c67d6573Sopenharmony_ci pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err); 84c67d6573Sopenharmony_ci if err.is_err() { 85c67d6573Sopenharmony_ci let _ = writeln!(&mut io::stderr(), "{}", err); 86c67d6573Sopenharmony_ci let _ = writeln!( 87c67d6573Sopenharmony_ci &mut io::stderr(), "aborting from rure_compile_must"); 88c67d6573Sopenharmony_ci unsafe { abort() } 89c67d6573Sopenharmony_ci } 90c67d6573Sopenharmony_ci re 91c67d6573Sopenharmony_ci } 92c67d6573Sopenharmony_ci} 93c67d6573Sopenharmony_ci 94c67d6573Sopenharmony_ciffi_fn! { 95c67d6573Sopenharmony_ci fn rure_compile( 96c67d6573Sopenharmony_ci pattern: *const u8, 97c67d6573Sopenharmony_ci length: size_t, 98c67d6573Sopenharmony_ci flags: u32, 99c67d6573Sopenharmony_ci options: *const Options, 100c67d6573Sopenharmony_ci error: *mut Error, 101c67d6573Sopenharmony_ci ) -> *const Regex { 102c67d6573Sopenharmony_ci let pat = unsafe { slice::from_raw_parts(pattern, length) }; 103c67d6573Sopenharmony_ci let pat = match str::from_utf8(pat) { 104c67d6573Sopenharmony_ci Ok(pat) => pat, 105c67d6573Sopenharmony_ci Err(err) => { 106c67d6573Sopenharmony_ci unsafe { 107c67d6573Sopenharmony_ci if !error.is_null() { 108c67d6573Sopenharmony_ci *error = Error::new(ErrorKind::Str(err)); 109c67d6573Sopenharmony_ci } 110c67d6573Sopenharmony_ci return ptr::null(); 111c67d6573Sopenharmony_ci } 112c67d6573Sopenharmony_ci } 113c67d6573Sopenharmony_ci }; 114c67d6573Sopenharmony_ci let mut builder = bytes::RegexBuilder::new(pat); 115c67d6573Sopenharmony_ci if !options.is_null() { 116c67d6573Sopenharmony_ci let options = unsafe { &*options }; 117c67d6573Sopenharmony_ci builder.size_limit(options.size_limit); 118c67d6573Sopenharmony_ci builder.dfa_size_limit(options.dfa_size_limit); 119c67d6573Sopenharmony_ci } 120c67d6573Sopenharmony_ci builder.case_insensitive(flags & RURE_FLAG_CASEI > 0); 121c67d6573Sopenharmony_ci builder.multi_line(flags & RURE_FLAG_MULTI > 0); 122c67d6573Sopenharmony_ci builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0); 123c67d6573Sopenharmony_ci builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0); 124c67d6573Sopenharmony_ci builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0); 125c67d6573Sopenharmony_ci builder.unicode(flags & RURE_FLAG_UNICODE > 0); 126c67d6573Sopenharmony_ci match builder.build() { 127c67d6573Sopenharmony_ci Ok(re) => { 128c67d6573Sopenharmony_ci let mut capture_names = HashMap::new(); 129c67d6573Sopenharmony_ci for (i, name) in re.capture_names().enumerate() { 130c67d6573Sopenharmony_ci if let Some(name) = name { 131c67d6573Sopenharmony_ci capture_names.insert(name.to_owned(), i as i32); 132c67d6573Sopenharmony_ci } 133c67d6573Sopenharmony_ci } 134c67d6573Sopenharmony_ci let re = Regex { 135c67d6573Sopenharmony_ci re: re, 136c67d6573Sopenharmony_ci capture_names: capture_names, 137c67d6573Sopenharmony_ci }; 138c67d6573Sopenharmony_ci Box::into_raw(Box::new(re)) 139c67d6573Sopenharmony_ci } 140c67d6573Sopenharmony_ci Err(err) => { 141c67d6573Sopenharmony_ci unsafe { 142c67d6573Sopenharmony_ci if !error.is_null() { 143c67d6573Sopenharmony_ci *error = Error::new(ErrorKind::Regex(err)); 144c67d6573Sopenharmony_ci } 145c67d6573Sopenharmony_ci ptr::null() 146c67d6573Sopenharmony_ci } 147c67d6573Sopenharmony_ci } 148c67d6573Sopenharmony_ci } 149c67d6573Sopenharmony_ci } 150c67d6573Sopenharmony_ci} 151c67d6573Sopenharmony_ci 152c67d6573Sopenharmony_ciffi_fn! { 153c67d6573Sopenharmony_ci fn rure_free(re: *const Regex) { 154c67d6573Sopenharmony_ci unsafe { drop(Box::from_raw(re as *mut Regex)); } 155c67d6573Sopenharmony_ci } 156c67d6573Sopenharmony_ci} 157c67d6573Sopenharmony_ci 158c67d6573Sopenharmony_ciffi_fn! { 159c67d6573Sopenharmony_ci fn rure_is_match( 160c67d6573Sopenharmony_ci re: *const Regex, 161c67d6573Sopenharmony_ci haystack: *const u8, 162c67d6573Sopenharmony_ci len: size_t, 163c67d6573Sopenharmony_ci start: size_t, 164c67d6573Sopenharmony_ci ) -> bool { 165c67d6573Sopenharmony_ci let re = unsafe { &*re }; 166c67d6573Sopenharmony_ci let haystack = unsafe { slice::from_raw_parts(haystack, len) }; 167c67d6573Sopenharmony_ci re.is_match_at(haystack, start) 168c67d6573Sopenharmony_ci } 169c67d6573Sopenharmony_ci} 170c67d6573Sopenharmony_ci 171c67d6573Sopenharmony_ciffi_fn! { 172c67d6573Sopenharmony_ci fn rure_find( 173c67d6573Sopenharmony_ci re: *const Regex, 174c67d6573Sopenharmony_ci haystack: *const u8, 175c67d6573Sopenharmony_ci len: size_t, 176c67d6573Sopenharmony_ci start: size_t, 177c67d6573Sopenharmony_ci match_info: *mut rure_match, 178c67d6573Sopenharmony_ci ) -> bool { 179c67d6573Sopenharmony_ci let re = unsafe { &*re }; 180c67d6573Sopenharmony_ci let haystack = unsafe { slice::from_raw_parts(haystack, len) }; 181c67d6573Sopenharmony_ci re.find_at(haystack, start).map(|m| unsafe { 182c67d6573Sopenharmony_ci if !match_info.is_null() { 183c67d6573Sopenharmony_ci (*match_info).start = m.start(); 184c67d6573Sopenharmony_ci (*match_info).end = m.end(); 185c67d6573Sopenharmony_ci } 186c67d6573Sopenharmony_ci }).is_some() 187c67d6573Sopenharmony_ci } 188c67d6573Sopenharmony_ci} 189c67d6573Sopenharmony_ci 190c67d6573Sopenharmony_ciffi_fn! { 191c67d6573Sopenharmony_ci fn rure_find_captures( 192c67d6573Sopenharmony_ci re: *const Regex, 193c67d6573Sopenharmony_ci haystack: *const u8, 194c67d6573Sopenharmony_ci len: size_t, 195c67d6573Sopenharmony_ci start: size_t, 196c67d6573Sopenharmony_ci captures: *mut Captures, 197c67d6573Sopenharmony_ci ) -> bool { 198c67d6573Sopenharmony_ci let re = unsafe { &*re }; 199c67d6573Sopenharmony_ci let haystack = unsafe { slice::from_raw_parts(haystack, len) }; 200c67d6573Sopenharmony_ci let slots = unsafe { &mut (*captures).0 }; 201c67d6573Sopenharmony_ci re.read_captures_at(slots, haystack, start).is_some() 202c67d6573Sopenharmony_ci } 203c67d6573Sopenharmony_ci} 204c67d6573Sopenharmony_ci 205c67d6573Sopenharmony_ciffi_fn! { 206c67d6573Sopenharmony_ci fn rure_shortest_match( 207c67d6573Sopenharmony_ci re: *const Regex, 208c67d6573Sopenharmony_ci haystack: *const u8, 209c67d6573Sopenharmony_ci len: size_t, 210c67d6573Sopenharmony_ci start: size_t, 211c67d6573Sopenharmony_ci end: *mut usize, 212c67d6573Sopenharmony_ci ) -> bool { 213c67d6573Sopenharmony_ci let re = unsafe { &*re }; 214c67d6573Sopenharmony_ci let haystack = unsafe { slice::from_raw_parts(haystack, len) }; 215c67d6573Sopenharmony_ci match re.shortest_match_at(haystack, start) { 216c67d6573Sopenharmony_ci None => false, 217c67d6573Sopenharmony_ci Some(i) => { 218c67d6573Sopenharmony_ci if !end.is_null() { 219c67d6573Sopenharmony_ci unsafe { 220c67d6573Sopenharmony_ci *end = i; 221c67d6573Sopenharmony_ci } 222c67d6573Sopenharmony_ci } 223c67d6573Sopenharmony_ci true 224c67d6573Sopenharmony_ci } 225c67d6573Sopenharmony_ci } 226c67d6573Sopenharmony_ci } 227c67d6573Sopenharmony_ci} 228c67d6573Sopenharmony_ci 229c67d6573Sopenharmony_ciffi_fn! { 230c67d6573Sopenharmony_ci fn rure_capture_name_index( 231c67d6573Sopenharmony_ci re: *const Regex, 232c67d6573Sopenharmony_ci name: *const c_char, 233c67d6573Sopenharmony_ci ) -> i32 { 234c67d6573Sopenharmony_ci let re = unsafe { &*re }; 235c67d6573Sopenharmony_ci let name = unsafe { CStr::from_ptr(name) }; 236c67d6573Sopenharmony_ci let name = match name.to_str() { 237c67d6573Sopenharmony_ci Err(_) => return -1, 238c67d6573Sopenharmony_ci Ok(name) => name, 239c67d6573Sopenharmony_ci }; 240c67d6573Sopenharmony_ci re.capture_names.get(name).map(|&i|i).unwrap_or(-1) 241c67d6573Sopenharmony_ci } 242c67d6573Sopenharmony_ci} 243c67d6573Sopenharmony_ci 244c67d6573Sopenharmony_ciffi_fn! { 245c67d6573Sopenharmony_ci fn rure_iter_capture_names_new( 246c67d6573Sopenharmony_ci re: *const Regex, 247c67d6573Sopenharmony_ci ) -> *mut IterCaptureNames { 248c67d6573Sopenharmony_ci let re = unsafe { &*re }; 249c67d6573Sopenharmony_ci Box::into_raw(Box::new(IterCaptureNames { 250c67d6573Sopenharmony_ci capture_names: re.re.capture_names(), 251c67d6573Sopenharmony_ci name_ptrs: Vec::new(), 252c67d6573Sopenharmony_ci })) 253c67d6573Sopenharmony_ci } 254c67d6573Sopenharmony_ci} 255c67d6573Sopenharmony_ci 256c67d6573Sopenharmony_ciffi_fn! { 257c67d6573Sopenharmony_ci fn rure_iter_capture_names_free(it: *mut IterCaptureNames) { 258c67d6573Sopenharmony_ci unsafe { 259c67d6573Sopenharmony_ci let it = &mut *it; 260c67d6573Sopenharmony_ci while let Some(ptr) = it.name_ptrs.pop() { 261c67d6573Sopenharmony_ci drop(CString::from_raw(ptr)); 262c67d6573Sopenharmony_ci } 263c67d6573Sopenharmony_ci drop(Box::from_raw(it)); 264c67d6573Sopenharmony_ci } 265c67d6573Sopenharmony_ci } 266c67d6573Sopenharmony_ci} 267c67d6573Sopenharmony_ci 268c67d6573Sopenharmony_ciffi_fn! { 269c67d6573Sopenharmony_ci fn rure_iter_capture_names_next( 270c67d6573Sopenharmony_ci it: *mut IterCaptureNames, 271c67d6573Sopenharmony_ci capture_name: *mut *mut c_char, 272c67d6573Sopenharmony_ci ) -> bool { 273c67d6573Sopenharmony_ci if capture_name.is_null() { 274c67d6573Sopenharmony_ci return false; 275c67d6573Sopenharmony_ci } 276c67d6573Sopenharmony_ci 277c67d6573Sopenharmony_ci let it = unsafe { &mut *it }; 278c67d6573Sopenharmony_ci let cn = match it.capture_names.next() { 279c67d6573Sopenharmony_ci // Top-level iterator ran out of capture groups 280c67d6573Sopenharmony_ci None => return false, 281c67d6573Sopenharmony_ci Some(val) => { 282c67d6573Sopenharmony_ci let name = match val { 283c67d6573Sopenharmony_ci // inner Option didn't have a name 284c67d6573Sopenharmony_ci None => "", 285c67d6573Sopenharmony_ci Some(name) => name 286c67d6573Sopenharmony_ci }; 287c67d6573Sopenharmony_ci name 288c67d6573Sopenharmony_ci } 289c67d6573Sopenharmony_ci }; 290c67d6573Sopenharmony_ci 291c67d6573Sopenharmony_ci unsafe { 292c67d6573Sopenharmony_ci let cs = match CString::new(cn.as_bytes()) { 293c67d6573Sopenharmony_ci Result::Ok(val) => val, 294c67d6573Sopenharmony_ci Result::Err(_) => return false 295c67d6573Sopenharmony_ci }; 296c67d6573Sopenharmony_ci let ptr = cs.into_raw(); 297c67d6573Sopenharmony_ci it.name_ptrs.push(ptr); 298c67d6573Sopenharmony_ci *capture_name = ptr; 299c67d6573Sopenharmony_ci } 300c67d6573Sopenharmony_ci true 301c67d6573Sopenharmony_ci 302c67d6573Sopenharmony_ci } 303c67d6573Sopenharmony_ci} 304c67d6573Sopenharmony_ci 305c67d6573Sopenharmony_ciffi_fn! { 306c67d6573Sopenharmony_ci fn rure_iter_new( 307c67d6573Sopenharmony_ci re: *const Regex, 308c67d6573Sopenharmony_ci ) -> *mut Iter { 309c67d6573Sopenharmony_ci Box::into_raw(Box::new(Iter { 310c67d6573Sopenharmony_ci re: re, 311c67d6573Sopenharmony_ci last_end: 0, 312c67d6573Sopenharmony_ci last_match: None, 313c67d6573Sopenharmony_ci })) 314c67d6573Sopenharmony_ci } 315c67d6573Sopenharmony_ci} 316c67d6573Sopenharmony_ci 317c67d6573Sopenharmony_ciffi_fn! { 318c67d6573Sopenharmony_ci fn rure_iter_free(it: *mut Iter) { 319c67d6573Sopenharmony_ci unsafe { drop(Box::from_raw(it)); } 320c67d6573Sopenharmony_ci } 321c67d6573Sopenharmony_ci} 322c67d6573Sopenharmony_ci 323c67d6573Sopenharmony_ciffi_fn! { 324c67d6573Sopenharmony_ci fn rure_iter_next( 325c67d6573Sopenharmony_ci it: *mut Iter, 326c67d6573Sopenharmony_ci haystack: *const u8, 327c67d6573Sopenharmony_ci len: size_t, 328c67d6573Sopenharmony_ci match_info: *mut rure_match, 329c67d6573Sopenharmony_ci ) -> bool { 330c67d6573Sopenharmony_ci let it = unsafe { &mut *it }; 331c67d6573Sopenharmony_ci let re = unsafe { &*it.re }; 332c67d6573Sopenharmony_ci let text = unsafe { slice::from_raw_parts(haystack, len) }; 333c67d6573Sopenharmony_ci if it.last_end > text.len() { 334c67d6573Sopenharmony_ci return false; 335c67d6573Sopenharmony_ci } 336c67d6573Sopenharmony_ci let (s, e) = match re.find_at(text, it.last_end) { 337c67d6573Sopenharmony_ci None => return false, 338c67d6573Sopenharmony_ci Some(m) => (m.start(), m.end()), 339c67d6573Sopenharmony_ci }; 340c67d6573Sopenharmony_ci if s == e { 341c67d6573Sopenharmony_ci // This is an empty match. To ensure we make progress, start 342c67d6573Sopenharmony_ci // the next search at the smallest possible starting position 343c67d6573Sopenharmony_ci // of the next match following this one. 344c67d6573Sopenharmony_ci it.last_end += 1; 345c67d6573Sopenharmony_ci // Don't accept empty matches immediately following a match. 346c67d6573Sopenharmony_ci // Just move on to the next match. 347c67d6573Sopenharmony_ci if Some(e) == it.last_match { 348c67d6573Sopenharmony_ci return rure_iter_next(it, haystack, len, match_info); 349c67d6573Sopenharmony_ci } 350c67d6573Sopenharmony_ci } else { 351c67d6573Sopenharmony_ci it.last_end = e; 352c67d6573Sopenharmony_ci } 353c67d6573Sopenharmony_ci it.last_match = Some(e); 354c67d6573Sopenharmony_ci if !match_info.is_null() { 355c67d6573Sopenharmony_ci unsafe { 356c67d6573Sopenharmony_ci (*match_info).start = s; 357c67d6573Sopenharmony_ci (*match_info).end = e; 358c67d6573Sopenharmony_ci } 359c67d6573Sopenharmony_ci } 360c67d6573Sopenharmony_ci true 361c67d6573Sopenharmony_ci } 362c67d6573Sopenharmony_ci} 363c67d6573Sopenharmony_ci 364c67d6573Sopenharmony_ciffi_fn! { 365c67d6573Sopenharmony_ci fn rure_iter_next_captures( 366c67d6573Sopenharmony_ci it: *mut Iter, 367c67d6573Sopenharmony_ci haystack: *const u8, 368c67d6573Sopenharmony_ci len: size_t, 369c67d6573Sopenharmony_ci captures: *mut Captures, 370c67d6573Sopenharmony_ci ) -> bool { 371c67d6573Sopenharmony_ci let it = unsafe { &mut *it }; 372c67d6573Sopenharmony_ci let re = unsafe { &*it.re }; 373c67d6573Sopenharmony_ci let slots = unsafe { &mut (*captures).0 }; 374c67d6573Sopenharmony_ci let text = unsafe { slice::from_raw_parts(haystack, len) }; 375c67d6573Sopenharmony_ci if it.last_end > text.len() { 376c67d6573Sopenharmony_ci return false; 377c67d6573Sopenharmony_ci } 378c67d6573Sopenharmony_ci let (s, e) = match re.read_captures_at(slots, text, it.last_end) { 379c67d6573Sopenharmony_ci None => return false, 380c67d6573Sopenharmony_ci Some(m) => (m.start(), m.end()), 381c67d6573Sopenharmony_ci }; 382c67d6573Sopenharmony_ci if s == e { 383c67d6573Sopenharmony_ci // This is an empty match. To ensure we make progress, start 384c67d6573Sopenharmony_ci // the next search at the smallest possible starting position 385c67d6573Sopenharmony_ci // of the next match following this one. 386c67d6573Sopenharmony_ci it.last_end += 1; 387c67d6573Sopenharmony_ci // Don't accept empty matches immediately following a match. 388c67d6573Sopenharmony_ci // Just move on to the next match. 389c67d6573Sopenharmony_ci if Some(e) == it.last_match { 390c67d6573Sopenharmony_ci return rure_iter_next_captures(it, haystack, len, captures); 391c67d6573Sopenharmony_ci } 392c67d6573Sopenharmony_ci } else { 393c67d6573Sopenharmony_ci it.last_end = e; 394c67d6573Sopenharmony_ci } 395c67d6573Sopenharmony_ci it.last_match = Some(e); 396c67d6573Sopenharmony_ci true 397c67d6573Sopenharmony_ci } 398c67d6573Sopenharmony_ci} 399c67d6573Sopenharmony_ci 400c67d6573Sopenharmony_ciffi_fn! { 401c67d6573Sopenharmony_ci fn rure_captures_new(re: *const Regex) -> *mut Captures { 402c67d6573Sopenharmony_ci let re = unsafe { &*re }; 403c67d6573Sopenharmony_ci let captures = Captures(re.locations()); 404c67d6573Sopenharmony_ci Box::into_raw(Box::new(captures)) 405c67d6573Sopenharmony_ci } 406c67d6573Sopenharmony_ci} 407c67d6573Sopenharmony_ci 408c67d6573Sopenharmony_ciffi_fn! { 409c67d6573Sopenharmony_ci fn rure_captures_free(captures: *const Captures) { 410c67d6573Sopenharmony_ci unsafe { drop(Box::from_raw(captures as *mut Captures)); } 411c67d6573Sopenharmony_ci } 412c67d6573Sopenharmony_ci} 413c67d6573Sopenharmony_ci 414c67d6573Sopenharmony_ciffi_fn! { 415c67d6573Sopenharmony_ci fn rure_captures_at( 416c67d6573Sopenharmony_ci captures: *const Captures, 417c67d6573Sopenharmony_ci i: size_t, 418c67d6573Sopenharmony_ci match_info: *mut rure_match, 419c67d6573Sopenharmony_ci ) -> bool { 420c67d6573Sopenharmony_ci let locs = unsafe { &(*captures).0 }; 421c67d6573Sopenharmony_ci match locs.pos(i) { 422c67d6573Sopenharmony_ci Some((start, end)) => { 423c67d6573Sopenharmony_ci if !match_info.is_null() { 424c67d6573Sopenharmony_ci unsafe { 425c67d6573Sopenharmony_ci (*match_info).start = start; 426c67d6573Sopenharmony_ci (*match_info).end = end; 427c67d6573Sopenharmony_ci } 428c67d6573Sopenharmony_ci } 429c67d6573Sopenharmony_ci true 430c67d6573Sopenharmony_ci } 431c67d6573Sopenharmony_ci _ => false 432c67d6573Sopenharmony_ci } 433c67d6573Sopenharmony_ci } 434c67d6573Sopenharmony_ci} 435c67d6573Sopenharmony_ci 436c67d6573Sopenharmony_ciffi_fn! { 437c67d6573Sopenharmony_ci fn rure_captures_len(captures: *const Captures) -> size_t { 438c67d6573Sopenharmony_ci unsafe { (*captures).0.len() } 439c67d6573Sopenharmony_ci } 440c67d6573Sopenharmony_ci} 441c67d6573Sopenharmony_ci 442c67d6573Sopenharmony_ciffi_fn! { 443c67d6573Sopenharmony_ci fn rure_options_new() -> *mut Options { 444c67d6573Sopenharmony_ci Box::into_raw(Box::new(Options::default())) 445c67d6573Sopenharmony_ci } 446c67d6573Sopenharmony_ci} 447c67d6573Sopenharmony_ci 448c67d6573Sopenharmony_ciffi_fn! { 449c67d6573Sopenharmony_ci fn rure_options_free(options: *mut Options) { 450c67d6573Sopenharmony_ci unsafe { drop(Box::from_raw(options)); } 451c67d6573Sopenharmony_ci } 452c67d6573Sopenharmony_ci} 453c67d6573Sopenharmony_ci 454c67d6573Sopenharmony_ciffi_fn! { 455c67d6573Sopenharmony_ci fn rure_options_size_limit(options: *mut Options, limit: size_t) { 456c67d6573Sopenharmony_ci let options = unsafe { &mut *options }; 457c67d6573Sopenharmony_ci options.size_limit = limit; 458c67d6573Sopenharmony_ci } 459c67d6573Sopenharmony_ci} 460c67d6573Sopenharmony_ci 461c67d6573Sopenharmony_ciffi_fn! { 462c67d6573Sopenharmony_ci fn rure_options_dfa_size_limit(options: *mut Options, limit: size_t) { 463c67d6573Sopenharmony_ci let options = unsafe { &mut *options }; 464c67d6573Sopenharmony_ci options.dfa_size_limit = limit; 465c67d6573Sopenharmony_ci } 466c67d6573Sopenharmony_ci} 467c67d6573Sopenharmony_ci 468c67d6573Sopenharmony_ciffi_fn! { 469c67d6573Sopenharmony_ci fn rure_compile_set( 470c67d6573Sopenharmony_ci patterns: *const *const u8, 471c67d6573Sopenharmony_ci patterns_lengths: *const size_t, 472c67d6573Sopenharmony_ci patterns_count: size_t, 473c67d6573Sopenharmony_ci flags: u32, 474c67d6573Sopenharmony_ci options: *const Options, 475c67d6573Sopenharmony_ci error: *mut Error 476c67d6573Sopenharmony_ci ) -> *const RegexSet { 477c67d6573Sopenharmony_ci let (raw_pats, raw_patsl) = unsafe { 478c67d6573Sopenharmony_ci ( 479c67d6573Sopenharmony_ci slice::from_raw_parts(patterns, patterns_count), 480c67d6573Sopenharmony_ci slice::from_raw_parts(patterns_lengths, patterns_count) 481c67d6573Sopenharmony_ci ) 482c67d6573Sopenharmony_ci }; 483c67d6573Sopenharmony_ci 484c67d6573Sopenharmony_ci let mut pats = Vec::with_capacity(patterns_count); 485c67d6573Sopenharmony_ci for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) { 486c67d6573Sopenharmony_ci let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) }; 487c67d6573Sopenharmony_ci pats.push(match str::from_utf8(pat) { 488c67d6573Sopenharmony_ci Ok(pat) => pat, 489c67d6573Sopenharmony_ci Err(err) => { 490c67d6573Sopenharmony_ci unsafe { 491c67d6573Sopenharmony_ci if !error.is_null() { 492c67d6573Sopenharmony_ci *error = Error::new(ErrorKind::Str(err)); 493c67d6573Sopenharmony_ci } 494c67d6573Sopenharmony_ci return ptr::null(); 495c67d6573Sopenharmony_ci } 496c67d6573Sopenharmony_ci } 497c67d6573Sopenharmony_ci }); 498c67d6573Sopenharmony_ci } 499c67d6573Sopenharmony_ci 500c67d6573Sopenharmony_ci let mut builder = bytes::RegexSetBuilder::new(pats); 501c67d6573Sopenharmony_ci if !options.is_null() { 502c67d6573Sopenharmony_ci let options = unsafe { &*options }; 503c67d6573Sopenharmony_ci builder.size_limit(options.size_limit); 504c67d6573Sopenharmony_ci builder.dfa_size_limit(options.dfa_size_limit); 505c67d6573Sopenharmony_ci } 506c67d6573Sopenharmony_ci builder.case_insensitive(flags & RURE_FLAG_CASEI > 0); 507c67d6573Sopenharmony_ci builder.multi_line(flags & RURE_FLAG_MULTI > 0); 508c67d6573Sopenharmony_ci builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0); 509c67d6573Sopenharmony_ci builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0); 510c67d6573Sopenharmony_ci builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0); 511c67d6573Sopenharmony_ci builder.unicode(flags & RURE_FLAG_UNICODE > 0); 512c67d6573Sopenharmony_ci match builder.build() { 513c67d6573Sopenharmony_ci Ok(re) => { 514c67d6573Sopenharmony_ci Box::into_raw(Box::new(RegexSet { re: re })) 515c67d6573Sopenharmony_ci } 516c67d6573Sopenharmony_ci Err(err) => { 517c67d6573Sopenharmony_ci unsafe { 518c67d6573Sopenharmony_ci if !error.is_null() { 519c67d6573Sopenharmony_ci *error = Error::new(ErrorKind::Regex(err)) 520c67d6573Sopenharmony_ci } 521c67d6573Sopenharmony_ci ptr::null() 522c67d6573Sopenharmony_ci } 523c67d6573Sopenharmony_ci } 524c67d6573Sopenharmony_ci } 525c67d6573Sopenharmony_ci } 526c67d6573Sopenharmony_ci} 527c67d6573Sopenharmony_ci 528c67d6573Sopenharmony_ciffi_fn! { 529c67d6573Sopenharmony_ci fn rure_set_free(re: *const RegexSet) { 530c67d6573Sopenharmony_ci unsafe { drop(Box::from_raw(re as *mut RegexSet)); } 531c67d6573Sopenharmony_ci } 532c67d6573Sopenharmony_ci} 533c67d6573Sopenharmony_ci 534c67d6573Sopenharmony_ciffi_fn! { 535c67d6573Sopenharmony_ci fn rure_set_is_match( 536c67d6573Sopenharmony_ci re: *const RegexSet, 537c67d6573Sopenharmony_ci haystack: *const u8, 538c67d6573Sopenharmony_ci len: size_t, 539c67d6573Sopenharmony_ci start: size_t 540c67d6573Sopenharmony_ci ) -> bool { 541c67d6573Sopenharmony_ci let re = unsafe { &*re }; 542c67d6573Sopenharmony_ci let haystack = unsafe { slice::from_raw_parts(haystack, len) }; 543c67d6573Sopenharmony_ci re.is_match_at(haystack, start) 544c67d6573Sopenharmony_ci } 545c67d6573Sopenharmony_ci} 546c67d6573Sopenharmony_ci 547c67d6573Sopenharmony_ciffi_fn! { 548c67d6573Sopenharmony_ci fn rure_set_matches( 549c67d6573Sopenharmony_ci re: *const RegexSet, 550c67d6573Sopenharmony_ci haystack: *const u8, 551c67d6573Sopenharmony_ci len: size_t, 552c67d6573Sopenharmony_ci start: size_t, 553c67d6573Sopenharmony_ci matches: *mut bool 554c67d6573Sopenharmony_ci ) -> bool { 555c67d6573Sopenharmony_ci let re = unsafe { &*re }; 556c67d6573Sopenharmony_ci let mut matches = unsafe { 557c67d6573Sopenharmony_ci slice::from_raw_parts_mut(matches, re.len()) 558c67d6573Sopenharmony_ci }; 559c67d6573Sopenharmony_ci let haystack = unsafe { slice::from_raw_parts(haystack, len) }; 560c67d6573Sopenharmony_ci 561c67d6573Sopenharmony_ci // read_matches_at isn't guaranteed to set non-matches to false 562c67d6573Sopenharmony_ci for item in matches.iter_mut() { 563c67d6573Sopenharmony_ci *item = false; 564c67d6573Sopenharmony_ci } 565c67d6573Sopenharmony_ci re.read_matches_at(&mut matches, haystack, start) 566c67d6573Sopenharmony_ci } 567c67d6573Sopenharmony_ci} 568c67d6573Sopenharmony_ci 569c67d6573Sopenharmony_ciffi_fn! { 570c67d6573Sopenharmony_ci fn rure_set_len(re: *const RegexSet) -> size_t { 571c67d6573Sopenharmony_ci unsafe { (*re).len() } 572c67d6573Sopenharmony_ci } 573c67d6573Sopenharmony_ci} 574c67d6573Sopenharmony_ci 575c67d6573Sopenharmony_ciffi_fn! { 576c67d6573Sopenharmony_ci fn rure_escape_must(pattern: *const c_char) -> *const c_char { 577c67d6573Sopenharmony_ci let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() }; 578c67d6573Sopenharmony_ci let pat = pattern as *const u8; 579c67d6573Sopenharmony_ci let mut err = Error::new(ErrorKind::None); 580c67d6573Sopenharmony_ci let esc = rure_escape(pat, len, &mut err); 581c67d6573Sopenharmony_ci if err.is_err() { 582c67d6573Sopenharmony_ci let _ = writeln!(&mut io::stderr(), "{}", err); 583c67d6573Sopenharmony_ci let _ = writeln!( 584c67d6573Sopenharmony_ci &mut io::stderr(), "aborting from rure_escape_must"); 585c67d6573Sopenharmony_ci unsafe { abort() } 586c67d6573Sopenharmony_ci } 587c67d6573Sopenharmony_ci esc 588c67d6573Sopenharmony_ci } 589c67d6573Sopenharmony_ci} 590c67d6573Sopenharmony_ci 591c67d6573Sopenharmony_ci/// A helper function that implements fallible escaping in a way that returns 592c67d6573Sopenharmony_ci/// an error if escaping failed. 593c67d6573Sopenharmony_ci/// 594c67d6573Sopenharmony_ci/// This should ideally be exposed, but it needs API design work. In 595c67d6573Sopenharmony_ci/// particular, this should not return a C string, but a `const uint8_t *` 596c67d6573Sopenharmony_ci/// instead, since it may contain a NUL byte. 597c67d6573Sopenharmony_cifn rure_escape( 598c67d6573Sopenharmony_ci pattern: *const u8, 599c67d6573Sopenharmony_ci length: size_t, 600c67d6573Sopenharmony_ci error: *mut Error, 601c67d6573Sopenharmony_ci) -> *const c_char { 602c67d6573Sopenharmony_ci let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) }; 603c67d6573Sopenharmony_ci let str_pat = match str::from_utf8(pat) { 604c67d6573Sopenharmony_ci Ok(val) => val, 605c67d6573Sopenharmony_ci Err(err) => unsafe { 606c67d6573Sopenharmony_ci if !error.is_null() { 607c67d6573Sopenharmony_ci *error = Error::new(ErrorKind::Str(err)); 608c67d6573Sopenharmony_ci } 609c67d6573Sopenharmony_ci return ptr::null(); 610c67d6573Sopenharmony_ci }, 611c67d6573Sopenharmony_ci }; 612c67d6573Sopenharmony_ci let esc_pat = regex::escape(str_pat); 613c67d6573Sopenharmony_ci let c_esc_pat = match CString::new(esc_pat) { 614c67d6573Sopenharmony_ci Ok(val) => val, 615c67d6573Sopenharmony_ci Err(err) => unsafe { 616c67d6573Sopenharmony_ci if !error.is_null() { 617c67d6573Sopenharmony_ci *error = Error::new(ErrorKind::Nul(err)); 618c67d6573Sopenharmony_ci } 619c67d6573Sopenharmony_ci return ptr::null(); 620c67d6573Sopenharmony_ci }, 621c67d6573Sopenharmony_ci }; 622c67d6573Sopenharmony_ci c_esc_pat.into_raw() as *const c_char 623c67d6573Sopenharmony_ci} 624c67d6573Sopenharmony_ci 625c67d6573Sopenharmony_ciffi_fn! { 626c67d6573Sopenharmony_ci fn rure_cstring_free(s: *mut c_char) { 627c67d6573Sopenharmony_ci unsafe { drop(CString::from_raw(s)); } 628c67d6573Sopenharmony_ci } 629c67d6573Sopenharmony_ci} 630