1c67d6573Sopenharmony_ciuse std::collections::HashMap;
2c67d6573Sopenharmony_ciuse std::ffi::{CStr, CString};
3c67d6573Sopenharmony_ciuse std::ops::Deref;
4c67d6573Sopenharmony_ciuse std::ptr;
5c67d6573Sopenharmony_ciuse std::slice;
6c67d6573Sopenharmony_ciuse std::str;
7c67d6573Sopenharmony_ci
8c67d6573Sopenharmony_ciuse libc::{c_char, size_t};
9c67d6573Sopenharmony_ciuse regex::bytes;
10c67d6573Sopenharmony_ci
11c67d6573Sopenharmony_ciuse crate::error::{Error, ErrorKind};
12c67d6573Sopenharmony_ci
// Bit flags accepted through the `flags` argument of `rure_compile` and
// `rure_compile_set`. Each bit is tested individually and the result is
// passed to the builder option named in the comment next to it.

// Drives the builder's `case_insensitive` option.
const RURE_FLAG_CASEI: u32 = 1 << 0;
// Drives the builder's `multi_line` option.
const RURE_FLAG_MULTI: u32 = 1 << 1;
// Drives the builder's `dot_matches_new_line` option.
const RURE_FLAG_DOTNL: u32 = 1 << 2;
// Drives the builder's `swap_greed` option.
const RURE_FLAG_SWAP_GREED: u32 = 1 << 3;
// Drives the builder's `ignore_whitespace` option.
const RURE_FLAG_SPACE: u32 = 1 << 4;
// Drives the builder's `unicode` option.
const RURE_FLAG_UNICODE: u32 = 1 << 5;
// Flag set used by `rure_compile_must`: only the Unicode bit is set.
const RURE_DEFAULT_FLAGS: u32 = RURE_FLAG_UNICODE;
20c67d6573Sopenharmony_ci
/// A compiled byte-oriented regex bundled with a lookup table from capture
/// group name to capture group index.
pub struct Regex {
    /// The compiled regex; also reachable through the `Deref` impl.
    re: bytes::Regex,
    /// Index of every named capture group, filled in by `rure_compile` and
    /// queried by `rure_capture_name_index`.
    capture_names: HashMap<String, i32>,
}
25c67d6573Sopenharmony_ci
/// Compilation limits that `rure_compile` and `rure_compile_set` forward to
/// the regex builders when a non-null options pointer is supplied.
pub struct Options {
    /// Value handed to the builder's `size_limit`.
    size_limit: usize,
    /// Value handed to the builder's `dfa_size_limit`.
    dfa_size_limit: usize,
}
30c67d6573Sopenharmony_ci
// A compiled set of byte-oriented regexes.
//
// NOTE(review): this comment previously stated that `RegexSet` was not yet
// exposed by the crate with option support or with matching at an arbitrary
// position, and that the `Exec` structure was therefore used directly. The
// struct below wraps `bytes::RegexSet`, so that remark looks stale — confirm
// and drop it.
pub struct RegexSet {
    re: bytes::RegexSet,
}
37c67d6573Sopenharmony_ci
/// The span of one match, written into caller-provided storage by
/// `rure_find`, `rure_iter_next` and `rure_captures_at`.
///
/// `#[repr(C)]` gives the struct the C field layout.
#[repr(C)]
pub struct rure_match {
    /// Offset into the haystack at which the match starts.
    pub start: size_t,
    /// Offset into the haystack at which the match ends.
    pub end: size_t,
}
43c67d6573Sopenharmony_ci
/// Capture group locations for one match; a thin wrapper around
/// `bytes::Locations` that is filled in by the `*_captures` functions.
pub struct Captures(bytes::Locations);
45c67d6573Sopenharmony_ci
/// State of an iteration over successive matches of one regex.
///
/// The haystack is not stored here; it is supplied on every
/// `rure_iter_next` / `rure_iter_next_captures` call.
pub struct Iter {
    /// The regex being iterated, kept as a raw pointer and dereferenced on
    /// every step.
    re: *const Regex,
    /// Offset at which the next search starts.
    last_end: usize,
    /// End offset of the most recently reported match, if any. Used to skip
    /// an empty match that ends exactly where the previous match ended.
    last_match: Option<usize>,
}
51c67d6573Sopenharmony_ci
/// State of an iteration over the capture group names of a regex.
pub struct IterCaptureNames {
    /// The underlying name iterator.
    ///
    /// NOTE(review): the `'static` lifetime is nominal — the iterator is
    /// created in `rure_iter_capture_names_new` from a `Regex` reached through
    /// a raw pointer, so presumably that regex must outlive this value;
    /// confirm against the documented C API contract.
    capture_names: bytes::CaptureNames<'static>,
    /// C strings handed out by `rure_iter_capture_names_next`; they are
    /// reclaimed in `rure_iter_capture_names_free`.
    name_ptrs: Vec<*mut c_char>,
}
56c67d6573Sopenharmony_ci
57c67d6573Sopenharmony_ciimpl Deref for Regex {
58c67d6573Sopenharmony_ci    type Target = bytes::Regex;
59c67d6573Sopenharmony_ci    fn deref(&self) -> &bytes::Regex {
60c67d6573Sopenharmony_ci        &self.re
61c67d6573Sopenharmony_ci    }
62c67d6573Sopenharmony_ci}
63c67d6573Sopenharmony_ci
64c67d6573Sopenharmony_ciimpl Deref for RegexSet {
65c67d6573Sopenharmony_ci    type Target = bytes::RegexSet;
66c67d6573Sopenharmony_ci    fn deref(&self) -> &bytes::RegexSet {
67c67d6573Sopenharmony_ci        &self.re
68c67d6573Sopenharmony_ci    }
69c67d6573Sopenharmony_ci}
70c67d6573Sopenharmony_ci
71c67d6573Sopenharmony_ciimpl Default for Options {
72c67d6573Sopenharmony_ci    fn default() -> Options {
73c67d6573Sopenharmony_ci        Options { size_limit: 10 * (1 << 20), dfa_size_limit: 2 * (1 << 20) }
74c67d6573Sopenharmony_ci    }
75c67d6573Sopenharmony_ci}
76c67d6573Sopenharmony_ci
77c67d6573Sopenharmony_ciffi_fn! {
78c67d6573Sopenharmony_ci    fn rure_compile_must(pattern: *const c_char) -> *const Regex {
79c67d6573Sopenharmony_ci        let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
80c67d6573Sopenharmony_ci        let pat = pattern as *const u8;
81c67d6573Sopenharmony_ci        let mut err = Error::new(ErrorKind::None);
82c67d6573Sopenharmony_ci        let re = rure_compile(
83c67d6573Sopenharmony_ci            pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err);
84c67d6573Sopenharmony_ci        if err.is_err() {
85c67d6573Sopenharmony_ci            let _ = writeln!(&mut io::stderr(), "{}", err);
86c67d6573Sopenharmony_ci            let _ = writeln!(
87c67d6573Sopenharmony_ci                &mut io::stderr(), "aborting from rure_compile_must");
88c67d6573Sopenharmony_ci            unsafe { abort() }
89c67d6573Sopenharmony_ci        }
90c67d6573Sopenharmony_ci        re
91c67d6573Sopenharmony_ci    }
92c67d6573Sopenharmony_ci}
93c67d6573Sopenharmony_ci
94c67d6573Sopenharmony_ciffi_fn! {
95c67d6573Sopenharmony_ci    fn rure_compile(
96c67d6573Sopenharmony_ci        pattern: *const u8,
97c67d6573Sopenharmony_ci        length: size_t,
98c67d6573Sopenharmony_ci        flags: u32,
99c67d6573Sopenharmony_ci        options: *const Options,
100c67d6573Sopenharmony_ci        error: *mut Error,
101c67d6573Sopenharmony_ci    ) -> *const Regex {
102c67d6573Sopenharmony_ci        let pat = unsafe { slice::from_raw_parts(pattern, length) };
103c67d6573Sopenharmony_ci        let pat = match str::from_utf8(pat) {
104c67d6573Sopenharmony_ci            Ok(pat) => pat,
105c67d6573Sopenharmony_ci            Err(err) => {
106c67d6573Sopenharmony_ci                unsafe {
107c67d6573Sopenharmony_ci                    if !error.is_null() {
108c67d6573Sopenharmony_ci                        *error = Error::new(ErrorKind::Str(err));
109c67d6573Sopenharmony_ci                    }
110c67d6573Sopenharmony_ci                    return ptr::null();
111c67d6573Sopenharmony_ci                }
112c67d6573Sopenharmony_ci            }
113c67d6573Sopenharmony_ci        };
114c67d6573Sopenharmony_ci        let mut builder = bytes::RegexBuilder::new(pat);
115c67d6573Sopenharmony_ci        if !options.is_null() {
116c67d6573Sopenharmony_ci            let options = unsafe { &*options };
117c67d6573Sopenharmony_ci            builder.size_limit(options.size_limit);
118c67d6573Sopenharmony_ci            builder.dfa_size_limit(options.dfa_size_limit);
119c67d6573Sopenharmony_ci        }
120c67d6573Sopenharmony_ci        builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
121c67d6573Sopenharmony_ci        builder.multi_line(flags & RURE_FLAG_MULTI > 0);
122c67d6573Sopenharmony_ci        builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
123c67d6573Sopenharmony_ci        builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
124c67d6573Sopenharmony_ci        builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
125c67d6573Sopenharmony_ci        builder.unicode(flags & RURE_FLAG_UNICODE > 0);
126c67d6573Sopenharmony_ci        match builder.build() {
127c67d6573Sopenharmony_ci            Ok(re) => {
128c67d6573Sopenharmony_ci                let mut capture_names = HashMap::new();
129c67d6573Sopenharmony_ci                for (i, name) in re.capture_names().enumerate() {
130c67d6573Sopenharmony_ci                    if let Some(name) = name {
131c67d6573Sopenharmony_ci                        capture_names.insert(name.to_owned(), i as i32);
132c67d6573Sopenharmony_ci                    }
133c67d6573Sopenharmony_ci                }
134c67d6573Sopenharmony_ci                let re = Regex {
135c67d6573Sopenharmony_ci                    re: re,
136c67d6573Sopenharmony_ci                    capture_names: capture_names,
137c67d6573Sopenharmony_ci                };
138c67d6573Sopenharmony_ci                Box::into_raw(Box::new(re))
139c67d6573Sopenharmony_ci            }
140c67d6573Sopenharmony_ci            Err(err) => {
141c67d6573Sopenharmony_ci                unsafe {
142c67d6573Sopenharmony_ci                    if !error.is_null() {
143c67d6573Sopenharmony_ci                        *error = Error::new(ErrorKind::Regex(err));
144c67d6573Sopenharmony_ci                    }
145c67d6573Sopenharmony_ci                    ptr::null()
146c67d6573Sopenharmony_ci                }
147c67d6573Sopenharmony_ci            }
148c67d6573Sopenharmony_ci        }
149c67d6573Sopenharmony_ci    }
150c67d6573Sopenharmony_ci}
151c67d6573Sopenharmony_ci
152c67d6573Sopenharmony_ciffi_fn! {
153c67d6573Sopenharmony_ci    fn rure_free(re: *const Regex) {
154c67d6573Sopenharmony_ci        unsafe { drop(Box::from_raw(re as *mut Regex)); }
155c67d6573Sopenharmony_ci    }
156c67d6573Sopenharmony_ci}
157c67d6573Sopenharmony_ci
158c67d6573Sopenharmony_ciffi_fn! {
159c67d6573Sopenharmony_ci    fn rure_is_match(
160c67d6573Sopenharmony_ci        re: *const Regex,
161c67d6573Sopenharmony_ci        haystack: *const u8,
162c67d6573Sopenharmony_ci        len: size_t,
163c67d6573Sopenharmony_ci        start: size_t,
164c67d6573Sopenharmony_ci    ) -> bool {
165c67d6573Sopenharmony_ci        let re = unsafe { &*re };
166c67d6573Sopenharmony_ci        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
167c67d6573Sopenharmony_ci        re.is_match_at(haystack, start)
168c67d6573Sopenharmony_ci    }
169c67d6573Sopenharmony_ci}
170c67d6573Sopenharmony_ci
171c67d6573Sopenharmony_ciffi_fn! {
172c67d6573Sopenharmony_ci    fn rure_find(
173c67d6573Sopenharmony_ci        re: *const Regex,
174c67d6573Sopenharmony_ci        haystack: *const u8,
175c67d6573Sopenharmony_ci        len: size_t,
176c67d6573Sopenharmony_ci        start: size_t,
177c67d6573Sopenharmony_ci        match_info: *mut rure_match,
178c67d6573Sopenharmony_ci    ) -> bool {
179c67d6573Sopenharmony_ci        let re = unsafe { &*re };
180c67d6573Sopenharmony_ci        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
181c67d6573Sopenharmony_ci        re.find_at(haystack, start).map(|m| unsafe {
182c67d6573Sopenharmony_ci            if !match_info.is_null() {
183c67d6573Sopenharmony_ci                (*match_info).start = m.start();
184c67d6573Sopenharmony_ci                (*match_info).end = m.end();
185c67d6573Sopenharmony_ci            }
186c67d6573Sopenharmony_ci        }).is_some()
187c67d6573Sopenharmony_ci    }
188c67d6573Sopenharmony_ci}
189c67d6573Sopenharmony_ci
190c67d6573Sopenharmony_ciffi_fn! {
191c67d6573Sopenharmony_ci    fn rure_find_captures(
192c67d6573Sopenharmony_ci        re: *const Regex,
193c67d6573Sopenharmony_ci        haystack: *const u8,
194c67d6573Sopenharmony_ci        len: size_t,
195c67d6573Sopenharmony_ci        start: size_t,
196c67d6573Sopenharmony_ci        captures: *mut Captures,
197c67d6573Sopenharmony_ci    ) -> bool {
198c67d6573Sopenharmony_ci        let re = unsafe { &*re };
199c67d6573Sopenharmony_ci        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
200c67d6573Sopenharmony_ci        let slots = unsafe { &mut (*captures).0 };
201c67d6573Sopenharmony_ci        re.read_captures_at(slots, haystack, start).is_some()
202c67d6573Sopenharmony_ci    }
203c67d6573Sopenharmony_ci}
204c67d6573Sopenharmony_ci
205c67d6573Sopenharmony_ciffi_fn! {
206c67d6573Sopenharmony_ci    fn rure_shortest_match(
207c67d6573Sopenharmony_ci        re: *const Regex,
208c67d6573Sopenharmony_ci        haystack: *const u8,
209c67d6573Sopenharmony_ci        len: size_t,
210c67d6573Sopenharmony_ci        start: size_t,
211c67d6573Sopenharmony_ci        end: *mut usize,
212c67d6573Sopenharmony_ci    ) -> bool {
213c67d6573Sopenharmony_ci        let re = unsafe { &*re };
214c67d6573Sopenharmony_ci        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
215c67d6573Sopenharmony_ci        match re.shortest_match_at(haystack, start) {
216c67d6573Sopenharmony_ci            None => false,
217c67d6573Sopenharmony_ci            Some(i) => {
218c67d6573Sopenharmony_ci                if !end.is_null() {
219c67d6573Sopenharmony_ci                    unsafe {
220c67d6573Sopenharmony_ci                        *end = i;
221c67d6573Sopenharmony_ci                    }
222c67d6573Sopenharmony_ci                }
223c67d6573Sopenharmony_ci                true
224c67d6573Sopenharmony_ci            }
225c67d6573Sopenharmony_ci        }
226c67d6573Sopenharmony_ci    }
227c67d6573Sopenharmony_ci}
228c67d6573Sopenharmony_ci
229c67d6573Sopenharmony_ciffi_fn! {
230c67d6573Sopenharmony_ci    fn rure_capture_name_index(
231c67d6573Sopenharmony_ci        re: *const Regex,
232c67d6573Sopenharmony_ci        name: *const c_char,
233c67d6573Sopenharmony_ci    ) -> i32 {
234c67d6573Sopenharmony_ci        let re = unsafe { &*re };
235c67d6573Sopenharmony_ci        let name = unsafe { CStr::from_ptr(name) };
236c67d6573Sopenharmony_ci        let name = match name.to_str() {
237c67d6573Sopenharmony_ci            Err(_) => return -1,
238c67d6573Sopenharmony_ci            Ok(name) => name,
239c67d6573Sopenharmony_ci        };
240c67d6573Sopenharmony_ci        re.capture_names.get(name).map(|&i|i).unwrap_or(-1)
241c67d6573Sopenharmony_ci    }
242c67d6573Sopenharmony_ci}
243c67d6573Sopenharmony_ci
244c67d6573Sopenharmony_ciffi_fn! {
245c67d6573Sopenharmony_ci    fn rure_iter_capture_names_new(
246c67d6573Sopenharmony_ci        re: *const Regex,
247c67d6573Sopenharmony_ci    ) -> *mut IterCaptureNames {
248c67d6573Sopenharmony_ci        let re = unsafe { &*re };
249c67d6573Sopenharmony_ci        Box::into_raw(Box::new(IterCaptureNames {
250c67d6573Sopenharmony_ci            capture_names: re.re.capture_names(),
251c67d6573Sopenharmony_ci            name_ptrs: Vec::new(),
252c67d6573Sopenharmony_ci        }))
253c67d6573Sopenharmony_ci    }
254c67d6573Sopenharmony_ci}
255c67d6573Sopenharmony_ci
256c67d6573Sopenharmony_ciffi_fn! {
257c67d6573Sopenharmony_ci    fn rure_iter_capture_names_free(it: *mut IterCaptureNames) {
258c67d6573Sopenharmony_ci        unsafe {
259c67d6573Sopenharmony_ci            let it = &mut *it;
260c67d6573Sopenharmony_ci            while let Some(ptr) = it.name_ptrs.pop() {
261c67d6573Sopenharmony_ci                drop(CString::from_raw(ptr));
262c67d6573Sopenharmony_ci            }
263c67d6573Sopenharmony_ci            drop(Box::from_raw(it));
264c67d6573Sopenharmony_ci        }
265c67d6573Sopenharmony_ci    }
266c67d6573Sopenharmony_ci}
267c67d6573Sopenharmony_ci
268c67d6573Sopenharmony_ciffi_fn! {
269c67d6573Sopenharmony_ci    fn rure_iter_capture_names_next(
270c67d6573Sopenharmony_ci        it: *mut IterCaptureNames,
271c67d6573Sopenharmony_ci        capture_name: *mut *mut c_char,
272c67d6573Sopenharmony_ci    ) -> bool {
273c67d6573Sopenharmony_ci        if capture_name.is_null() {
274c67d6573Sopenharmony_ci            return false;
275c67d6573Sopenharmony_ci        }
276c67d6573Sopenharmony_ci
277c67d6573Sopenharmony_ci        let it = unsafe { &mut *it };
278c67d6573Sopenharmony_ci        let cn = match it.capture_names.next() {
279c67d6573Sopenharmony_ci            // Top-level iterator ran out of capture groups
280c67d6573Sopenharmony_ci            None => return false,
281c67d6573Sopenharmony_ci            Some(val) => {
282c67d6573Sopenharmony_ci                let name = match val {
283c67d6573Sopenharmony_ci                    // inner Option didn't have a name
284c67d6573Sopenharmony_ci                    None => "",
285c67d6573Sopenharmony_ci                    Some(name) => name
286c67d6573Sopenharmony_ci                };
287c67d6573Sopenharmony_ci                name
288c67d6573Sopenharmony_ci            }
289c67d6573Sopenharmony_ci        };
290c67d6573Sopenharmony_ci
291c67d6573Sopenharmony_ci        unsafe {
292c67d6573Sopenharmony_ci            let cs = match CString::new(cn.as_bytes()) {
293c67d6573Sopenharmony_ci                Result::Ok(val) => val,
294c67d6573Sopenharmony_ci                Result::Err(_) => return false
295c67d6573Sopenharmony_ci            };
296c67d6573Sopenharmony_ci            let ptr = cs.into_raw();
297c67d6573Sopenharmony_ci            it.name_ptrs.push(ptr);
298c67d6573Sopenharmony_ci            *capture_name = ptr;
299c67d6573Sopenharmony_ci        }
300c67d6573Sopenharmony_ci        true
301c67d6573Sopenharmony_ci
302c67d6573Sopenharmony_ci    }
303c67d6573Sopenharmony_ci}
304c67d6573Sopenharmony_ci
305c67d6573Sopenharmony_ciffi_fn! {
306c67d6573Sopenharmony_ci    fn rure_iter_new(
307c67d6573Sopenharmony_ci        re: *const Regex,
308c67d6573Sopenharmony_ci    ) -> *mut Iter {
309c67d6573Sopenharmony_ci        Box::into_raw(Box::new(Iter {
310c67d6573Sopenharmony_ci            re: re,
311c67d6573Sopenharmony_ci            last_end: 0,
312c67d6573Sopenharmony_ci            last_match: None,
313c67d6573Sopenharmony_ci        }))
314c67d6573Sopenharmony_ci    }
315c67d6573Sopenharmony_ci}
316c67d6573Sopenharmony_ci
317c67d6573Sopenharmony_ciffi_fn! {
318c67d6573Sopenharmony_ci    fn rure_iter_free(it: *mut Iter) {
319c67d6573Sopenharmony_ci        unsafe { drop(Box::from_raw(it)); }
320c67d6573Sopenharmony_ci    }
321c67d6573Sopenharmony_ci}
322c67d6573Sopenharmony_ci
323c67d6573Sopenharmony_ciffi_fn! {
324c67d6573Sopenharmony_ci    fn rure_iter_next(
325c67d6573Sopenharmony_ci        it: *mut Iter,
326c67d6573Sopenharmony_ci        haystack: *const u8,
327c67d6573Sopenharmony_ci        len: size_t,
328c67d6573Sopenharmony_ci        match_info: *mut rure_match,
329c67d6573Sopenharmony_ci    ) -> bool {
330c67d6573Sopenharmony_ci        let it = unsafe { &mut *it };
331c67d6573Sopenharmony_ci        let re = unsafe { &*it.re };
332c67d6573Sopenharmony_ci        let text = unsafe { slice::from_raw_parts(haystack, len) };
333c67d6573Sopenharmony_ci        if it.last_end > text.len() {
334c67d6573Sopenharmony_ci            return false;
335c67d6573Sopenharmony_ci        }
336c67d6573Sopenharmony_ci        let (s, e) = match re.find_at(text, it.last_end) {
337c67d6573Sopenharmony_ci            None => return false,
338c67d6573Sopenharmony_ci            Some(m) => (m.start(), m.end()),
339c67d6573Sopenharmony_ci        };
340c67d6573Sopenharmony_ci        if s == e {
341c67d6573Sopenharmony_ci            // This is an empty match. To ensure we make progress, start
342c67d6573Sopenharmony_ci            // the next search at the smallest possible starting position
343c67d6573Sopenharmony_ci            // of the next match following this one.
344c67d6573Sopenharmony_ci            it.last_end += 1;
345c67d6573Sopenharmony_ci            // Don't accept empty matches immediately following a match.
346c67d6573Sopenharmony_ci            // Just move on to the next match.
347c67d6573Sopenharmony_ci            if Some(e) == it.last_match {
348c67d6573Sopenharmony_ci                return rure_iter_next(it, haystack, len, match_info);
349c67d6573Sopenharmony_ci            }
350c67d6573Sopenharmony_ci        } else {
351c67d6573Sopenharmony_ci            it.last_end = e;
352c67d6573Sopenharmony_ci        }
353c67d6573Sopenharmony_ci        it.last_match = Some(e);
354c67d6573Sopenharmony_ci        if !match_info.is_null() {
355c67d6573Sopenharmony_ci            unsafe {
356c67d6573Sopenharmony_ci                (*match_info).start = s;
357c67d6573Sopenharmony_ci                (*match_info).end = e;
358c67d6573Sopenharmony_ci            }
359c67d6573Sopenharmony_ci        }
360c67d6573Sopenharmony_ci        true
361c67d6573Sopenharmony_ci    }
362c67d6573Sopenharmony_ci}
363c67d6573Sopenharmony_ci
364c67d6573Sopenharmony_ciffi_fn! {
365c67d6573Sopenharmony_ci    fn rure_iter_next_captures(
366c67d6573Sopenharmony_ci        it: *mut Iter,
367c67d6573Sopenharmony_ci        haystack: *const u8,
368c67d6573Sopenharmony_ci        len: size_t,
369c67d6573Sopenharmony_ci        captures: *mut Captures,
370c67d6573Sopenharmony_ci    ) -> bool {
371c67d6573Sopenharmony_ci        let it = unsafe { &mut *it };
372c67d6573Sopenharmony_ci        let re = unsafe { &*it.re };
373c67d6573Sopenharmony_ci        let slots = unsafe { &mut (*captures).0 };
374c67d6573Sopenharmony_ci        let text = unsafe { slice::from_raw_parts(haystack, len) };
375c67d6573Sopenharmony_ci        if it.last_end > text.len() {
376c67d6573Sopenharmony_ci            return false;
377c67d6573Sopenharmony_ci        }
378c67d6573Sopenharmony_ci        let (s, e) = match re.read_captures_at(slots, text, it.last_end) {
379c67d6573Sopenharmony_ci            None => return false,
380c67d6573Sopenharmony_ci            Some(m) => (m.start(), m.end()),
381c67d6573Sopenharmony_ci        };
382c67d6573Sopenharmony_ci        if s == e {
383c67d6573Sopenharmony_ci            // This is an empty match. To ensure we make progress, start
384c67d6573Sopenharmony_ci            // the next search at the smallest possible starting position
385c67d6573Sopenharmony_ci            // of the next match following this one.
386c67d6573Sopenharmony_ci            it.last_end += 1;
387c67d6573Sopenharmony_ci            // Don't accept empty matches immediately following a match.
388c67d6573Sopenharmony_ci            // Just move on to the next match.
389c67d6573Sopenharmony_ci            if Some(e) == it.last_match {
390c67d6573Sopenharmony_ci                return rure_iter_next_captures(it, haystack, len, captures);
391c67d6573Sopenharmony_ci            }
392c67d6573Sopenharmony_ci        } else {
393c67d6573Sopenharmony_ci            it.last_end = e;
394c67d6573Sopenharmony_ci        }
395c67d6573Sopenharmony_ci        it.last_match = Some(e);
396c67d6573Sopenharmony_ci        true
397c67d6573Sopenharmony_ci    }
398c67d6573Sopenharmony_ci}
399c67d6573Sopenharmony_ci
400c67d6573Sopenharmony_ciffi_fn! {
401c67d6573Sopenharmony_ci    fn rure_captures_new(re: *const Regex) -> *mut Captures {
402c67d6573Sopenharmony_ci        let re = unsafe { &*re };
403c67d6573Sopenharmony_ci        let captures = Captures(re.locations());
404c67d6573Sopenharmony_ci        Box::into_raw(Box::new(captures))
405c67d6573Sopenharmony_ci    }
406c67d6573Sopenharmony_ci}
407c67d6573Sopenharmony_ci
408c67d6573Sopenharmony_ciffi_fn! {
409c67d6573Sopenharmony_ci    fn rure_captures_free(captures: *const Captures) {
410c67d6573Sopenharmony_ci        unsafe { drop(Box::from_raw(captures as *mut Captures)); }
411c67d6573Sopenharmony_ci    }
412c67d6573Sopenharmony_ci}
413c67d6573Sopenharmony_ci
414c67d6573Sopenharmony_ciffi_fn! {
415c67d6573Sopenharmony_ci    fn rure_captures_at(
416c67d6573Sopenharmony_ci        captures: *const Captures,
417c67d6573Sopenharmony_ci        i: size_t,
418c67d6573Sopenharmony_ci        match_info: *mut rure_match,
419c67d6573Sopenharmony_ci    ) -> bool {
420c67d6573Sopenharmony_ci        let locs = unsafe { &(*captures).0 };
421c67d6573Sopenharmony_ci        match locs.pos(i) {
422c67d6573Sopenharmony_ci            Some((start, end)) => {
423c67d6573Sopenharmony_ci                if !match_info.is_null() {
424c67d6573Sopenharmony_ci                    unsafe {
425c67d6573Sopenharmony_ci                        (*match_info).start = start;
426c67d6573Sopenharmony_ci                        (*match_info).end = end;
427c67d6573Sopenharmony_ci                    }
428c67d6573Sopenharmony_ci                }
429c67d6573Sopenharmony_ci                true
430c67d6573Sopenharmony_ci            }
431c67d6573Sopenharmony_ci            _ => false
432c67d6573Sopenharmony_ci        }
433c67d6573Sopenharmony_ci    }
434c67d6573Sopenharmony_ci}
435c67d6573Sopenharmony_ci
436c67d6573Sopenharmony_ciffi_fn! {
437c67d6573Sopenharmony_ci    fn rure_captures_len(captures: *const Captures) -> size_t {
438c67d6573Sopenharmony_ci        unsafe { (*captures).0.len() }
439c67d6573Sopenharmony_ci    }
440c67d6573Sopenharmony_ci}
441c67d6573Sopenharmony_ci
442c67d6573Sopenharmony_ciffi_fn! {
443c67d6573Sopenharmony_ci    fn rure_options_new() -> *mut Options {
444c67d6573Sopenharmony_ci        Box::into_raw(Box::new(Options::default()))
445c67d6573Sopenharmony_ci    }
446c67d6573Sopenharmony_ci}
447c67d6573Sopenharmony_ci
448c67d6573Sopenharmony_ciffi_fn! {
449c67d6573Sopenharmony_ci    fn rure_options_free(options: *mut Options) {
450c67d6573Sopenharmony_ci        unsafe { drop(Box::from_raw(options)); }
451c67d6573Sopenharmony_ci    }
452c67d6573Sopenharmony_ci}
453c67d6573Sopenharmony_ci
454c67d6573Sopenharmony_ciffi_fn! {
455c67d6573Sopenharmony_ci    fn rure_options_size_limit(options: *mut Options, limit: size_t) {
456c67d6573Sopenharmony_ci        let options = unsafe { &mut *options };
457c67d6573Sopenharmony_ci        options.size_limit = limit;
458c67d6573Sopenharmony_ci    }
459c67d6573Sopenharmony_ci}
460c67d6573Sopenharmony_ci
461c67d6573Sopenharmony_ciffi_fn! {
462c67d6573Sopenharmony_ci    fn rure_options_dfa_size_limit(options: *mut Options, limit: size_t) {
463c67d6573Sopenharmony_ci        let options = unsafe { &mut *options };
464c67d6573Sopenharmony_ci        options.dfa_size_limit = limit;
465c67d6573Sopenharmony_ci    }
466c67d6573Sopenharmony_ci}
467c67d6573Sopenharmony_ci
468c67d6573Sopenharmony_ciffi_fn! {
469c67d6573Sopenharmony_ci    fn rure_compile_set(
470c67d6573Sopenharmony_ci        patterns: *const *const u8,
471c67d6573Sopenharmony_ci        patterns_lengths: *const size_t,
472c67d6573Sopenharmony_ci        patterns_count: size_t,
473c67d6573Sopenharmony_ci        flags: u32,
474c67d6573Sopenharmony_ci        options: *const Options,
475c67d6573Sopenharmony_ci        error: *mut Error
476c67d6573Sopenharmony_ci    ) -> *const RegexSet {
477c67d6573Sopenharmony_ci        let (raw_pats, raw_patsl) = unsafe {
478c67d6573Sopenharmony_ci            (
479c67d6573Sopenharmony_ci                slice::from_raw_parts(patterns, patterns_count),
480c67d6573Sopenharmony_ci                slice::from_raw_parts(patterns_lengths, patterns_count)
481c67d6573Sopenharmony_ci            )
482c67d6573Sopenharmony_ci        };
483c67d6573Sopenharmony_ci
484c67d6573Sopenharmony_ci        let mut pats = Vec::with_capacity(patterns_count);
485c67d6573Sopenharmony_ci        for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) {
486c67d6573Sopenharmony_ci            let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) };
487c67d6573Sopenharmony_ci            pats.push(match str::from_utf8(pat) {
488c67d6573Sopenharmony_ci                Ok(pat) => pat,
489c67d6573Sopenharmony_ci                Err(err) => {
490c67d6573Sopenharmony_ci                    unsafe {
491c67d6573Sopenharmony_ci                        if !error.is_null() {
492c67d6573Sopenharmony_ci                            *error = Error::new(ErrorKind::Str(err));
493c67d6573Sopenharmony_ci                        }
494c67d6573Sopenharmony_ci                        return ptr::null();
495c67d6573Sopenharmony_ci                    }
496c67d6573Sopenharmony_ci                }
497c67d6573Sopenharmony_ci            });
498c67d6573Sopenharmony_ci        }
499c67d6573Sopenharmony_ci
500c67d6573Sopenharmony_ci        let mut builder = bytes::RegexSetBuilder::new(pats);
501c67d6573Sopenharmony_ci        if !options.is_null() {
502c67d6573Sopenharmony_ci            let options = unsafe { &*options };
503c67d6573Sopenharmony_ci            builder.size_limit(options.size_limit);
504c67d6573Sopenharmony_ci            builder.dfa_size_limit(options.dfa_size_limit);
505c67d6573Sopenharmony_ci        }
506c67d6573Sopenharmony_ci        builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
507c67d6573Sopenharmony_ci        builder.multi_line(flags & RURE_FLAG_MULTI > 0);
508c67d6573Sopenharmony_ci        builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
509c67d6573Sopenharmony_ci        builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
510c67d6573Sopenharmony_ci        builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
511c67d6573Sopenharmony_ci        builder.unicode(flags & RURE_FLAG_UNICODE > 0);
512c67d6573Sopenharmony_ci        match builder.build() {
513c67d6573Sopenharmony_ci            Ok(re) => {
514c67d6573Sopenharmony_ci                Box::into_raw(Box::new(RegexSet { re: re }))
515c67d6573Sopenharmony_ci            }
516c67d6573Sopenharmony_ci            Err(err) => {
517c67d6573Sopenharmony_ci                unsafe {
518c67d6573Sopenharmony_ci                    if !error.is_null() {
519c67d6573Sopenharmony_ci                        *error = Error::new(ErrorKind::Regex(err))
520c67d6573Sopenharmony_ci                    }
521c67d6573Sopenharmony_ci                    ptr::null()
522c67d6573Sopenharmony_ci                }
523c67d6573Sopenharmony_ci            }
524c67d6573Sopenharmony_ci        }
525c67d6573Sopenharmony_ci    }
526c67d6573Sopenharmony_ci}
527c67d6573Sopenharmony_ci
528c67d6573Sopenharmony_ciffi_fn! {
529c67d6573Sopenharmony_ci    fn rure_set_free(re: *const RegexSet) {
530c67d6573Sopenharmony_ci        unsafe { drop(Box::from_raw(re as *mut RegexSet)); }
531c67d6573Sopenharmony_ci    }
532c67d6573Sopenharmony_ci}
533c67d6573Sopenharmony_ci
534c67d6573Sopenharmony_ciffi_fn! {
535c67d6573Sopenharmony_ci    fn rure_set_is_match(
536c67d6573Sopenharmony_ci        re: *const RegexSet,
537c67d6573Sopenharmony_ci        haystack: *const u8,
538c67d6573Sopenharmony_ci        len: size_t,
539c67d6573Sopenharmony_ci        start: size_t
540c67d6573Sopenharmony_ci    ) -> bool {
541c67d6573Sopenharmony_ci        let re = unsafe { &*re };
542c67d6573Sopenharmony_ci        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
543c67d6573Sopenharmony_ci        re.is_match_at(haystack, start)
544c67d6573Sopenharmony_ci    }
545c67d6573Sopenharmony_ci}
546c67d6573Sopenharmony_ci
547c67d6573Sopenharmony_ciffi_fn! {
548c67d6573Sopenharmony_ci    fn rure_set_matches(
549c67d6573Sopenharmony_ci        re: *const RegexSet,
550c67d6573Sopenharmony_ci        haystack: *const u8,
551c67d6573Sopenharmony_ci        len: size_t,
552c67d6573Sopenharmony_ci        start: size_t,
553c67d6573Sopenharmony_ci        matches: *mut bool
554c67d6573Sopenharmony_ci    ) -> bool {
555c67d6573Sopenharmony_ci        let re = unsafe { &*re };
556c67d6573Sopenharmony_ci        let mut matches = unsafe {
557c67d6573Sopenharmony_ci            slice::from_raw_parts_mut(matches, re.len())
558c67d6573Sopenharmony_ci        };
559c67d6573Sopenharmony_ci        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
560c67d6573Sopenharmony_ci
561c67d6573Sopenharmony_ci        // read_matches_at isn't guaranteed to set non-matches to false
562c67d6573Sopenharmony_ci        for item in matches.iter_mut() {
563c67d6573Sopenharmony_ci            *item = false;
564c67d6573Sopenharmony_ci        }
565c67d6573Sopenharmony_ci        re.read_matches_at(&mut matches, haystack, start)
566c67d6573Sopenharmony_ci    }
567c67d6573Sopenharmony_ci}
568c67d6573Sopenharmony_ci
569c67d6573Sopenharmony_ciffi_fn! {
570c67d6573Sopenharmony_ci    fn rure_set_len(re: *const RegexSet) -> size_t {
571c67d6573Sopenharmony_ci        unsafe { (*re).len() }
572c67d6573Sopenharmony_ci    }
573c67d6573Sopenharmony_ci}
574c67d6573Sopenharmony_ci
575c67d6573Sopenharmony_ciffi_fn! {
576c67d6573Sopenharmony_ci    fn rure_escape_must(pattern: *const c_char) -> *const c_char {
577c67d6573Sopenharmony_ci        let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
578c67d6573Sopenharmony_ci        let pat = pattern as *const u8;
579c67d6573Sopenharmony_ci        let mut err = Error::new(ErrorKind::None);
580c67d6573Sopenharmony_ci        let esc = rure_escape(pat, len, &mut err);
581c67d6573Sopenharmony_ci        if err.is_err() {
582c67d6573Sopenharmony_ci            let _ = writeln!(&mut io::stderr(), "{}", err);
583c67d6573Sopenharmony_ci            let _ = writeln!(
584c67d6573Sopenharmony_ci                &mut io::stderr(), "aborting from rure_escape_must");
585c67d6573Sopenharmony_ci            unsafe { abort() }
586c67d6573Sopenharmony_ci        }
587c67d6573Sopenharmony_ci        esc
588c67d6573Sopenharmony_ci    }
589c67d6573Sopenharmony_ci}
590c67d6573Sopenharmony_ci
591c67d6573Sopenharmony_ci/// A helper function that implements fallible escaping in a way that returns
592c67d6573Sopenharmony_ci/// an error if escaping failed.
593c67d6573Sopenharmony_ci///
594c67d6573Sopenharmony_ci/// This should ideally be exposed, but it needs API design work. In
595c67d6573Sopenharmony_ci/// particular, this should not return a C string, but a `const uint8_t *`
596c67d6573Sopenharmony_ci/// instead, since it may contain a NUL byte.
597c67d6573Sopenharmony_cifn rure_escape(
598c67d6573Sopenharmony_ci    pattern: *const u8,
599c67d6573Sopenharmony_ci    length: size_t,
600c67d6573Sopenharmony_ci    error: *mut Error,
601c67d6573Sopenharmony_ci) -> *const c_char {
602c67d6573Sopenharmony_ci    let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
603c67d6573Sopenharmony_ci    let str_pat = match str::from_utf8(pat) {
604c67d6573Sopenharmony_ci        Ok(val) => val,
605c67d6573Sopenharmony_ci        Err(err) => unsafe {
606c67d6573Sopenharmony_ci            if !error.is_null() {
607c67d6573Sopenharmony_ci                *error = Error::new(ErrorKind::Str(err));
608c67d6573Sopenharmony_ci            }
609c67d6573Sopenharmony_ci            return ptr::null();
610c67d6573Sopenharmony_ci        },
611c67d6573Sopenharmony_ci    };
612c67d6573Sopenharmony_ci    let esc_pat = regex::escape(str_pat);
613c67d6573Sopenharmony_ci    let c_esc_pat = match CString::new(esc_pat) {
614c67d6573Sopenharmony_ci        Ok(val) => val,
615c67d6573Sopenharmony_ci        Err(err) => unsafe {
616c67d6573Sopenharmony_ci            if !error.is_null() {
617c67d6573Sopenharmony_ci                *error = Error::new(ErrorKind::Nul(err));
618c67d6573Sopenharmony_ci            }
619c67d6573Sopenharmony_ci            return ptr::null();
620c67d6573Sopenharmony_ci        },
621c67d6573Sopenharmony_ci    };
622c67d6573Sopenharmony_ci    c_esc_pat.into_raw() as *const c_char
623c67d6573Sopenharmony_ci}
624c67d6573Sopenharmony_ci
625c67d6573Sopenharmony_ciffi_fn! {
626c67d6573Sopenharmony_ci    fn rure_cstring_free(s: *mut c_char) {
627c67d6573Sopenharmony_ci        unsafe { drop(CString::from_raw(s)); }
628c67d6573Sopenharmony_ci    }
629c67d6573Sopenharmony_ci}
630