1use std::collections::HashMap;
2use std::ffi::{CStr, CString};
3use std::ops::Deref;
4use std::ptr;
5use std::slice;
6use std::str;
7
8use libc::{c_char, size_t};
9use regex::bytes;
10
11use crate::error::{Error, ErrorKind};
12
// Bit flags accepted through the `flags` argument of `rure_compile` and
// `rure_compile_set`. Each flag switches on the builder option named in its
// comment.

// Case-insensitive matching (`case_insensitive`).
const RURE_FLAG_CASEI: u32 = 1 << 0;
// Multi-line mode (`multi_line`).
const RURE_FLAG_MULTI: u32 = 1 << 1;
// `.` also matches a new line (`dot_matches_new_line`).
const RURE_FLAG_DOTNL: u32 = 1 << 2;
// Swapped greediness (`swap_greed`).
const RURE_FLAG_SWAP_GREED: u32 = 1 << 3;
// Whitespace in the pattern is ignored (`ignore_whitespace`).
const RURE_FLAG_SPACE: u32 = 1 << 4;
// Unicode support (`unicode`).
const RURE_FLAG_UNICODE: u32 = 1 << 5;
// Flags used by `rure_compile_must`, which takes no `flags` argument.
const RURE_DEFAULT_FLAGS: u32 = RURE_FLAG_UNICODE;
20
/// A compiled regex plus a lookup table for its named capture groups.
///
/// Values are boxed and handed out as raw pointers by `rure_compile`, and
/// reclaimed by `rure_free`.
pub struct Regex {
    /// The underlying byte-oriented regex; also reachable through `Deref`.
    re: bytes::Regex,
    /// Maps each named capture group to its position in
    /// `re.capture_names()`, as recorded when the regex was compiled.
    capture_names: HashMap<String, i32>,
}
25
/// Compilation limits that `rure_compile` and `rure_compile_set` forward to
/// the regex builder when a non-null `Options` pointer is supplied.
pub struct Options {
    /// Value passed to the builder's `size_limit`.
    size_limit: usize,
    /// Value passed to the builder's `dfa_size_limit`.
    dfa_size_limit: usize,
}
30
// The `RegexSet` is not exposed with option support or matching at an
// arbitrary position with a crate just yet. To circumvent this, we use
// the `Exec` structure directly.
// NOTE(review): the field below is a plain `bytes::RegexSet`, so the remark
// about `Exec` above may be out of date — confirm before relying on it.

/// A compiled set of regexes, created by `rure_compile_set` and released by
/// `rure_set_free`.
pub struct RegexSet {
    /// The underlying byte-oriented regex set; also reachable through `Deref`.
    re: bytes::RegexSet,
}
37
/// A match span written back to callers through `*mut rure_match`
/// out-parameters.
///
/// Declared `#[repr(C)]` so the field layout follows C rules.
#[repr(C)]
pub struct rure_match {
    /// Offset into the haystack at which the match starts.
    pub start: size_t,
    /// Offset into the haystack at which the match ends.
    pub end: size_t,
}
43
/// Capture-group locations for a single search.
///
/// Allocated by `rure_captures_new`, filled in by `rure_find_captures` and
/// `rure_iter_next_captures`, and read back with `rure_captures_at`.
pub struct Captures(bytes::Locations);
45
/// Cursor state for iterating over successive matches of one regex.
///
/// The haystack is not stored here; it is passed to every
/// `rure_iter_next` / `rure_iter_next_captures` call.
pub struct Iter {
    /// The regex being searched with. Stored as a raw pointer; this struct
    /// does not free it.
    re: *const Regex,
    /// Haystack offset at which the next search begins.
    last_end: usize,
    /// End offset of the most recently reported match, or `None` before the
    /// first match has been reported.
    last_match: Option<usize>,
}
51
/// Cursor state for iterating over the capture group names of a regex.
pub struct IterCaptureNames {
    /// Iterator over the regex's capture names. The `'static` lifetime is
    /// nominal: the iterator is obtained through a raw `Regex` pointer in
    /// `rure_iter_capture_names_new`.
    capture_names: bytes::CaptureNames<'static>,
    /// Every C string handed out by `rure_iter_capture_names_next`, kept so
    /// that `rure_iter_capture_names_free` can release them.
    name_ptrs: Vec<*mut c_char>,
}
56
57impl Deref for Regex {
58    type Target = bytes::Regex;
59    fn deref(&self) -> &bytes::Regex {
60        &self.re
61    }
62}
63
64impl Deref for RegexSet {
65    type Target = bytes::RegexSet;
66    fn deref(&self) -> &bytes::RegexSet {
67        &self.re
68    }
69}
70
71impl Default for Options {
72    fn default() -> Options {
73        Options { size_limit: 10 * (1 << 20), dfa_size_limit: 2 * (1 << 20) }
74    }
75}
76
77ffi_fn! {
78    fn rure_compile_must(pattern: *const c_char) -> *const Regex {
79        let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
80        let pat = pattern as *const u8;
81        let mut err = Error::new(ErrorKind::None);
82        let re = rure_compile(
83            pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err);
84        if err.is_err() {
85            let _ = writeln!(&mut io::stderr(), "{}", err);
86            let _ = writeln!(
87                &mut io::stderr(), "aborting from rure_compile_must");
88            unsafe { abort() }
89        }
90        re
91    }
92}
93
94ffi_fn! {
95    fn rure_compile(
96        pattern: *const u8,
97        length: size_t,
98        flags: u32,
99        options: *const Options,
100        error: *mut Error,
101    ) -> *const Regex {
102        let pat = unsafe { slice::from_raw_parts(pattern, length) };
103        let pat = match str::from_utf8(pat) {
104            Ok(pat) => pat,
105            Err(err) => {
106                unsafe {
107                    if !error.is_null() {
108                        *error = Error::new(ErrorKind::Str(err));
109                    }
110                    return ptr::null();
111                }
112            }
113        };
114        let mut builder = bytes::RegexBuilder::new(pat);
115        if !options.is_null() {
116            let options = unsafe { &*options };
117            builder.size_limit(options.size_limit);
118            builder.dfa_size_limit(options.dfa_size_limit);
119        }
120        builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
121        builder.multi_line(flags & RURE_FLAG_MULTI > 0);
122        builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
123        builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
124        builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
125        builder.unicode(flags & RURE_FLAG_UNICODE > 0);
126        match builder.build() {
127            Ok(re) => {
128                let mut capture_names = HashMap::new();
129                for (i, name) in re.capture_names().enumerate() {
130                    if let Some(name) = name {
131                        capture_names.insert(name.to_owned(), i as i32);
132                    }
133                }
134                let re = Regex {
135                    re: re,
136                    capture_names: capture_names,
137                };
138                Box::into_raw(Box::new(re))
139            }
140            Err(err) => {
141                unsafe {
142                    if !error.is_null() {
143                        *error = Error::new(ErrorKind::Regex(err));
144                    }
145                    ptr::null()
146                }
147            }
148        }
149    }
150}
151
152ffi_fn! {
153    fn rure_free(re: *const Regex) {
154        unsafe { drop(Box::from_raw(re as *mut Regex)); }
155    }
156}
157
158ffi_fn! {
159    fn rure_is_match(
160        re: *const Regex,
161        haystack: *const u8,
162        len: size_t,
163        start: size_t,
164    ) -> bool {
165        let re = unsafe { &*re };
166        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
167        re.is_match_at(haystack, start)
168    }
169}
170
171ffi_fn! {
172    fn rure_find(
173        re: *const Regex,
174        haystack: *const u8,
175        len: size_t,
176        start: size_t,
177        match_info: *mut rure_match,
178    ) -> bool {
179        let re = unsafe { &*re };
180        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
181        re.find_at(haystack, start).map(|m| unsafe {
182            if !match_info.is_null() {
183                (*match_info).start = m.start();
184                (*match_info).end = m.end();
185            }
186        }).is_some()
187    }
188}
189
190ffi_fn! {
191    fn rure_find_captures(
192        re: *const Regex,
193        haystack: *const u8,
194        len: size_t,
195        start: size_t,
196        captures: *mut Captures,
197    ) -> bool {
198        let re = unsafe { &*re };
199        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
200        let slots = unsafe { &mut (*captures).0 };
201        re.read_captures_at(slots, haystack, start).is_some()
202    }
203}
204
205ffi_fn! {
206    fn rure_shortest_match(
207        re: *const Regex,
208        haystack: *const u8,
209        len: size_t,
210        start: size_t,
211        end: *mut usize,
212    ) -> bool {
213        let re = unsafe { &*re };
214        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
215        match re.shortest_match_at(haystack, start) {
216            None => false,
217            Some(i) => {
218                if !end.is_null() {
219                    unsafe {
220                        *end = i;
221                    }
222                }
223                true
224            }
225        }
226    }
227}
228
229ffi_fn! {
230    fn rure_capture_name_index(
231        re: *const Regex,
232        name: *const c_char,
233    ) -> i32 {
234        let re = unsafe { &*re };
235        let name = unsafe { CStr::from_ptr(name) };
236        let name = match name.to_str() {
237            Err(_) => return -1,
238            Ok(name) => name,
239        };
240        re.capture_names.get(name).map(|&i|i).unwrap_or(-1)
241    }
242}
243
244ffi_fn! {
245    fn rure_iter_capture_names_new(
246        re: *const Regex,
247    ) -> *mut IterCaptureNames {
248        let re = unsafe { &*re };
249        Box::into_raw(Box::new(IterCaptureNames {
250            capture_names: re.re.capture_names(),
251            name_ptrs: Vec::new(),
252        }))
253    }
254}
255
256ffi_fn! {
257    fn rure_iter_capture_names_free(it: *mut IterCaptureNames) {
258        unsafe {
259            let it = &mut *it;
260            while let Some(ptr) = it.name_ptrs.pop() {
261                drop(CString::from_raw(ptr));
262            }
263            drop(Box::from_raw(it));
264        }
265    }
266}
267
268ffi_fn! {
269    fn rure_iter_capture_names_next(
270        it: *mut IterCaptureNames,
271        capture_name: *mut *mut c_char,
272    ) -> bool {
273        if capture_name.is_null() {
274            return false;
275        }
276
277        let it = unsafe { &mut *it };
278        let cn = match it.capture_names.next() {
279            // Top-level iterator ran out of capture groups
280            None => return false,
281            Some(val) => {
282                let name = match val {
283                    // inner Option didn't have a name
284                    None => "",
285                    Some(name) => name
286                };
287                name
288            }
289        };
290
291        unsafe {
292            let cs = match CString::new(cn.as_bytes()) {
293                Result::Ok(val) => val,
294                Result::Err(_) => return false
295            };
296            let ptr = cs.into_raw();
297            it.name_ptrs.push(ptr);
298            *capture_name = ptr;
299        }
300        true
301
302    }
303}
304
305ffi_fn! {
306    fn rure_iter_new(
307        re: *const Regex,
308    ) -> *mut Iter {
309        Box::into_raw(Box::new(Iter {
310            re: re,
311            last_end: 0,
312            last_match: None,
313        }))
314    }
315}
316
317ffi_fn! {
318    fn rure_iter_free(it: *mut Iter) {
319        unsafe { drop(Box::from_raw(it)); }
320    }
321}
322
323ffi_fn! {
324    fn rure_iter_next(
325        it: *mut Iter,
326        haystack: *const u8,
327        len: size_t,
328        match_info: *mut rure_match,
329    ) -> bool {
330        let it = unsafe { &mut *it };
331        let re = unsafe { &*it.re };
332        let text = unsafe { slice::from_raw_parts(haystack, len) };
333        if it.last_end > text.len() {
334            return false;
335        }
336        let (s, e) = match re.find_at(text, it.last_end) {
337            None => return false,
338            Some(m) => (m.start(), m.end()),
339        };
340        if s == e {
341            // This is an empty match. To ensure we make progress, start
342            // the next search at the smallest possible starting position
343            // of the next match following this one.
344            it.last_end += 1;
345            // Don't accept empty matches immediately following a match.
346            // Just move on to the next match.
347            if Some(e) == it.last_match {
348                return rure_iter_next(it, haystack, len, match_info);
349            }
350        } else {
351            it.last_end = e;
352        }
353        it.last_match = Some(e);
354        if !match_info.is_null() {
355            unsafe {
356                (*match_info).start = s;
357                (*match_info).end = e;
358            }
359        }
360        true
361    }
362}
363
364ffi_fn! {
365    fn rure_iter_next_captures(
366        it: *mut Iter,
367        haystack: *const u8,
368        len: size_t,
369        captures: *mut Captures,
370    ) -> bool {
371        let it = unsafe { &mut *it };
372        let re = unsafe { &*it.re };
373        let slots = unsafe { &mut (*captures).0 };
374        let text = unsafe { slice::from_raw_parts(haystack, len) };
375        if it.last_end > text.len() {
376            return false;
377        }
378        let (s, e) = match re.read_captures_at(slots, text, it.last_end) {
379            None => return false,
380            Some(m) => (m.start(), m.end()),
381        };
382        if s == e {
383            // This is an empty match. To ensure we make progress, start
384            // the next search at the smallest possible starting position
385            // of the next match following this one.
386            it.last_end += 1;
387            // Don't accept empty matches immediately following a match.
388            // Just move on to the next match.
389            if Some(e) == it.last_match {
390                return rure_iter_next_captures(it, haystack, len, captures);
391            }
392        } else {
393            it.last_end = e;
394        }
395        it.last_match = Some(e);
396        true
397    }
398}
399
400ffi_fn! {
401    fn rure_captures_new(re: *const Regex) -> *mut Captures {
402        let re = unsafe { &*re };
403        let captures = Captures(re.locations());
404        Box::into_raw(Box::new(captures))
405    }
406}
407
408ffi_fn! {
409    fn rure_captures_free(captures: *const Captures) {
410        unsafe { drop(Box::from_raw(captures as *mut Captures)); }
411    }
412}
413
414ffi_fn! {
415    fn rure_captures_at(
416        captures: *const Captures,
417        i: size_t,
418        match_info: *mut rure_match,
419    ) -> bool {
420        let locs = unsafe { &(*captures).0 };
421        match locs.pos(i) {
422            Some((start, end)) => {
423                if !match_info.is_null() {
424                    unsafe {
425                        (*match_info).start = start;
426                        (*match_info).end = end;
427                    }
428                }
429                true
430            }
431            _ => false
432        }
433    }
434}
435
436ffi_fn! {
437    fn rure_captures_len(captures: *const Captures) -> size_t {
438        unsafe { (*captures).0.len() }
439    }
440}
441
442ffi_fn! {
443    fn rure_options_new() -> *mut Options {
444        Box::into_raw(Box::new(Options::default()))
445    }
446}
447
448ffi_fn! {
449    fn rure_options_free(options: *mut Options) {
450        unsafe { drop(Box::from_raw(options)); }
451    }
452}
453
454ffi_fn! {
455    fn rure_options_size_limit(options: *mut Options, limit: size_t) {
456        let options = unsafe { &mut *options };
457        options.size_limit = limit;
458    }
459}
460
461ffi_fn! {
462    fn rure_options_dfa_size_limit(options: *mut Options, limit: size_t) {
463        let options = unsafe { &mut *options };
464        options.dfa_size_limit = limit;
465    }
466}
467
468ffi_fn! {
469    fn rure_compile_set(
470        patterns: *const *const u8,
471        patterns_lengths: *const size_t,
472        patterns_count: size_t,
473        flags: u32,
474        options: *const Options,
475        error: *mut Error
476    ) -> *const RegexSet {
477        let (raw_pats, raw_patsl) = unsafe {
478            (
479                slice::from_raw_parts(patterns, patterns_count),
480                slice::from_raw_parts(patterns_lengths, patterns_count)
481            )
482        };
483
484        let mut pats = Vec::with_capacity(patterns_count);
485        for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) {
486            let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) };
487            pats.push(match str::from_utf8(pat) {
488                Ok(pat) => pat,
489                Err(err) => {
490                    unsafe {
491                        if !error.is_null() {
492                            *error = Error::new(ErrorKind::Str(err));
493                        }
494                        return ptr::null();
495                    }
496                }
497            });
498        }
499
500        let mut builder = bytes::RegexSetBuilder::new(pats);
501        if !options.is_null() {
502            let options = unsafe { &*options };
503            builder.size_limit(options.size_limit);
504            builder.dfa_size_limit(options.dfa_size_limit);
505        }
506        builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
507        builder.multi_line(flags & RURE_FLAG_MULTI > 0);
508        builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
509        builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
510        builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
511        builder.unicode(flags & RURE_FLAG_UNICODE > 0);
512        match builder.build() {
513            Ok(re) => {
514                Box::into_raw(Box::new(RegexSet { re: re }))
515            }
516            Err(err) => {
517                unsafe {
518                    if !error.is_null() {
519                        *error = Error::new(ErrorKind::Regex(err))
520                    }
521                    ptr::null()
522                }
523            }
524        }
525    }
526}
527
528ffi_fn! {
529    fn rure_set_free(re: *const RegexSet) {
530        unsafe { drop(Box::from_raw(re as *mut RegexSet)); }
531    }
532}
533
534ffi_fn! {
535    fn rure_set_is_match(
536        re: *const RegexSet,
537        haystack: *const u8,
538        len: size_t,
539        start: size_t
540    ) -> bool {
541        let re = unsafe { &*re };
542        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
543        re.is_match_at(haystack, start)
544    }
545}
546
547ffi_fn! {
548    fn rure_set_matches(
549        re: *const RegexSet,
550        haystack: *const u8,
551        len: size_t,
552        start: size_t,
553        matches: *mut bool
554    ) -> bool {
555        let re = unsafe { &*re };
556        let mut matches = unsafe {
557            slice::from_raw_parts_mut(matches, re.len())
558        };
559        let haystack = unsafe { slice::from_raw_parts(haystack, len) };
560
561        // read_matches_at isn't guaranteed to set non-matches to false
562        for item in matches.iter_mut() {
563            *item = false;
564        }
565        re.read_matches_at(&mut matches, haystack, start)
566    }
567}
568
569ffi_fn! {
570    fn rure_set_len(re: *const RegexSet) -> size_t {
571        unsafe { (*re).len() }
572    }
573}
574
575ffi_fn! {
576    fn rure_escape_must(pattern: *const c_char) -> *const c_char {
577        let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
578        let pat = pattern as *const u8;
579        let mut err = Error::new(ErrorKind::None);
580        let esc = rure_escape(pat, len, &mut err);
581        if err.is_err() {
582            let _ = writeln!(&mut io::stderr(), "{}", err);
583            let _ = writeln!(
584                &mut io::stderr(), "aborting from rure_escape_must");
585            unsafe { abort() }
586        }
587        esc
588    }
589}
590
591/// A helper function that implements fallible escaping in a way that returns
592/// an error if escaping failed.
593///
594/// This should ideally be exposed, but it needs API design work. In
595/// particular, this should not return a C string, but a `const uint8_t *`
596/// instead, since it may contain a NUL byte.
597fn rure_escape(
598    pattern: *const u8,
599    length: size_t,
600    error: *mut Error,
601) -> *const c_char {
602    let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
603    let str_pat = match str::from_utf8(pat) {
604        Ok(val) => val,
605        Err(err) => unsafe {
606            if !error.is_null() {
607                *error = Error::new(ErrorKind::Str(err));
608            }
609            return ptr::null();
610        },
611    };
612    let esc_pat = regex::escape(str_pat);
613    let c_esc_pat = match CString::new(esc_pat) {
614        Ok(val) => val,
615        Err(err) => unsafe {
616            if !error.is_null() {
617                *error = Error::new(ErrorKind::Nul(err));
618            }
619            return ptr::null();
620        },
621    };
622    c_esc_pat.into_raw() as *const c_char
623}
624
625ffi_fn! {
626    fn rure_cstring_free(s: *mut c_char) {
627        unsafe { drop(CString::from_raw(s)); }
628    }
629}
630