xref: /third_party/rust/crates/regex/bench/src/ffi/tcl.rs (revision c67d6573)
1#![allow(non_camel_case_types)]
2
3use std::mem;
4use std::ptr;
5use std::sync::Once;
6
7use libc::{c_char, c_int, c_long, c_void};
8
// Guards the one-time call to `Tcl_CreateInterp` made from `Regex::new`, so
// the interpreter is created at most once no matter how many regexes are
// compiled.
static ONCE: Once = Once::new();
11
12/// Text is a TCL string object backed by a Rust string.
13///
14/// This is a special type that is created once per benchmark and is not
15/// included in timings. In particular, all regex searches execute on values
16/// of this type, so we're careful to avoid the overhead of creating such
17/// objects on every search.
18pub struct Text {
19    s: String,
20    obj: *mut tcl_obj,
21}
22
23// TCL's objects are ref-counted in a thread-unsafe manner, which would
24// normally disqualify a Send bound. However, we don't permit Text to be used
25// in a way that can lead to unsafety. In particular, the ref count is always
26// 1, until it is dropped, in which the ref count is decreased to zero and
27// the underlying memory is freed.
28unsafe impl Send for Text {}
29
30impl Drop for Text {
31    fn drop(&mut self) {
32        unsafe {
33            assert_eq!((*self.obj).ref_count, 1);
34            // This will drop the ref count to 0 and cause it to be freed.
35            (*self.obj).decr_ref_count();
36        }
37    }
38}
39
40impl Text {
41    pub fn new(text: String) -> Text {
42        let ptr = text.as_ptr() as *const c_char;
43        let len = text.len() as c_int;
44        let obj = unsafe { Tcl_NewStringObj(ptr, len) };
45        unsafe {
46            (*obj).incr_ref_count();
47        }
48        Text { s: text, obj: obj }
49    }
50
51    pub fn len(&self) -> usize {
52        self.s.len()
53    }
54}
55
56/// Regex wraps a TCL regex. It owns a TCL string object and a pointer to a
57/// regexp object. The two share storage.
58///
59/// There's no Drop impl for Regex because the memory for the regex will be
60/// freed when `pat` is dropped.
61pub struct Regex {
62    pat: Text,
63    re: *mut tcl_regexp,
64}
65
66unsafe impl Send for Regex {}
67
/// The error returned by `Regex::new` when TCL fails to compile a pattern.
///
/// It carries no details about the failure.
#[derive(Debug)]
pub struct Error(());
70
71impl Regex {
72    pub fn new(pattern: &str) -> Result<Regex, Error> {
73        ONCE.call_once(|| unsafe {
74            Tcl_CreateInterp();
75        });
76
77        let pat = Text::new(pattern.to_owned());
78        let re = unsafe {
79            Tcl_GetRegExpFromObj(ptr::null_mut(), pat.obj, TCL_REG_ADVANCED)
80        };
81        if re.is_null() {
82            return Err(Error(()));
83        }
84        Ok(Regex { pat: pat, re: re })
85    }
86
87    pub fn is_match(&self, text: &Text) -> bool {
88        let result = unsafe {
89            Tcl_RegExpExecObj(ptr::null_mut(), self.re, text.obj, 0, 1, 0)
90        };
91        if result == -1 {
92            panic!("Tcl_RegExpExecObj failed");
93        }
94        result > 0
95    }
96
97    pub fn find_iter<'r, 't>(&'r self, text: &'t Text) -> FindMatches<'r, 't> {
98        FindMatches { re: self, text: text, last_match: 0 }
99    }
100
101    fn find_at(&self, text: &Text, start: usize) -> Option<(usize, usize)> {
102        let result = unsafe {
103            Tcl_RegExpExecObj(
104                ptr::null_mut(),
105                self.re,
106                text.obj,
107                start as c_int,
108                1,
109                0,
110            )
111        };
112        if result == -1 {
113            panic!("Tcl_RegExpExecObj failed");
114        } else if result == 0 {
115            return None;
116        }
117        let mut info: tcl_regexp_info = unsafe { mem::zeroed() };
118        unsafe {
119            Tcl_RegExpGetInfo(self.re, &mut info);
120            let s = start as c_long + (*info.matches).start;
121            let e = start as c_long + (*info.matches).end;
122            Some((s as usize, e as usize))
123        }
124    }
125}
126
127pub struct FindMatches<'r, 't> {
128    re: &'r Regex,
129    text: &'t Text,
130    last_match: usize,
131}
132
133impl<'r, 't> Iterator for FindMatches<'r, 't> {
134    type Item = (usize, usize);
135
136    fn next(&mut self) -> Option<(usize, usize)> {
137        match self.re.find_at(self.text, self.last_match) {
138            None => None,
139            Some((s, e)) => {
140                self.last_match = e;
141                Some((s, e))
142            }
143        }
144    }
145}
146
// Bindings to the TCL C API. Only the pieces this module uses are declared.

// Flag value handed to `Tcl_GetRegExpFromObj` when compiling a pattern.
const TCL_REG_ADVANCED: c_int = 3;

// The interpreter and compiled-regexp handles are only ever passed around
// by pointer, so they are treated as opaque.
type tcl_interp = c_void;
type tcl_regexp = c_void;
153
// Partial view of a TCL object.
//
// The real structure has further fields after `ref_count`, but this module
// never reads or writes them. Only the ref count is exposed because TCL's
// Tcl_IncrRefCount and Tcl_DecrRefCount are C macros rather than functions,
// so the count has to be adjusted by hand from Rust.
#[repr(C)]
struct tcl_obj {
    ref_count: c_int,
}
162
163impl tcl_obj {
164    unsafe fn incr_ref_count(&mut self) {
165        self.ref_count += 1;
166    }
167
168    unsafe fn decr_ref_count(&mut self) {
169        self.ref_count -= 1;
170        if self.ref_count <= 0 {
171            TclFreeObj(self);
172        }
173    }
174}
175
176#[repr(C)]
177struct tcl_regexp_info {
178    nsubs: c_int,
179    matches: *mut tcl_regexp_indices,
180    extend_start: c_long,
181    reserved: c_long,
182}
183
// One match range as reported through `tcl_regexp_info::matches`. Both
// offsets are relative to the position the search started from; `find_at`
// adds that position back on.
#[repr(C)]
struct tcl_regexp_indices {
    // Offset at which the match begins.
    start: c_long,
    // Offset at which the match ends.
    end: c_long,
}
189
190extern "C" {
191    fn Tcl_CreateInterp() -> *mut tcl_interp;
192
193    fn Tcl_NewStringObj(pat: *const c_char, len: c_int) -> *mut tcl_obj;
194
195    fn TclFreeObj(obj: *mut tcl_obj);
196
197    fn Tcl_GetRegExpFromObj(
198        int: *mut tcl_interp,
199        pat: *mut tcl_obj,
200        flags: c_int,
201    ) -> *mut tcl_regexp;
202
203    fn Tcl_RegExpExecObj(
204        int: *mut tcl_interp,
205        re: *mut tcl_regexp,
206        text: *mut tcl_obj,
207        offset: c_int,
208        nmatches: c_int,
209        flags: c_int,
210    ) -> c_int;
211
212    fn Tcl_RegExpGetInfo(re: *mut tcl_regexp, info: *mut tcl_regexp_info);
213}
214