1#![allow(non_camel_case_types)] 2 3use std::mem; 4use std::ptr; 5use std::sync::Once; 6 7use libc::{c_char, c_int, c_long, c_void}; 8 9// Used to initialize the TCL interpreter exactly once. 10static ONCE: Once = Once::new(); 11 12/// Text is a TCL string object backed by a Rust string. 13/// 14/// This is a special type that is created once per benchmark and is not 15/// included in timings. In particular, all regex searches execute on values 16/// of this type, so we're careful to avoid the overhead of creating such 17/// objects on every search. 18pub struct Text { 19 s: String, 20 obj: *mut tcl_obj, 21} 22 23// TCL's objects are ref-counted in a thread-unsafe manner, which would 24// normally disqualify a Send bound. However, we don't permit Text to be used 25// in a way that can lead to unsafety. In particular, the ref count is always 26// 1, until it is dropped, in which the ref count is decreased to zero and 27// the underlying memory is freed. 28unsafe impl Send for Text {} 29 30impl Drop for Text { 31 fn drop(&mut self) { 32 unsafe { 33 assert_eq!((*self.obj).ref_count, 1); 34 // This will drop the ref count to 0 and cause it to be freed. 35 (*self.obj).decr_ref_count(); 36 } 37 } 38} 39 40impl Text { 41 pub fn new(text: String) -> Text { 42 let ptr = text.as_ptr() as *const c_char; 43 let len = text.len() as c_int; 44 let obj = unsafe { Tcl_NewStringObj(ptr, len) }; 45 unsafe { 46 (*obj).incr_ref_count(); 47 } 48 Text { s: text, obj: obj } 49 } 50 51 pub fn len(&self) -> usize { 52 self.s.len() 53 } 54} 55 56/// Regex wraps a TCL regex. It owns a TCL string object and a pointer to a 57/// regexp object. The two share storage. 58/// 59/// There's no Drop impl for Regex because the memory for the regex will be 60/// freed when `pat` is dropped. 
61pub struct Regex { 62 pat: Text, 63 re: *mut tcl_regexp, 64} 65 66unsafe impl Send for Regex {} 67 68#[derive(Debug)] 69pub struct Error(()); 70 71impl Regex { 72 pub fn new(pattern: &str) -> Result<Regex, Error> { 73 ONCE.call_once(|| unsafe { 74 Tcl_CreateInterp(); 75 }); 76 77 let pat = Text::new(pattern.to_owned()); 78 let re = unsafe { 79 Tcl_GetRegExpFromObj(ptr::null_mut(), pat.obj, TCL_REG_ADVANCED) 80 }; 81 if re.is_null() { 82 return Err(Error(())); 83 } 84 Ok(Regex { pat: pat, re: re }) 85 } 86 87 pub fn is_match(&self, text: &Text) -> bool { 88 let result = unsafe { 89 Tcl_RegExpExecObj(ptr::null_mut(), self.re, text.obj, 0, 1, 0) 90 }; 91 if result == -1 { 92 panic!("Tcl_RegExpExecObj failed"); 93 } 94 result > 0 95 } 96 97 pub fn find_iter<'r, 't>(&'r self, text: &'t Text) -> FindMatches<'r, 't> { 98 FindMatches { re: self, text: text, last_match: 0 } 99 } 100 101 fn find_at(&self, text: &Text, start: usize) -> Option<(usize, usize)> { 102 let result = unsafe { 103 Tcl_RegExpExecObj( 104 ptr::null_mut(), 105 self.re, 106 text.obj, 107 start as c_int, 108 1, 109 0, 110 ) 111 }; 112 if result == -1 { 113 panic!("Tcl_RegExpExecObj failed"); 114 } else if result == 0 { 115 return None; 116 } 117 let mut info: tcl_regexp_info = unsafe { mem::zeroed() }; 118 unsafe { 119 Tcl_RegExpGetInfo(self.re, &mut info); 120 let s = start as c_long + (*info.matches).start; 121 let e = start as c_long + (*info.matches).end; 122 Some((s as usize, e as usize)) 123 } 124 } 125} 126 127pub struct FindMatches<'r, 't> { 128 re: &'r Regex, 129 text: &'t Text, 130 last_match: usize, 131} 132 133impl<'r, 't> Iterator for FindMatches<'r, 't> { 134 type Item = (usize, usize); 135 136 fn next(&mut self) -> Option<(usize, usize)> { 137 match self.re.find_at(self.text, self.last_match) { 138 None => None, 139 Some((s, e)) => { 140 self.last_match = e; 141 Some((s, e)) 142 } 143 } 144 } 145} 146 147// TCL's FFI. We only wrap the bits we need. 
/// Compile flag handed to `Tcl_GetRegExpFromObj`.
///
/// NOTE(review): presumably mirrors `TCL_REG_ADVANCED` from `tcl.h` --
/// confirm the value against the TCL headers in use.
const TCL_REG_ADVANCED: c_int = 3;

/// Opaque handle to a TCL interpreter; only ever used behind a pointer.
type tcl_interp = c_void;
/// Opaque handle to a compiled TCL regexp; only ever used behind a pointer.
type tcl_regexp = c_void;

/// The leading portion of a TCL object.
///
/// Only the reference count is declared. The real object has more fields,
/// but nothing here touches them. The count is manipulated by hand because
/// Tcl_IncrRefCount and Tcl_DecrRefCount are C macros and so cannot be
/// called through FFI.
#[repr(C)]
struct tcl_obj {
    ref_count: c_int,
}

/// One `(start, end)` pair of match offsets as reported by TCL.
#[repr(C)]
struct tcl_regexp_indices {
    start: c_long,
    end: c_long,
}

/// Match information filled in by `Tcl_RegExpGetInfo`.
#[repr(C)]
struct tcl_regexp_info {
    nsubs: c_int,
    /// Points at the match offsets; the first entry is the one read by
    /// `Regex::find_at`.
    matches: *mut tcl_regexp_indices,
    extend_start: c_long,
    reserved: c_long,
}

extern "C" {
    fn Tcl_CreateInterp() -> *mut tcl_interp;

    fn Tcl_NewStringObj(bytes: *const c_char, length: c_int) -> *mut tcl_obj;

    fn TclFreeObj(obj_ptr: *mut tcl_obj);

    fn Tcl_GetRegExpFromObj(
        interp: *mut tcl_interp,
        pattern: *mut tcl_obj,
        flags: c_int,
    ) -> *mut tcl_regexp;

    fn Tcl_RegExpExecObj(
        interp: *mut tcl_interp,
        regexp: *mut tcl_regexp,
        text: *mut tcl_obj,
        offset: c_int,
        nmatches: c_int,
        flags: c_int,
    ) -> c_int;

    fn Tcl_RegExpGetInfo(regexp: *mut tcl_regexp, info: *mut tcl_regexp_info);
}

impl tcl_obj {
    /// Takes one more reference to this object.
    unsafe fn incr_ref_count(&mut self) {
        self.ref_count += 1;
    }

    /// Gives up one reference, handing the object to `TclFreeObj` once no
    /// references remain.
    unsafe fn decr_ref_count(&mut self) {
        self.ref_count -= 1;
        if self.ref_count > 0 {
            // Somebody else still holds a reference; nothing to free yet.
            return;
        }
        TclFreeObj(self);
    }
}