1use std::iter::repeat; 2 3/// Create a sequence of tests that should be run by memchr implementations. 4pub fn memchr_tests() -> Vec<MemchrTest> { 5 let mut tests = Vec::new(); 6 for statict in MEMCHR_TESTS { 7 assert!(!statict.corpus.contains("%"), "% is not allowed in corpora"); 8 assert!(!statict.corpus.contains("#"), "# is not allowed in corpora"); 9 assert!(!statict.needles.contains(&b'%'), "% is an invalid needle"); 10 assert!(!statict.needles.contains(&b'#'), "# is an invalid needle"); 11 12 let t = MemchrTest { 13 corpus: statict.corpus.to_string(), 14 needles: statict.needles.to_vec(), 15 positions: statict.positions.to_vec(), 16 }; 17 tests.push(t.clone()); 18 tests.extend(t.expand()); 19 } 20 tests 21} 22 23/// A set of tests for memchr-like functions. 24/// 25/// These tests mostly try to cover the short string cases. We cover the longer 26/// string cases via the benchmarks (which are tests themselves), via 27/// quickcheck tests and via automatic expansion of each test case (by 28/// increasing the corpus size). Finally, we cover different alignment cases 29/// in the tests by varying the starting point of the slice. 30const MEMCHR_TESTS: &[MemchrTestStatic] = &[ 31 // one needle (applied to memchr + memchr2 + memchr3) 32 MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] }, 33 MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] }, 34 MemchrTestStatic { 35 corpus: "aaa", 36 needles: &[b'a'], 37 positions: &[0, 1, 2], 38 }, 39 MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] }, 40 MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] }, 41 MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] }, 42 MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] }, 43 MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] }, 44 MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] }, 45 MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] }, 46 MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] }, 47 MemchrTestStatic { 48 corpus: "\x00\x00", 49 needles: &[b'\x00'], 50 positions: &[0, 1], 51 }, 52 MemchrTestStatic { 53 corpus: "\x00a\x00", 54 needles: &[b'\x00'], 55 positions: &[0, 2], 56 }, 57 MemchrTestStatic { 58 corpus: "zzzzzzzzzzzzzzzza", 59 needles: &[b'a'], 60 positions: &[16], 61 }, 62 MemchrTestStatic { 63 corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", 64 needles: &[b'a'], 65 positions: &[32], 66 }, 67 // two needles (applied to memchr2 + memchr3) 68 MemchrTestStatic { 69 corpus: "az", 70 needles: &[b'a', b'z'], 71 positions: &[0, 1], 72 }, 73 MemchrTestStatic { 74 corpus: "az", 75 needles: &[b'a', b'z'], 76 positions: &[0, 1], 77 }, 78 MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] }, 79 MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] }, 80 MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] }, 81 MemchrTestStatic { 82 corpus: "yyyyaz", 83 needles: &[b'a', b'z'], 84 positions: &[4, 5], 85 }, 86 MemchrTestStatic { 87 corpus: "yyyyaz", 88 needles: &[b'z', b'a'], 89 positions: &[4, 5], 90 }, 91 // three needles (applied to memchr3) 92 MemchrTestStatic { 93 corpus: "xyz", 94 needles: &[b'x', b'y', b'z'], 95 positions: &[0, 1, 2], 96 }, 97 MemchrTestStatic { 98 corpus: "zxy", 99 needles: &[b'x', b'y', b'z'], 100 positions: &[0, 1, 2], 101 }, 102 MemchrTestStatic { 103 corpus: "zxy", 104 needles: &[b'x', b'a', b'z'], 105 positions: &[0, 1], 106 }, 107 MemchrTestStatic { 108 corpus: "zxy", 109 needles: &[b't', b'a', b'z'], 110 positions: &[0], 111 }, 112 MemchrTestStatic { 113 corpus: "yxz", 114 needles: &[b't', b'a', b'z'], 115 positions: &[2], 116 }, 117]; 118 119/// A description of a test on a memchr like function. 120#[derive(Clone, Debug)] 121pub struct MemchrTest { 122 /// The thing to search. We use `&str` instead of `&[u8]` because they 123 /// are nicer to write in tests, and we don't miss much since memchr 124 /// doesn't care about UTF-8. 125 /// 126 /// Corpora cannot contain either '%' or '#'. We use these bytes when 127 /// expanding test cases into many test cases, and we assume they are not 128 /// used. If they are used, `memchr_tests` will panic. 129 corpus: String, 130 /// The needles to search for. This is intended to be an "alternation" of 131 /// needles. The number of needles may cause this test to be skipped for 132 /// some memchr variants. For example, a test with 2 needles cannot be used 133 /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. 134 /// However, a test with only 1 needle can be used to test all of `memchr`, 135 /// `memchr2` and `memchr3`. We achieve this by filling in the needles with 136 /// bytes that we never used in the corpus (such as '#'). 137 needles: Vec<u8>, 138 /// The positions expected to match for all of the needles. 139 positions: Vec<usize>, 140} 141 142/// Like MemchrTest, but easier to define as a constant. 143#[derive(Clone, Debug)] 144pub struct MemchrTestStatic { 145 corpus: &'static str, 146 needles: &'static [u8], 147 positions: &'static [usize], 148} 149 150impl MemchrTest { 151 pub fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) { 152 let needles = match self.needles(1) { 153 None => return, 154 Some(needles) => needles, 155 }; 156 // We test different alignments here. Since some implementations use 157 // AVX2, which can read 32 bytes at a time, we test at least that. 158 // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128 159 // (avx) bytes at a time, so we include that in our offsets as well. 160 // 161 // You might think this would cause most needles to not be found, but 162 // we actually expand our tests to include corpus sizes all the way up 163 // to >500 bytes, so we should exercise most branches. 164 for align in 0..130 { 165 let corpus = self.corpus(align); 166 assert_eq!( 167 self.positions(align, reverse).get(0).cloned(), 168 f(needles[0], corpus.as_bytes()), 169 "search for {:?} failed in: {:?} (len: {}, alignment: {})", 170 needles[0] as char, 171 corpus, 172 corpus.len(), 173 align 174 ); 175 } 176 } 177 178 pub fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>( 179 &self, 180 reverse: bool, 181 f: F, 182 ) { 183 let needles = match self.needles(2) { 184 None => return, 185 Some(needles) => needles, 186 }; 187 for align in 0..130 { 188 let corpus = self.corpus(align); 189 assert_eq!( 190 self.positions(align, reverse).get(0).cloned(), 191 f(needles[0], needles[1], corpus.as_bytes()), 192 "search for {:?}|{:?} failed in: {:?} \ 193 (len: {}, alignment: {})", 194 needles[0] as char, 195 needles[1] as char, 196 corpus, 197 corpus.len(), 198 align 199 ); 200 } 201 } 202 203 pub fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>( 204 &self, 205 reverse: bool, 206 f: F, 207 ) { 208 let needles = match self.needles(3) { 209 None => return, 210 Some(needles) => needles, 211 }; 212 for align in 0..130 { 213 let corpus = self.corpus(align); 214 assert_eq!( 215 self.positions(align, reverse).get(0).cloned(), 216 f(needles[0], needles[1], needles[2], corpus.as_bytes()), 217 "search for {:?}|{:?}|{:?} failed in: {:?} \ 218 (len: {}, alignment: {})", 219 needles[0] as char, 220 needles[1] as char, 221 needles[2] as char, 222 corpus, 223 corpus.len(), 224 align 225 ); 226 } 227 } 228 229 pub fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F) 230 where 231 F: FnOnce(u8, &'a [u8]) -> I, 232 I: Iterator<Item = usize>, 233 { 234 if let Some(ns) = self.needles(1) { 235 self.iter(reverse, f(ns[0], self.corpus.as_bytes())); 236 } 237 } 238 239 pub fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F) 240 where 241 F: FnOnce(u8, u8, &'a [u8]) -> I, 242 I: Iterator<Item = usize>, 243 { 244 if let Some(ns) = self.needles(2) { 245 self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes())); 246 } 247 } 248 249 pub fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F) 250 where 251 F: FnOnce(u8, u8, u8, &'a [u8]) -> I, 252 I: Iterator<Item = usize>, 253 { 254 if let Some(ns) = self.needles(3) { 255 self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes())); 256 } 257 } 258 259 /// Test that the positions yielded by the given iterator match the 260 /// positions in this test. If reverse is true, then reverse the positions 261 /// before comparing them. 262 fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) { 263 assert_eq!( 264 self.positions(0, reverse), 265 it.collect::<Vec<usize>>(), 266 r"search for {:?} failed in: {:?}", 267 self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(), 268 self.corpus 269 ); 270 } 271 272 /// Expand this test into many variations of the same test. 273 /// 274 /// In particular, this will generate more tests with larger corpus sizes. 275 /// The expected positions are updated to maintain the integrity of the 276 /// test. 277 /// 278 /// This is important in testing a memchr implementation, because there are 279 /// often different cases depending on the length of the corpus. 280 /// 281 /// Note that we extend the corpus by adding `%` bytes, which we 282 /// don't otherwise use as a needle. 283 fn expand(&self) -> Vec<MemchrTest> { 284 let mut more = Vec::new(); 285 286 // Add bytes to the start of the corpus. 287 for i in 1..515 { 288 let mut t = self.clone(); 289 let mut new_corpus: String = repeat('%').take(i).collect(); 290 new_corpus.push_str(&t.corpus); 291 t.corpus = new_corpus; 292 t.positions = t.positions.into_iter().map(|p| p + i).collect(); 293 more.push(t); 294 } 295 // Add bytes to the end of the corpus. 296 for i in 1..515 { 297 let mut t = self.clone(); 298 let padding: String = repeat('%').take(i).collect(); 299 t.corpus.push_str(&padding); 300 more.push(t); 301 } 302 303 more 304 } 305 306 /// Return the corpus at the given alignment. 307 /// 308 /// If the alignment exceeds the length of the corpus, then this returns 309 /// an empty slice. 310 fn corpus(&self, align: usize) -> &str { 311 self.corpus.get(align..).unwrap_or("") 312 } 313 314 /// Return exactly `count` needles from this test. If this test has less 315 /// than `count` needles, then add `#` until the number of needles 316 /// matches `count`. If this test has more than `count` needles, then 317 /// return `None` (because there is no way to use this test data for a 318 /// search using fewer needles). 319 fn needles(&self, count: usize) -> Option<Vec<u8>> { 320 if self.needles.len() > count { 321 return None; 322 } 323 324 let mut needles = self.needles.to_vec(); 325 for _ in needles.len()..count { 326 // we assume # is never used in tests. 327 needles.push(b'#'); 328 } 329 Some(needles) 330 } 331 332 /// Return the positions in this test, reversed if `reverse` is true. 333 /// 334 /// If alignment is given, then all positions greater than or equal to that 335 /// alignment are offset by the alignment. Positions less than the 336 /// alignment are dropped. 337 fn positions(&self, align: usize, reverse: bool) -> Vec<usize> { 338 let positions = if reverse { 339 let mut positions = self.positions.to_vec(); 340 positions.reverse(); 341 positions 342 } else { 343 self.positions.to_vec() 344 }; 345 positions 346 .into_iter() 347 .filter(|&p| p >= align) 348 .map(|p| p - align) 349 .collect() 350 } 351} 352