1mat!(ascii_literal, r"a", "a", Some((0, 1))); 2 3// Some crazy expressions from regular-expressions.info. 4mat!( 5 match_ranges, 6 r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", 7 "num: 255", 8 Some((5, 8)) 9); 10mat!( 11 match_ranges_not, 12 r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", 13 "num: 256", 14 None 15); 16mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))); 17mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))); 18mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))); 19mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None); 20mat!( 21 match_email, 22 r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", 23 "mine is jam.slam@gmail.com ", 24 Some((8, 26)) 25); 26mat!( 27 match_email_not, 28 r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", 29 "mine is jam.slam@gmail ", 30 None 31); 32mat!( 33 match_email_big, 34 r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", 35 "mine is jam.slam@gmail.com ", 36 Some((8, 26)) 37); 38mat!( 39 match_date1, 40 r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", 41 "1900-01-01", 42 Some((0, 10)) 43); 44mat!( 45 match_date2, 46 r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", 47 "1900-00-01", 48 None 49); 50mat!( 51 match_date3, 52 r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", 53 "1900-13-01", 54 None 55); 56 57// Do some crazy dancing with the start/end assertions. 58matiter!(match_start_end_empty, r"^$", "", (0, 0)); 59matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0)); 60matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0)); 61matiter!(match_start_end_empty_rev, r"$^", "", (0, 0)); 62matiter!( 63 match_start_end_empty_rep, 64 r"(?:^$)*", 65 "a\nb\nc", 66 (0, 0), 67 (1, 1), 68 (2, 2), 69 (3, 3), 70 (4, 4), 71 (5, 5) 72); 73matiter!( 74 match_start_end_empty_rep_rev, 75 r"(?:$^)*", 76 "a\nb\nc", 77 (0, 0), 78 (1, 1), 79 (2, 2), 80 (3, 3), 81 (4, 4), 82 (5, 5) 83); 84 85// Test negated character classes. 86mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3))); 87mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3))); 88mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3))); 89mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3))); 90mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2))); 91mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3))); 92mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3))); 93mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); 94 95// Test that repeated empty expressions don't loop forever. 96mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2))); 97mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2))); 98mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2))); 99mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2))); 100mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2))); 101mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2))); 102mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2))); 103mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2))); 104mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2))); 105mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2))); 106mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2))); 107mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2))); 108 109// Test that we handle various flavors of empty expressions. 110matiter!(match_empty1, r"", "", (0, 0)); 111matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 112matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 113matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 114matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 115matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 116matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 117matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 118matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 119matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 120matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3)); 121matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 122matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3)); 123matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 124matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 125matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 126matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 127matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 128matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 129matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3)); 130matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 131matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); 132matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2)); 133 134// Test that the DFA can handle pathological cases. 135// (This should result in the DFA's cache being flushed too frequently, which 136// should cause it to quit and fall back to the NFA algorithm.) 137#[test] 138fn dfa_handles_pathological_case() { 139 fn ones_and_zeroes(count: usize) -> String { 140 use rand::rngs::SmallRng; 141 use rand::{Rng, SeedableRng}; 142 143 let mut rng = SmallRng::from_entropy(); 144 let mut s = String::new(); 145 for _ in 0..count { 146 if rng.gen() { 147 s.push('1'); 148 } else { 149 s.push('0'); 150 } 151 } 152 s 153 } 154 155 let re = regex!(r"[01]*1[01]{20}$"); 156 let text = { 157 let mut pieces = ones_and_zeroes(100_000); 158 pieces.push('1'); 159 pieces.push_str(&ones_and_zeroes(20)); 160 pieces 161 }; 162 assert!(re.is_match(text!(&*text))); 163} 164 165#[test] 166fn nest_limit_makes_it_parse() { 167 use regex::RegexBuilder; 168 169 RegexBuilder::new( 170 r#"(?-u) 171 2(?: 172 [45]\d{3}| 173 7(?: 174 1[0-267]| 175 2[0-289]| 176 3[0-29]| 177 4[01]| 178 5[1-3]| 179 6[013]| 180 7[0178]| 181 91 182 )| 183 8(?: 184 0[125]| 185 [139][1-6]| 186 2[0157-9]| 187 41| 188 6[1-35]| 189 7[1-5]| 190 8[1-8]| 191 90 192 )| 193 9(?: 194 0[0-2]| 195 1[0-4]| 196 2[568]| 197 3[3-6]| 198 5[5-7]| 199 6[0167]| 200 7[15]| 201 8[0146-9] 202 ) 203 )\d{4}| 204 3(?: 205 12?[5-7]\d{2}| 206 0(?: 207 2(?: 208 [025-79]\d| 209 [348]\d{1,2} 210 )| 211 3(?: 212 [2-4]\d| 213 [56]\d? 214 ) 215 )| 216 2(?: 217 1\d{2}| 218 2(?: 219 [12]\d| 220 [35]\d{1,2}| 221 4\d? 222 ) 223 )| 224 3(?: 225 1\d{2}| 226 2(?: 227 [2356]\d| 228 4\d{1,2} 229 ) 230 )| 231 4(?: 232 1\d{2}| 233 2(?: 234 2\d{1,2}| 235 [47]| 236 5\d{2} 237 ) 238 )| 239 5(?: 240 1\d{2}| 241 29 242 )| 243 [67]1\d{2}| 244 8(?: 245 1\d{2}| 246 2(?: 247 2\d{2}| 248 3| 249 4\d 250 ) 251 ) 252 )\d{3}| 253 4(?: 254 0(?: 255 2(?: 256 [09]\d| 257 7 258 )| 259 33\d{2} 260 )| 261 1\d{3}| 262 2(?: 263 1\d{2}| 264 2(?: 265 [25]\d?| 266 [348]\d| 267 [67]\d{1,2} 268 ) 269 )| 270 3(?: 271 1\d{2}(?: 272 \d{2} 273 )?| 274 2(?: 275 [045]\d| 276 [236-9]\d{1,2} 277 )| 278 32\d{2} 279 )| 280 4(?: 281 [18]\d{2}| 282 2(?: 283 [2-46]\d{2}| 284 3 285 )| 286 5[25]\d{2} 287 )| 288 5(?: 289 1\d{2}| 290 2(?: 291 3\d| 292 5 293 ) 294 )| 295 6(?: 296 [18]\d{2}| 297 2(?: 298 3(?: 299 \d{2} 300 )?| 301 [46]\d{1,2}| 302 5\d{2}| 303 7\d 304 )| 305 5(?: 306 3\d?| 307 4\d| 308 [57]\d{1,2}| 309 6\d{2}| 310 8 311 ) 312 )| 313 71\d{2}| 314 8(?: 315 [18]\d{2}| 316 23\d{2}| 317 54\d{2} 318 )| 319 9(?: 320 [18]\d{2}| 321 2[2-5]\d{2}| 322 53\d{1,2} 323 ) 324 )\d{3}| 325 5(?: 326 02[03489]\d{2}| 327 1\d{2}| 328 2(?: 329 1\d{2}| 330 2(?: 331 2(?: 332 \d{2} 333 )?| 334 [457]\d{2} 335 ) 336 )| 337 3(?: 338 1\d{2}| 339 2(?: 340 [37](?: 341 \d{2} 342 )?| 343 [569]\d{2} 344 ) 345 )| 346 4(?: 347 1\d{2}| 348 2[46]\d{2} 349 )| 350 5(?: 351 1\d{2}| 352 26\d{1,2} 353 )| 354 6(?: 355 [18]\d{2}| 356 2| 357 53\d{2} 358 )| 359 7(?: 360 1| 361 24 362 )\d{2}| 363 8(?: 364 1| 365 26 366 )\d{2}| 367 91\d{2} 368 )\d{3}| 369 6(?: 370 0(?: 371 1\d{2}| 372 2(?: 373 3\d{2}| 374 4\d{1,2} 375 ) 376 )| 377 2(?: 378 2[2-5]\d{2}| 379 5(?: 380 [3-5]\d{2}| 381 7 382 )| 383 8\d{2} 384 )| 385 3(?: 386 1| 387 2[3478] 388 )\d{2}| 389 4(?: 390 1| 391 2[34] 392 )\d{2}| 393 5(?: 394 1| 395 2[47] 396 )\d{2}| 397 6(?: 398 [18]\d{2}| 399 6(?: 400 2(?: 401 2\d| 402 [34]\d{2} 403 )| 404 5(?: 405 [24]\d{2}| 406 3\d| 407 5\d{1,2} 408 ) 409 ) 410 )| 411 72[2-5]\d{2}| 412 8(?: 413 1\d{2}| 414 2[2-5]\d{2} 415 )| 416 9(?: 417 1\d{2}| 418 2[2-6]\d{2} 419 ) 420 )\d{3}| 421 7(?: 422 (?: 423 02| 424 [3-589]1| 425 6[12]| 426 72[24] 427 )\d{2}| 428 21\d{3}| 429 32 430 )\d{3}| 431 8(?: 432 (?: 433 4[12]| 434 [5-7]2| 435 1\d? 436 )| 437 (?: 438 0| 439 3[12]| 440 [5-7]1| 441 217 442 )\d 443 )\d{4}| 444 9(?: 445 [35]1| 446 (?: 447 [024]2| 448 81 449 )\d| 450 (?: 451 1| 452 [24]1 453 )\d{2} 454 )\d{3} 455 "#, 456 ) 457 .build() 458 .unwrap(); 459} 460