1#include <assert.h>
2#include <stdio.h>
3#include <stdint.h>
4#include <stdlib.h>
5#include <string.h>
6
7#include "rure.h"
8
/*
 * DEBUG gates the diagnostic fprintf calls in the tests of this file.
 * It defaults to false unless the build already defines it.
 */
#if !defined(DEBUG)
#define DEBUG false
#endif
12
13bool test_is_match() {
14    bool passed = true;
15    const char *haystack = "snowman: \xE2\x98\x83";
16
17    rure *re = rure_compile_must("\\p{So}$");
18    bool matched = rure_is_match(re, (const uint8_t *)haystack,
19                                 strlen(haystack), 0);
20    if (!matched) {
21        if (DEBUG) {
22            fprintf(stderr,
23                    "[test_is_match] expected match, but got no match\n");
24        }
25        passed = false;
26    }
27    rure_free(re);
28    return passed;
29}
30
31bool test_shortest_match() {
32    bool passed = true;
33    const char *haystack = "aaaaa";
34
35    rure *re = rure_compile_must("a+");
36    size_t end = 0;
37    bool matched = rure_shortest_match(re, (const uint8_t *)haystack,
38                                       strlen(haystack), 0, &end);
39    if (!matched) {
40        if (DEBUG) {
41            fprintf(stderr,
42                    "[test_shortest_match] expected match, "
43                    "but got no match\n");
44        }
45        passed = false;
46    }
47    size_t expect_end = 1;
48    if (end != expect_end) {
49        if (DEBUG) {
50            fprintf(stderr,
51                    "[test_shortest_match] expected match end location %zu "
52                    "but got %zu\n", expect_end, end);
53        }
54        passed = false;
55    }
56    rure_free(re);
57    return passed;
58}
59
60bool test_find() {
61    bool passed = true;
62    const char *haystack = "snowman: \xE2\x98\x83";
63
64    rure *re = rure_compile_must("\\p{So}$");
65    rure_match match = {0};
66    bool matched = rure_find(re, (const uint8_t *)haystack, strlen(haystack),
67                             0, &match);
68    if (!matched) {
69        if (DEBUG) {
70            fprintf(stderr, "[test_find] expected match, but got no match\n");
71        }
72        passed = false;
73    }
74    size_t expect_start = 9;
75    size_t expect_end = 12;
76    if (match.start != expect_start || match.end != expect_end) {
77        if (DEBUG) {
78            fprintf(stderr,
79                    "[test_find] expected match at (%zu, %zu), but "
80                    "got match at (%zu, %zu)\n",
81                    expect_start, expect_end, match.start, match.end);
82        }
83        passed = false;
84    }
85    rure_free(re);
86    return passed;
87}
88
89bool test_captures() {
90    bool passed = true;
91    const char *haystack = "snowman: \xE2\x98\x83";
92
93    rure *re = rure_compile_must(".(.*(?P<snowman>\\p{So}))$");
94    rure_match match = {0};
95    rure_captures *caps = rure_captures_new(re);
96    bool matched = rure_find_captures(re, (const uint8_t *)haystack,
97                                      strlen(haystack), 0, caps);
98    if (!matched) {
99        if (DEBUG) {
100            fprintf(stderr,
101                    "[test_captures] expected match, but got no match\n");
102        }
103        passed = false;
104    }
105    size_t expect_captures_len = 3;
106    size_t captures_len = rure_captures_len(caps);
107    if (captures_len != expect_captures_len) {
108        if (DEBUG) {
109            fprintf(stderr,
110                    "[test_captures] "
111                    "expected capture group length to be %zd, but "
112                    "got %zd\n",
113                    expect_captures_len, captures_len);
114        }
115        passed = false;
116        goto done;
117    }
118    int32_t expect_capture_index = 2;
119    int32_t capture_index = rure_capture_name_index(re, "snowman");
120    if (capture_index != expect_capture_index) {
121        if (DEBUG) {
122            fprintf(stderr,
123                    "[test_captures] "
124                    "expected capture index %d for name 'snowman', but "
125                    "got %d\n",
126                    expect_capture_index, capture_index);
127        }
128        passed = false;
129        goto done;
130    }
131    size_t expect_start = 9;
132    size_t expect_end = 12;
133    rure_captures_at(caps, 2, &match);
134    if (match.start != expect_start || match.end != expect_end) {
135        if (DEBUG) {
136            fprintf(stderr,
137                    "[test_captures] "
138                    "expected capture 2 match at (%zu, %zu), "
139                    "but got match at (%zu, %zu)\n",
140                    expect_start, expect_end, match.start, match.end);
141        }
142        passed = false;
143    }
144done:
145    rure_captures_free(caps);
146    rure_free(re);
147    return passed;
148}
149
150bool test_iter() {
151    bool passed = true;
152    const uint8_t *haystack = (const uint8_t *)"abc xyz";
153    size_t haystack_len = strlen((const char *)haystack);
154
155    rure *re = rure_compile_must("\\w+(\\w)");
156    rure_match match = {0};
157    rure_captures *caps = rure_captures_new(re);
158    rure_iter *it = rure_iter_new(re);
159
160    bool matched = rure_iter_next(it, haystack, haystack_len, &match);
161    if (!matched) {
162        if (DEBUG) {
163            fprintf(stderr,
164                    "[test_iter] expected first match, but got no match\n");
165        }
166        passed = false;
167        goto done;
168    }
169    size_t expect_start = 0;
170    size_t expect_end = 3;
171    if (match.start != expect_start || match.end != expect_end) {
172        if (DEBUG) {
173            fprintf(stderr,
174                    "[test_iter] expected first match at (%zu, %zu), but "
175                    "got match at (%zu, %zu)\n",
176                    expect_start, expect_end, match.start, match.end);
177        }
178        passed = false;
179        goto done;
180    }
181
182    matched = rure_iter_next_captures(it, haystack, haystack_len, caps);
183    if (!matched) {
184        if (DEBUG) {
185            fprintf(stderr,
186                    "[test_iter] expected second match, but got no match\n");
187        }
188        passed = false;
189        goto done;
190    }
191    rure_captures_at(caps, 1, &match);
192    expect_start = 6;
193    expect_end = 7;
194    if (match.start != expect_start || match.end != expect_end) {
195        if (DEBUG) {
196            fprintf(stderr,
197                    "[test_iter] expected second match at (%zu, %zu), but "
198                    "got match at (%zu, %zu)\n",
199                    expect_start, expect_end, match.start, match.end);
200        }
201        passed = false;
202        goto done;
203    }
204done:
205    rure_iter_free(it);
206    rure_captures_free(caps);
207    rure_free(re);
208    return passed;
209}
210
211bool test_iter_capture_name(char *expect, char *given) {
212    bool passed = true;
213    if (strcmp(expect, given)) {
214        if (DEBUG) {
215            fprintf(stderr,
216                    "[test_iter_capture_name] expected first capture "
217                    "name '%s' got '%s'\n",
218                    expect, given);
219        }
220        passed = false;
221    }
222    return passed;
223}
224
225bool test_iter_capture_names() {
226    bool passed = true;
227
228    char *name;
229    rure *re = rure_compile_must(
230        "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})");
231    rure_iter_capture_names *it = rure_iter_capture_names_new(re);
232
233    bool result = rure_iter_capture_names_next(it, &name);
234    if (!result) {
235        if (DEBUG) {
236            fprintf(stderr,
237                    "[test_iter_capture_names] expected a second name, "
238                    "but got none\n");
239        }
240        passed = false;
241        goto done;
242    }
243
244    result = rure_iter_capture_names_next(it, &name);
245    passed = test_iter_capture_name("year", name);
246    if (!passed) {
247        goto done;
248    }
249
250    result = rure_iter_capture_names_next(it, &name);
251    passed = test_iter_capture_name("month", name);
252    if (!passed) {
253        goto done;
254    }
255
256    result = rure_iter_capture_names_next(it, &name);
257    passed = test_iter_capture_name("day", name);
258    if (!passed) {
259        goto done;
260    }
261done:
262    rure_iter_capture_names_free(it);
263    rure_free(re);
264    return passed;
265}
266
267/*
268 * This tests whether we can set the flags correctly. In this case, we disable
269 * all flags, which includes disabling Unicode mode. When we disable Unicode
270 * mode, we can match arbitrary possibly invalid UTF-8 bytes, such as \xFF.
271 * (When Unicode mode is enabled, \xFF won't match .)
272 */
273bool test_flags() {
274    bool passed = true;
275    const char *pattern = ".";
276    const char *haystack = "\xFF";
277
278    rure *re = rure_compile((const uint8_t *)pattern, strlen(pattern),
279                            0, NULL, NULL);
280    bool matched = rure_is_match(re, (const uint8_t *)haystack,
281                                 strlen(haystack), 0);
282    if (!matched) {
283        if (DEBUG) {
284            fprintf(stderr, "[test_flags] expected match, but got no match\n");
285        }
286        passed = false;
287    }
288    rure_free(re);
289    return passed;
290}
291
292bool test_compile_error() {
293    bool passed = true;
294    rure_error *err = rure_error_new();
295    rure *re = rure_compile((const uint8_t *)"(", 1, 0, NULL, err);
296    if (re != NULL) {
297        if (DEBUG) {
298            fprintf(stderr,
299                    "[test_compile_error] "
300                    "expected NULL regex pointer, but got non-NULL pointer\n");
301        }
302        passed = false;
303        rure_free(re);
304    }
305    const char *msg = rure_error_message(err);
306    if (NULL == strstr(msg, "unclosed group")) {
307        if (DEBUG) {
308            fprintf(stderr,
309                    "[test_compile_error] "
310                    "expected an 'unclosed parenthesis' error message, but "
311                    "got this instead: '%s'\n", msg);
312        }
313        passed = false;
314    }
315    rure_error_free(err);
316    return passed;
317}
318
319bool test_compile_error_size_limit() {
320    bool passed = true;
321    rure_options *opts = rure_options_new();
322    rure_options_size_limit(opts, 0);
323    rure_error *err = rure_error_new();
324    rure *re = rure_compile((const uint8_t *)"\\w{100}", 8, 0, opts, err);
325    if (re != NULL) {
326        if (DEBUG) {
327            fprintf(stderr,
328                    "[test_compile_error_size_limit] "
329                    "expected NULL regex pointer, but got non-NULL pointer\n");
330        }
331        passed = false;
332        rure_free(re);
333    }
334    const char *msg = rure_error_message(err);
335    if (NULL == strstr(msg, "exceeds size")) {
336        if (DEBUG) {
337            fprintf(stderr,
338                    "[test_compile_error] "
339                    "expected an 'exceeds size' error message, but "
340                    "got this instead: '%s'\n", msg);
341        }
342        passed = false;
343    }
344    rure_options_free(opts);
345    rure_error_free(err);
346    return passed;
347}
348
349bool test_regex_set_matches() {
350
351#define PAT_COUNT 6
352
353    bool passed = true;
354    const char *patterns[] = {
355        "foo", "barfoo", "\\w+", "\\d+", "foobar", "bar"
356    };
357    const size_t patterns_lengths[] = {
358        3, 6, 3, 3, 6, 3
359    };
360
361    rure_error *err = rure_error_new();
362    rure_set *re = rure_compile_set((const uint8_t **) patterns,
363                                    patterns_lengths,
364                                    PAT_COUNT,
365                                    0,
366                                    NULL,
367                                    err);
368    if (re == NULL) {
369        passed = false;
370        goto done2;
371    }
372
373    if (rure_set_len(re) != PAT_COUNT) {
374        passed = false;
375        goto done1;
376    }
377
378    if (!rure_set_is_match(re, (const uint8_t *) "foobar", 6, 0)) {
379        passed = false;
380        goto done1;
381    }
382
383    if (rure_set_is_match(re, (const uint8_t *) "", 0, 0)) {
384        passed = false;
385        goto done1;
386    }
387
388    bool matches[PAT_COUNT];
389    if (!rure_set_matches(re, (const uint8_t *) "foobar", 6, 0, matches)) {
390        passed = false;
391        goto done1;
392    }
393
394    const bool match_target[] = {
395        true, false, true, false, true, true
396    };
397
398    int i;
399    for (i = 0; i < PAT_COUNT; ++i) {
400        if (matches[i] != match_target[i]) {
401            passed = false;
402            goto done1;
403        }
404    }
405
406done1:
407    rure_set_free(re);
408done2:
409    rure_error_free(err);
410    return passed;
411
412#undef PAT_COUNT
413}
414
415bool test_regex_set_match_start() {
416
417#define PAT_COUNT 3
418
419    bool passed = true;
420    const char *patterns[] = {
421        "foo", "bar", "fooo"
422    };
423    const size_t patterns_lengths[] = {
424        3, 3, 4
425    };
426
427    rure_error *err = rure_error_new();
428    rure_set *re = rure_compile_set((const uint8_t **) patterns,
429                                    patterns_lengths,
430                                    PAT_COUNT,
431                                    0,
432                                    NULL,
433                                    err);
434    if (re == NULL) {
435        passed = false;
436        goto done2;
437    }
438
439    if (rure_set_len(re) != PAT_COUNT) {
440        passed = false;
441        goto done1;
442    }
443
444    if (rure_set_is_match(re, (const uint8_t *)"foobiasdr", 7, 2)) {
445        passed = false;
446        goto done1;
447    }
448
449    {
450        bool matches[PAT_COUNT];
451        if (!rure_set_matches(re, (const uint8_t *)"fooobar", 8, 0, matches)) {
452            passed = false;
453            goto done1;
454        }
455
456        const bool match_target[] = {
457            true, true, true
458        };
459
460        int i;
461        for (i = 0; i < PAT_COUNT; ++i) {
462            if (matches[i] != match_target[i]) {
463                passed = false;
464                goto done1;
465            }
466        }
467    }
468
469    {
470        bool matches[PAT_COUNT];
471        if (!rure_set_matches(re, (const uint8_t *)"fooobar", 7, 1, matches)) {
472            passed = false;
473            goto done1;
474        }
475
476        const bool match_target[] = {
477            false, true, false
478        };
479
480        int i;
481        for (i = 0; i < PAT_COUNT; ++i) {
482            if (matches[i] != match_target[i]) {
483                passed = false;
484                goto done1;
485            }
486        }
487    }
488
489done1:
490    rure_set_free(re);
491done2:
492    rure_error_free(err);
493    return passed;
494
495#undef PAT_COUNT
496}
497
498bool test_regex_set_options() {
499
500    bool passed = true;
501    rure_options *opts = rure_options_new();
502    rure_options_size_limit(opts, 0);
503    rure_error *err = rure_error_new();
504
505    const char *patterns[] = { "\\w{100}" };
506    const size_t patterns_lengths[] = { 8 };
507
508    rure_set *re = rure_compile_set(
509        (const uint8_t **) patterns, patterns_lengths, 1, 0, opts, err);
510    if (re != NULL) {
511        if (DEBUG) {
512            fprintf(stderr,
513                    "[test_compile_error_size_limit] "
514                    "expected NULL regex pointer, but got non-NULL pointer\n");
515        }
516        passed = false;
517        rure_set_free(re);
518    }
519    const char *msg = rure_error_message(err);
520    if (NULL == strstr(msg, "exceeds size")) {
521        if (DEBUG) {
522            fprintf(stderr,
523                    "[test_compile_error] "
524                    "expected an 'exceeds size' error message, but "
525                    "got this instead: '%s'\n", msg);
526        }
527        passed = false;
528    }
529    rure_options_free(opts);
530    rure_error_free(err);
531    return passed;
532}
533
534bool test_escape() {
535    bool passed = true;
536
537    const char *pattern = "^[a-z]+.*$";
538    const char *expected_escaped = "\\^\\[a\\-z\\]\\+\\.\\*\\$";
539
540    const char *escaped = rure_escape_must(pattern);
541    if (!escaped) {
542        if (DEBUG) {
543            fprintf(stderr,
544                    "[test_captures] expected escaped, but got no escaped\n");
545        }
546        passed = false;
547    } else if (strcmp(escaped, expected_escaped) != 0) {
548        if (DEBUG) {
549            fprintf(stderr,
550                    "[test_captures] expected \"%s\", but got \"%s\"\n",
551                    expected_escaped, escaped);
552        }
553        passed = false;
554    }
555    rure_cstring_free((char *) escaped);
556    return passed;
557}
558
/*
 * Runs one test function and reports its outcome on stderr.
 *
 * test   - the test to run; it returns true on success
 * name   - label printed next to the PASSED/FAILED verdict
 * passed - overall status flag; it is set to false when the test fails and
 *          is left untouched when the test passes
 */
void run_test(bool (test)(), const char *name, bool *passed) {
    if (test()) {
        fprintf(stderr, "PASSED: %s\n", name);
        return;
    }

    *passed = false;
    fprintf(stderr, "FAILED: %s\n", name);
}
567
568int main() {
569    bool passed = true;
570
571    run_test(test_is_match, "test_is_match", &passed);
572    run_test(test_shortest_match, "test_shortest_match", &passed);
573    run_test(test_find, "test_find", &passed);
574    run_test(test_captures, "test_captures", &passed);
575    run_test(test_iter, "test_iter", &passed);
576    run_test(test_iter_capture_names, "test_iter_capture_names", &passed);
577    run_test(test_flags, "test_flags", &passed);
578    run_test(test_compile_error, "test_compile_error", &passed);
579    run_test(test_compile_error_size_limit, "test_compile_error_size_limit",
580             &passed);
581    run_test(test_regex_set_matches, "test_regex_set_match", &passed);
582    run_test(test_regex_set_options, "test_regex_set_options", &passed);
583    run_test(test_regex_set_match_start, "test_regex_set_match_start",
584             &passed);
585    run_test(test_escape, "test_escape", &passed);
586
587    if (!passed) {
588        exit(1);
589    }
590    return 0;
591}
592