1/***************************************************************************
2Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
3tries to compile and match it, deriving options from the string itself. If
4STANDALONE is defined, a main program that calls the driver with the contents
5of specified files is compiled, and commentary on what is happening is output.
If an argument starts with '=' the rest of it is taken as a literal string
7rather than a file name. This allows easy testing of short strings.
8
9Written by Philip Hazel, October 2016
10***************************************************************************/
11
12#include <errno.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16
17#define PCRE2_CODE_UNIT_WIDTH 8
18#include "pcre2.h"
19
/* Upper limit on how much of the input is used as the subject when matching;
longer inputs are truncated to this many code units for the match calls. */

#define MAX_MATCH_SIZE 1000

/* Number of ints in the workspace vector handed to pcre2_dfa_match(). */

#define DFA_WORKSPACE_COUNT 100

/* Compile option bits that may survive masking of the pseudo-random option
word. Any bit not listed here is always cleared before compiling. */

#define ALLOWED_COMPILE_OPTIONS \
  (PCRE2_ANCHORED | \
   PCRE2_ALLOW_EMPTY_CLASS | \
   PCRE2_ALT_BSUX | \
   PCRE2_ALT_CIRCUMFLEX | \
   PCRE2_ALT_VERBNAMES | \
   PCRE2_AUTO_CALLOUT | \
   PCRE2_CASELESS | \
   PCRE2_DOLLAR_ENDONLY | \
   PCRE2_DOTALL | \
   PCRE2_DUPNAMES | \
   PCRE2_ENDANCHORED | \
   PCRE2_EXTENDED | \
   PCRE2_FIRSTLINE | \
   PCRE2_MATCH_UNSET_BACKREF | \
   PCRE2_MULTILINE | \
   PCRE2_NEVER_BACKSLASH_C | \
   PCRE2_NO_AUTO_CAPTURE | \
   PCRE2_NO_AUTO_POSSESS | \
   PCRE2_NO_DOTSTAR_ANCHOR | \
   PCRE2_NO_START_OPTIMIZE | \
   PCRE2_UCP | \
   PCRE2_UNGREEDY | \
   PCRE2_USE_OFFSET_LIMIT | \
   PCRE2_UTF)

/* Match option bits that may survive masking of the pseudo-random option
word. */

#define ALLOWED_MATCH_OPTIONS \
  (PCRE2_ANCHORED | \
   PCRE2_ENDANCHORED | \
   PCRE2_NOTBOL | \
   PCRE2_NOTEOL | \
   PCRE2_NOTEMPTY | \
   PCRE2_NOTEMPTY_ATSTART | \
   PCRE2_PARTIAL_HARD | \
   PCRE2_PARTIAL_SOFT | \
   PCRE2_NO_JIT)
38
39/* This is the callout function. Its only purpose is to halt matching if there
40are more than 100 callouts, as one way of stopping too much time being spent on
41fruitless matches. The callout data is a pointer to the counter. */
42
43static int callout_function(pcre2_callout_block *cb, void *callout_data)
44{
45(void)cb;  /* Avoid unused parameter warning */
46*((uint32_t *)callout_data) += 1;
47return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
48}
49
50/* Putting in this apparently unnecessary prototype prevents gcc from giving a
51"no previous prototype" warning when compiling at high warning level. */
52
53int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
54
55/* Here's the driving function. */
56
57int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
58{
59uint32_t compile_options;
60uint32_t match_options;
61pcre2_match_data *match_data = NULL;
62pcre2_match_context *match_context = NULL;
63size_t match_size;
64int dfa_workspace[DFA_WORKSPACE_COUNT];
65int r1, r2;
66int i;
67
68if (size < 1) return 0;
69
70/* Limiting the length of the subject for matching stops fruitless searches
71in large trees taking too much time. */
72
73match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
74
75/* Figure out some options to use. Initialize the random number to ensure
76repeatability. Ensure that we get a 32-bit unsigned random number for testing
77options. (RAND_MAX is required to be at least 32767, but is commonly
782147483647, which excludes the top bit.) */
79
80srand((unsigned int)(data[size/2]));
81r1 = rand();
82r2 = rand();
83
84/* Ensure that all undefined option bits are zero (waste of time trying them)
85and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
86input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
87reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
88\C in random patterns is highly likely to cause a crash. */
89
90compile_options =
91  ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
92  PCRE2_NEVER_BACKSLASH_C;
93
94match_options =
95  ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
96
97/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
98allowed together and just give an immediate error return. */
99
100if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
101  match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
102
103/* Do the compile with and without the options, and after a successful compile,
104likewise do the match with and without the options. */
105
106for (i = 0; i < 2; i++)
107  {
108  uint32_t callout_count;
109  int errorcode;
110  PCRE2_SIZE erroroffset;
111  pcre2_code *code;
112
113#ifdef STANDALONE
114  printf("Compile options %.8x never_backslash_c", compile_options);
115  printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
116    ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
117    ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
118    ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
119    ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
120    ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
121    ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
122    ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
123    ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
124    ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
125    ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
126    ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
127    ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
128    ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
129    ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
130    ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
131    ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
132    ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
133    ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
134    ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
135    ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
136    ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
137    ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
138    ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
139    ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
140    ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
141    ((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
142#endif
143
144  code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
145    &errorcode, &erroroffset, NULL);
146
147  /* Compilation succeeded */
148
149  if (code != NULL)
150    {
151    int j;
152    uint32_t save_match_options = match_options;
153
154#ifdef SUPPORT_JIT
155    pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
156#endif
157
158    /* Create match data and context blocks only when we first need them. Set
159    low match and depth limits to avoid wasting too much searching large
160    pattern trees. Almost all matches are going to fail. */
161
162    if (match_data == NULL)
163      {
164      match_data = pcre2_match_data_create(32, NULL);
165      if (match_data == NULL)
166        {
167#ifdef STANDALONE
168        printf("** Failed to create match data block\n");
169#endif
170        return 0;
171        }
172      }
173
174    if (match_context == NULL)
175      {
176      match_context = pcre2_match_context_create(NULL);
177      if (match_context == NULL)
178        {
179#ifdef STANDALONE
180        printf("** Failed to create match context block\n");
181#endif
182        return 0;
183        }
184      (void)pcre2_set_match_limit(match_context, 100);
185      (void)pcre2_set_depth_limit(match_context, 100);
186      (void)pcre2_set_callout(match_context, callout_function, &callout_count);
187      }
188
189    /* Match twice, with and without options. */
190
191    for (j = 0; j < 2; j++)
192      {
193#ifdef STANDALONE
194      printf("Match options %.8x", match_options);
195      printf("%s%s%s%s%s%s%s%s%s%s\n",
196        ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
197        ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
198        ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
199        ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
200        ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
201        ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
202        ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
203        ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
204        ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
205        ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
206#endif
207
208      callout_count = 0;
209      errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
210        match_options, match_data, match_context);
211
212#ifdef STANDALONE
213      if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
214        {
215        unsigned char buffer[256];
216        pcre2_get_error_message(errorcode, buffer, 256);
217        printf("Match failed: error %d: %s\n", errorcode, buffer);
218        }
219#endif
220
221      match_options = 0;  /* For second time */
222      }
223
224    /* Match with DFA twice, with and without options. */
225
226    match_options = save_match_options & ~PCRE2_NO_JIT;  /* Not valid for DFA */
227
228    for (j = 0; j < 2; j++)
229      {
230#ifdef STANDALONE
231      printf("DFA match options %.8x", match_options);
232      printf("%s%s%s%s%s%s%s%s%s\n",
233        ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
234        ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
235        ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
236        ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
237        ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
238        ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
239        ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
240        ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
241        ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
242#endif
243
244      callout_count = 0;
245      errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
246        (PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
247        dfa_workspace, DFA_WORKSPACE_COUNT);
248
249#ifdef STANDALONE
250      if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
251        {
252        unsigned char buffer[256];
253        pcre2_get_error_message(errorcode, buffer, 256);
254        printf("Match failed: error %d: %s\n", errorcode, buffer);
255        }
256#endif
257
258      match_options = 0;  /* For second time */
259      }
260
261    match_options = save_match_options;  /* Reset for the second compile */
262    pcre2_code_free(code);
263    }
264
265  /* Compilation failed */
266
267  else
268    {
269    unsigned char buffer[256];
270    pcre2_get_error_message(errorcode, buffer, 256);
271#ifdef STANDALONE
272    printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
273#else
274    if (strstr((const char *)buffer, "internal error") != NULL) abort();
275#endif
276    }
277
278  compile_options = PCRE2_NEVER_BACKSLASH_C;  /* For second time */
279  }
280
281if (match_data != NULL) pcre2_match_data_free(match_data);
282if (match_context != NULL) pcre2_match_context_free(match_context);
283
284return 0;
285}
286
287
288/* Optional main program.  */
289
290#ifdef STANDALONE
/* Standalone entry point. Each argument is either a literal test string
(when it starts with '=', the remainder being the string) or the name of a
file whose entire contents are used. Each input is passed to
LLVMFuzzerTestOneInput(), with commentary written to stdout. Problems with an
individual argument are reported and the remaining arguments are still
processed.

Arguments:
  argc     argument count
  argv     argument vector

Returns:   always 0
*/

int main(int argc, char **argv)
{
int i;

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long filepos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. A zero-length string still gets a one-byte allocation,
  because malloc(0) is allowed to return NULL and that must not be reported
  as an allocation failure. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %lu\n", (unsigned long)readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
    if (buffer == NULL)
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file length. ftell() returns -1 on failure, which must be
  caught before it is converted to an unsigned size. */

  if (fseek(f, 0, SEEK_END) != 0 || (filepos = ftell(f)) < 0 ||
      fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to find the size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)filepos;

  /* As for literals, never ask malloc() for zero bytes. */

  buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %lu bytes of memory\n",
      (unsigned long)filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %lu but fread() returned %lu\n",
      (unsigned long)filelen, (unsigned long)readsize);
  else
    {
    printf("Length = %lu\n", (unsigned long)filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
367#endif  /* STANDALONE */
368
369/* End */
370