/* A fuzz test for CPython.

  The only exposed function is LLVMFuzzerTestOneInput, which is called by
  fuzzers and by the _fuzz module for smoke tests.

  To build exactly one fuzz test, as when running in oss-fuzz etc.,
  build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
  LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
      -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.

  See the source code for LLVMFuzzerTestOneInput for details. */
127db96d56Sopenharmony_ci
#include <Python.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
167db96d56Sopenharmony_ci
177db96d56Sopenharmony_ci/*  Fuzz PyFloat_FromString as a proxy for float(str). */
187db96d56Sopenharmony_cistatic int fuzz_builtin_float(const char* data, size_t size) {
197db96d56Sopenharmony_ci    PyObject* s = PyBytes_FromStringAndSize(data, size);
207db96d56Sopenharmony_ci    if (s == NULL) return 0;
217db96d56Sopenharmony_ci    PyObject* f = PyFloat_FromString(s);
227db96d56Sopenharmony_ci    if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
237db96d56Sopenharmony_ci        PyErr_Clear();
247db96d56Sopenharmony_ci    }
257db96d56Sopenharmony_ci
267db96d56Sopenharmony_ci    Py_XDECREF(f);
277db96d56Sopenharmony_ci    Py_DECREF(s);
287db96d56Sopenharmony_ci    return 0;
297db96d56Sopenharmony_ci}
307db96d56Sopenharmony_ci
317db96d56Sopenharmony_ci#define MAX_INT_TEST_SIZE 0x10000
327db96d56Sopenharmony_ci
337db96d56Sopenharmony_ci/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
347db96d56Sopenharmony_cistatic int fuzz_builtin_int(const char* data, size_t size) {
357db96d56Sopenharmony_ci    /* Ignore test cases with very long ints to avoid timeouts
367db96d56Sopenharmony_ci       int("9" * 1000000) is not a very interesting test caase */
377db96d56Sopenharmony_ci    if (size > MAX_INT_TEST_SIZE) {
387db96d56Sopenharmony_ci        return 0;
397db96d56Sopenharmony_ci    }
407db96d56Sopenharmony_ci    /* Pick a random valid base. (When the fuzzed function takes extra
417db96d56Sopenharmony_ci       parameters, it's somewhat normal to hash the input to generate those
427db96d56Sopenharmony_ci       parameters. We want to exercise all code paths, so we do so here.) */
437db96d56Sopenharmony_ci    int base = _Py_HashBytes(data, size) % 37;
447db96d56Sopenharmony_ci    if (base == 1) {
457db96d56Sopenharmony_ci        // 1 is the only number between 0 and 36 that is not a valid base.
467db96d56Sopenharmony_ci        base = 0;
477db96d56Sopenharmony_ci    }
487db96d56Sopenharmony_ci    if (base == -1) {
497db96d56Sopenharmony_ci        return 0;  // An error occurred, bail early.
507db96d56Sopenharmony_ci    }
517db96d56Sopenharmony_ci    if (base < 0) {
527db96d56Sopenharmony_ci        base = -base;
537db96d56Sopenharmony_ci    }
547db96d56Sopenharmony_ci
557db96d56Sopenharmony_ci    PyObject* s = PyUnicode_FromStringAndSize(data, size);
567db96d56Sopenharmony_ci    if (s == NULL) {
577db96d56Sopenharmony_ci        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
587db96d56Sopenharmony_ci            PyErr_Clear();
597db96d56Sopenharmony_ci        }
607db96d56Sopenharmony_ci        return 0;
617db96d56Sopenharmony_ci    }
627db96d56Sopenharmony_ci    PyObject* l = PyLong_FromUnicodeObject(s, base);
637db96d56Sopenharmony_ci    if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
647db96d56Sopenharmony_ci        PyErr_Clear();
657db96d56Sopenharmony_ci    }
667db96d56Sopenharmony_ci    PyErr_Clear();
677db96d56Sopenharmony_ci    Py_XDECREF(l);
687db96d56Sopenharmony_ci    Py_DECREF(s);
697db96d56Sopenharmony_ci    return 0;
707db96d56Sopenharmony_ci}
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
737db96d56Sopenharmony_cistatic int fuzz_builtin_unicode(const char* data, size_t size) {
747db96d56Sopenharmony_ci    PyObject* s = PyUnicode_FromStringAndSize(data, size);
757db96d56Sopenharmony_ci    if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
767db96d56Sopenharmony_ci        PyErr_Clear();
777db96d56Sopenharmony_ci    }
787db96d56Sopenharmony_ci    Py_XDECREF(s);
797db96d56Sopenharmony_ci    return 0;
807db96d56Sopenharmony_ci}
817db96d56Sopenharmony_ci
827db96d56Sopenharmony_ci
837db96d56Sopenharmony_ciPyObject* struct_unpack_method = NULL;
847db96d56Sopenharmony_ciPyObject* struct_error = NULL;
857db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */
867db96d56Sopenharmony_cistatic int init_struct_unpack(void) {
877db96d56Sopenharmony_ci    /* Import struct.unpack */
887db96d56Sopenharmony_ci    PyObject* struct_module = PyImport_ImportModule("struct");
897db96d56Sopenharmony_ci    if (struct_module == NULL) {
907db96d56Sopenharmony_ci        return 0;
917db96d56Sopenharmony_ci    }
927db96d56Sopenharmony_ci    struct_error = PyObject_GetAttrString(struct_module, "error");
937db96d56Sopenharmony_ci    if (struct_error == NULL) {
947db96d56Sopenharmony_ci        return 0;
957db96d56Sopenharmony_ci    }
967db96d56Sopenharmony_ci    struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack");
977db96d56Sopenharmony_ci    return struct_unpack_method != NULL;
987db96d56Sopenharmony_ci}
997db96d56Sopenharmony_ci/* Fuzz struct.unpack(x, y) */
1007db96d56Sopenharmony_cistatic int fuzz_struct_unpack(const char* data, size_t size) {
1017db96d56Sopenharmony_ci    /* Everything up to the first null byte is considered the
1027db96d56Sopenharmony_ci       format. Everything after is the buffer */
1037db96d56Sopenharmony_ci    const char* first_null = memchr(data, '\0', size);
1047db96d56Sopenharmony_ci    if (first_null == NULL) {
1057db96d56Sopenharmony_ci        return 0;
1067db96d56Sopenharmony_ci    }
1077db96d56Sopenharmony_ci
1087db96d56Sopenharmony_ci    size_t format_length = first_null - data;
1097db96d56Sopenharmony_ci    size_t buffer_length = size - format_length - 1;
1107db96d56Sopenharmony_ci
1117db96d56Sopenharmony_ci    PyObject* pattern = PyBytes_FromStringAndSize(data, format_length);
1127db96d56Sopenharmony_ci    if (pattern == NULL) {
1137db96d56Sopenharmony_ci        return 0;
1147db96d56Sopenharmony_ci    }
1157db96d56Sopenharmony_ci    PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length);
1167db96d56Sopenharmony_ci    if (buffer == NULL) {
1177db96d56Sopenharmony_ci        Py_DECREF(pattern);
1187db96d56Sopenharmony_ci        return 0;
1197db96d56Sopenharmony_ci    }
1207db96d56Sopenharmony_ci
1217db96d56Sopenharmony_ci    PyObject* unpacked = PyObject_CallFunctionObjArgs(
1227db96d56Sopenharmony_ci        struct_unpack_method, pattern, buffer, NULL);
1237db96d56Sopenharmony_ci    /* Ignore any overflow errors, these are easily triggered accidentally */
1247db96d56Sopenharmony_ci    if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
1257db96d56Sopenharmony_ci        PyErr_Clear();
1267db96d56Sopenharmony_ci    }
1277db96d56Sopenharmony_ci    /* The pascal format string will throw a negative size when passing 0
1287db96d56Sopenharmony_ci       like: struct.unpack('0p', b'') */
1297db96d56Sopenharmony_ci    if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
1307db96d56Sopenharmony_ci        PyErr_Clear();
1317db96d56Sopenharmony_ci    }
1327db96d56Sopenharmony_ci    /* Ignore any struct.error exceptions, these can be caused by invalid
1337db96d56Sopenharmony_ci       formats or incomplete buffers both of which are common. */
1347db96d56Sopenharmony_ci    if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
1357db96d56Sopenharmony_ci        PyErr_Clear();
1367db96d56Sopenharmony_ci    }
1377db96d56Sopenharmony_ci
1387db96d56Sopenharmony_ci    Py_XDECREF(unpacked);
1397db96d56Sopenharmony_ci    Py_DECREF(pattern);
1407db96d56Sopenharmony_ci    Py_DECREF(buffer);
1417db96d56Sopenharmony_ci    return 0;
1427db96d56Sopenharmony_ci}
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci
1457db96d56Sopenharmony_ci#define MAX_JSON_TEST_SIZE 0x10000
1467db96d56Sopenharmony_ci
1477db96d56Sopenharmony_ciPyObject* json_loads_method = NULL;
1487db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */
1497db96d56Sopenharmony_cistatic int init_json_loads(void) {
1507db96d56Sopenharmony_ci    /* Import json.loads */
1517db96d56Sopenharmony_ci    PyObject* json_module = PyImport_ImportModule("json");
1527db96d56Sopenharmony_ci    if (json_module == NULL) {
1537db96d56Sopenharmony_ci        return 0;
1547db96d56Sopenharmony_ci    }
1557db96d56Sopenharmony_ci    json_loads_method = PyObject_GetAttrString(json_module, "loads");
1567db96d56Sopenharmony_ci    return json_loads_method != NULL;
1577db96d56Sopenharmony_ci}
1587db96d56Sopenharmony_ci/* Fuzz json.loads(x) */
1597db96d56Sopenharmony_cistatic int fuzz_json_loads(const char* data, size_t size) {
1607db96d56Sopenharmony_ci    /* Since python supports arbitrarily large ints in JSON,
1617db96d56Sopenharmony_ci       long inputs can lead to timeouts on boring inputs like
1627db96d56Sopenharmony_ci       `json.loads("9" * 100000)` */
1637db96d56Sopenharmony_ci    if (size > MAX_JSON_TEST_SIZE) {
1647db96d56Sopenharmony_ci        return 0;
1657db96d56Sopenharmony_ci    }
1667db96d56Sopenharmony_ci    PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
1677db96d56Sopenharmony_ci    if (input_bytes == NULL) {
1687db96d56Sopenharmony_ci        return 0;
1697db96d56Sopenharmony_ci    }
1707db96d56Sopenharmony_ci    PyObject* parsed = PyObject_CallOneArg(json_loads_method, input_bytes);
1717db96d56Sopenharmony_ci    if (parsed == NULL) {
1727db96d56Sopenharmony_ci        /* Ignore ValueError as the fuzzer will more than likely
1737db96d56Sopenharmony_ci           generate some invalid json and values */
1747db96d56Sopenharmony_ci        if (PyErr_ExceptionMatches(PyExc_ValueError) ||
1757db96d56Sopenharmony_ci        /* Ignore RecursionError as the fuzzer generates long sequences of
1767db96d56Sopenharmony_ci           arrays such as `[[[...` */
1777db96d56Sopenharmony_ci            PyErr_ExceptionMatches(PyExc_RecursionError) ||
1787db96d56Sopenharmony_ci        /* Ignore unicode errors, invalid byte sequences are common */
1797db96d56Sopenharmony_ci            PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
1807db96d56Sopenharmony_ci        ) {
1817db96d56Sopenharmony_ci            PyErr_Clear();
1827db96d56Sopenharmony_ci        }
1837db96d56Sopenharmony_ci    }
1847db96d56Sopenharmony_ci    Py_DECREF(input_bytes);
1857db96d56Sopenharmony_ci    Py_XDECREF(parsed);
1867db96d56Sopenharmony_ci    return 0;
1877db96d56Sopenharmony_ci}
1887db96d56Sopenharmony_ci
1897db96d56Sopenharmony_ci#define MAX_RE_TEST_SIZE 0x10000
1907db96d56Sopenharmony_ci
1917db96d56Sopenharmony_ciPyObject* sre_compile_method = NULL;
1927db96d56Sopenharmony_ciPyObject* sre_error_exception = NULL;
1937db96d56Sopenharmony_ciint SRE_FLAG_DEBUG = 0;
1947db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */
1957db96d56Sopenharmony_cistatic int init_sre_compile(void) {
1967db96d56Sopenharmony_ci    /* Import sre_compile.compile and sre.error */
1977db96d56Sopenharmony_ci    PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
1987db96d56Sopenharmony_ci    if (sre_compile_module == NULL) {
1997db96d56Sopenharmony_ci        return 0;
2007db96d56Sopenharmony_ci    }
2017db96d56Sopenharmony_ci    sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
2027db96d56Sopenharmony_ci    if (sre_compile_method == NULL) {
2037db96d56Sopenharmony_ci        return 0;
2047db96d56Sopenharmony_ci    }
2057db96d56Sopenharmony_ci
2067db96d56Sopenharmony_ci    PyObject* sre_constants = PyImport_ImportModule("sre_constants");
2077db96d56Sopenharmony_ci    if (sre_constants == NULL) {
2087db96d56Sopenharmony_ci        return 0;
2097db96d56Sopenharmony_ci    }
2107db96d56Sopenharmony_ci    sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
2117db96d56Sopenharmony_ci    if (sre_error_exception == NULL) {
2127db96d56Sopenharmony_ci        return 0;
2137db96d56Sopenharmony_ci    }
2147db96d56Sopenharmony_ci    PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
2157db96d56Sopenharmony_ci    if (debug_flag == NULL) {
2167db96d56Sopenharmony_ci        return 0;
2177db96d56Sopenharmony_ci    }
2187db96d56Sopenharmony_ci    SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
2197db96d56Sopenharmony_ci    return 1;
2207db96d56Sopenharmony_ci}
2217db96d56Sopenharmony_ci/* Fuzz _sre.compile(x) */
2227db96d56Sopenharmony_cistatic int fuzz_sre_compile(const char* data, size_t size) {
2237db96d56Sopenharmony_ci    /* Ignore really long regex patterns that will timeout the fuzzer */
2247db96d56Sopenharmony_ci    if (size > MAX_RE_TEST_SIZE) {
2257db96d56Sopenharmony_ci        return 0;
2267db96d56Sopenharmony_ci    }
2277db96d56Sopenharmony_ci    /* We treat the first 2 bytes of the input as a number for the flags */
2287db96d56Sopenharmony_ci    if (size < 2) {
2297db96d56Sopenharmony_ci        return 0;
2307db96d56Sopenharmony_ci    }
2317db96d56Sopenharmony_ci    uint16_t flags = ((uint16_t*) data)[0];
2327db96d56Sopenharmony_ci    /* We remove the SRE_FLAG_DEBUG if present. This is because it
2337db96d56Sopenharmony_ci       prints to stdout which greatly decreases fuzzing speed */
2347db96d56Sopenharmony_ci    flags &= ~SRE_FLAG_DEBUG;
2357db96d56Sopenharmony_ci
2367db96d56Sopenharmony_ci    /* Pull the pattern from the remaining bytes */
2377db96d56Sopenharmony_ci    PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
2387db96d56Sopenharmony_ci    if (pattern_bytes == NULL) {
2397db96d56Sopenharmony_ci        return 0;
2407db96d56Sopenharmony_ci    }
2417db96d56Sopenharmony_ci    PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
2427db96d56Sopenharmony_ci    if (flags_obj == NULL) {
2437db96d56Sopenharmony_ci        Py_DECREF(pattern_bytes);
2447db96d56Sopenharmony_ci        return 0;
2457db96d56Sopenharmony_ci    }
2467db96d56Sopenharmony_ci
2477db96d56Sopenharmony_ci    /* compiled = _sre.compile(data[2:], data[0:2] */
2487db96d56Sopenharmony_ci    PyObject* compiled = PyObject_CallFunctionObjArgs(
2497db96d56Sopenharmony_ci        sre_compile_method, pattern_bytes, flags_obj, NULL);
2507db96d56Sopenharmony_ci    /* Ignore ValueError as the fuzzer will more than likely
2517db96d56Sopenharmony_ci       generate some invalid combination of flags */
2527db96d56Sopenharmony_ci    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
2537db96d56Sopenharmony_ci        PyErr_Clear();
2547db96d56Sopenharmony_ci    }
2557db96d56Sopenharmony_ci    /* Ignore some common errors thrown by sre_parse:
2567db96d56Sopenharmony_ci       Overflow, Assertion, Recursion and Index */
2577db96d56Sopenharmony_ci    if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
2587db96d56Sopenharmony_ci                             PyErr_ExceptionMatches(PyExc_AssertionError) ||
2597db96d56Sopenharmony_ci                             PyErr_ExceptionMatches(PyExc_RecursionError) ||
2607db96d56Sopenharmony_ci                             PyErr_ExceptionMatches(PyExc_IndexError))
2617db96d56Sopenharmony_ci    ) {
2627db96d56Sopenharmony_ci        PyErr_Clear();
2637db96d56Sopenharmony_ci    }
2647db96d56Sopenharmony_ci    /* Ignore re.error */
2657db96d56Sopenharmony_ci    if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
2667db96d56Sopenharmony_ci        PyErr_Clear();
2677db96d56Sopenharmony_ci    }
2687db96d56Sopenharmony_ci
2697db96d56Sopenharmony_ci    Py_DECREF(pattern_bytes);
2707db96d56Sopenharmony_ci    Py_DECREF(flags_obj);
2717db96d56Sopenharmony_ci    Py_XDECREF(compiled);
2727db96d56Sopenharmony_ci    return 0;
2737db96d56Sopenharmony_ci}
2747db96d56Sopenharmony_ci
2757db96d56Sopenharmony_ci/* Some random patterns used to test re.match.
2767db96d56Sopenharmony_ci   Be careful not to add catostraphically slow regexes here, we want to
2777db96d56Sopenharmony_ci   exercise the matching code without causing timeouts.*/
2787db96d56Sopenharmony_cistatic const char* regex_patterns[] = {
2797db96d56Sopenharmony_ci    ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
2807db96d56Sopenharmony_ci    "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
2817db96d56Sopenharmony_ci    "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
2827db96d56Sopenharmony_ci    "(?:a*)*", "a{1,2}?"
2837db96d56Sopenharmony_ci};
2847db96d56Sopenharmony_ciconst size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
2857db96d56Sopenharmony_ciPyObject** compiled_patterns = NULL;
2867db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */
2877db96d56Sopenharmony_cistatic int init_sre_match(void) {
2887db96d56Sopenharmony_ci    PyObject* re_module = PyImport_ImportModule("re");
2897db96d56Sopenharmony_ci    if (re_module == NULL) {
2907db96d56Sopenharmony_ci        return 0;
2917db96d56Sopenharmony_ci    }
2927db96d56Sopenharmony_ci    compiled_patterns = (PyObject**) PyMem_RawMalloc(
2937db96d56Sopenharmony_ci        sizeof(PyObject*) * NUM_PATTERNS);
2947db96d56Sopenharmony_ci    if (compiled_patterns == NULL) {
2957db96d56Sopenharmony_ci        PyErr_NoMemory();
2967db96d56Sopenharmony_ci        return 0;
2977db96d56Sopenharmony_ci    }
2987db96d56Sopenharmony_ci
2997db96d56Sopenharmony_ci    /* Precompile all the regex patterns on the first run for faster fuzzing */
3007db96d56Sopenharmony_ci    for (size_t i = 0; i < NUM_PATTERNS; i++) {
3017db96d56Sopenharmony_ci        PyObject* compiled = PyObject_CallMethod(
3027db96d56Sopenharmony_ci            re_module, "compile", "y", regex_patterns[i]);
3037db96d56Sopenharmony_ci        /* Bail if any of the patterns fail to compile */
3047db96d56Sopenharmony_ci        if (compiled == NULL) {
3057db96d56Sopenharmony_ci            return 0;
3067db96d56Sopenharmony_ci        }
3077db96d56Sopenharmony_ci        compiled_patterns[i] = compiled;
3087db96d56Sopenharmony_ci    }
3097db96d56Sopenharmony_ci    return 1;
3107db96d56Sopenharmony_ci}
3117db96d56Sopenharmony_ci/* Fuzz re.match(x) */
3127db96d56Sopenharmony_cistatic int fuzz_sre_match(const char* data, size_t size) {
3137db96d56Sopenharmony_ci    if (size < 1 || size > MAX_RE_TEST_SIZE) {
3147db96d56Sopenharmony_ci        return 0;
3157db96d56Sopenharmony_ci    }
3167db96d56Sopenharmony_ci    /* Use the first byte as a uint8_t specifying the index of the
3177db96d56Sopenharmony_ci       regex to use */
3187db96d56Sopenharmony_ci    unsigned char idx = (unsigned char) data[0];
3197db96d56Sopenharmony_ci    idx = idx % NUM_PATTERNS;
3207db96d56Sopenharmony_ci
3217db96d56Sopenharmony_ci    /* Pull the string to match from the remaining bytes */
3227db96d56Sopenharmony_ci    PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
3237db96d56Sopenharmony_ci    if (to_match == NULL) {
3247db96d56Sopenharmony_ci        return 0;
3257db96d56Sopenharmony_ci    }
3267db96d56Sopenharmony_ci
3277db96d56Sopenharmony_ci    PyObject* pattern = compiled_patterns[idx];
3287db96d56Sopenharmony_ci    PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
3297db96d56Sopenharmony_ci
3307db96d56Sopenharmony_ci    PyObject* matches = PyObject_CallOneArg(match_callable, to_match);
3317db96d56Sopenharmony_ci
3327db96d56Sopenharmony_ci    Py_XDECREF(matches);
3337db96d56Sopenharmony_ci    Py_DECREF(match_callable);
3347db96d56Sopenharmony_ci    Py_DECREF(to_match);
3357db96d56Sopenharmony_ci    return 0;
3367db96d56Sopenharmony_ci}
3377db96d56Sopenharmony_ci
3387db96d56Sopenharmony_ci#define MAX_CSV_TEST_SIZE 0x10000
3397db96d56Sopenharmony_ciPyObject* csv_module = NULL;
3407db96d56Sopenharmony_ciPyObject* csv_error = NULL;
3417db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */
3427db96d56Sopenharmony_cistatic int init_csv_reader(void) {
3437db96d56Sopenharmony_ci    /* Import csv and csv.Error */
3447db96d56Sopenharmony_ci    csv_module = PyImport_ImportModule("csv");
3457db96d56Sopenharmony_ci    if (csv_module == NULL) {
3467db96d56Sopenharmony_ci        return 0;
3477db96d56Sopenharmony_ci    }
3487db96d56Sopenharmony_ci    csv_error = PyObject_GetAttrString(csv_module, "Error");
3497db96d56Sopenharmony_ci    return csv_error != NULL;
3507db96d56Sopenharmony_ci}
3517db96d56Sopenharmony_ci/* Fuzz csv.reader([x]) */
3527db96d56Sopenharmony_cistatic int fuzz_csv_reader(const char* data, size_t size) {
3537db96d56Sopenharmony_ci    if (size < 1 || size > MAX_CSV_TEST_SIZE) {
3547db96d56Sopenharmony_ci        return 0;
3557db96d56Sopenharmony_ci    }
3567db96d56Sopenharmony_ci    /* Ignore non null-terminated strings since _csv can't handle
3577db96d56Sopenharmony_ci       embedded nulls */
3587db96d56Sopenharmony_ci    if (memchr(data, '\0', size) == NULL) {
3597db96d56Sopenharmony_ci        return 0;
3607db96d56Sopenharmony_ci    }
3617db96d56Sopenharmony_ci
3627db96d56Sopenharmony_ci    PyObject* s = PyUnicode_FromString(data);
3637db96d56Sopenharmony_ci    /* Ignore exceptions until we have a valid string */
3647db96d56Sopenharmony_ci    if (s == NULL) {
3657db96d56Sopenharmony_ci        PyErr_Clear();
3667db96d56Sopenharmony_ci        return 0;
3677db96d56Sopenharmony_ci    }
3687db96d56Sopenharmony_ci
3697db96d56Sopenharmony_ci    /* Split on \n so we can test multiple lines */
3707db96d56Sopenharmony_ci    PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
3717db96d56Sopenharmony_ci    if (lines == NULL) {
3727db96d56Sopenharmony_ci        Py_DECREF(s);
3737db96d56Sopenharmony_ci        return 0;
3747db96d56Sopenharmony_ci    }
3757db96d56Sopenharmony_ci
3767db96d56Sopenharmony_ci    PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
3777db96d56Sopenharmony_ci    if (reader) {
3787db96d56Sopenharmony_ci        /* Consume all of the reader as an iterator */
3797db96d56Sopenharmony_ci        PyObject* parsed_line;
3807db96d56Sopenharmony_ci        while ((parsed_line = PyIter_Next(reader))) {
3817db96d56Sopenharmony_ci            Py_DECREF(parsed_line);
3827db96d56Sopenharmony_ci        }
3837db96d56Sopenharmony_ci    }
3847db96d56Sopenharmony_ci
3857db96d56Sopenharmony_ci    /* Ignore csv.Error because we're probably going to generate
3867db96d56Sopenharmony_ci       some bad files (embedded new-lines, unterminated quotes etc) */
3877db96d56Sopenharmony_ci    if (PyErr_ExceptionMatches(csv_error)) {
3887db96d56Sopenharmony_ci        PyErr_Clear();
3897db96d56Sopenharmony_ci    }
3907db96d56Sopenharmony_ci
3917db96d56Sopenharmony_ci    Py_XDECREF(reader);
3927db96d56Sopenharmony_ci    Py_DECREF(s);
3937db96d56Sopenharmony_ci    return 0;
3947db96d56Sopenharmony_ci}
3957db96d56Sopenharmony_ci
3967db96d56Sopenharmony_ci#define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x10000
3977db96d56Sopenharmony_ciPyObject* ast_literal_eval_method = NULL;
3987db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */
3997db96d56Sopenharmony_cistatic int init_ast_literal_eval(void) {
4007db96d56Sopenharmony_ci    PyObject* ast_module = PyImport_ImportModule("ast");
4017db96d56Sopenharmony_ci    if (ast_module == NULL) {
4027db96d56Sopenharmony_ci        return 0;
4037db96d56Sopenharmony_ci    }
4047db96d56Sopenharmony_ci    ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval");
4057db96d56Sopenharmony_ci    return ast_literal_eval_method != NULL;
4067db96d56Sopenharmony_ci}
4077db96d56Sopenharmony_ci/* Fuzz ast.literal_eval(x) */
4087db96d56Sopenharmony_cistatic int fuzz_ast_literal_eval(const char* data, size_t size) {
4097db96d56Sopenharmony_ci    if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) {
4107db96d56Sopenharmony_ci        return 0;
4117db96d56Sopenharmony_ci    }
4127db96d56Sopenharmony_ci    /* Ignore non null-terminated strings since ast can't handle
4137db96d56Sopenharmony_ci       embedded nulls */
4147db96d56Sopenharmony_ci    if (memchr(data, '\0', size) == NULL) {
4157db96d56Sopenharmony_ci        return 0;
4167db96d56Sopenharmony_ci    }
4177db96d56Sopenharmony_ci
4187db96d56Sopenharmony_ci    PyObject* s = PyUnicode_FromString(data);
4197db96d56Sopenharmony_ci    /* Ignore exceptions until we have a valid string */
4207db96d56Sopenharmony_ci    if (s == NULL) {
4217db96d56Sopenharmony_ci        PyErr_Clear();
4227db96d56Sopenharmony_ci        return 0;
4237db96d56Sopenharmony_ci    }
4247db96d56Sopenharmony_ci
4257db96d56Sopenharmony_ci    PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s);
4267db96d56Sopenharmony_ci    /* Ignore some common errors thrown by ast.literal_eval */
4277db96d56Sopenharmony_ci    if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) ||
4287db96d56Sopenharmony_ci                            PyErr_ExceptionMatches(PyExc_TypeError) ||
4297db96d56Sopenharmony_ci                            PyErr_ExceptionMatches(PyExc_SyntaxError) ||
4307db96d56Sopenharmony_ci                            PyErr_ExceptionMatches(PyExc_MemoryError) ||
4317db96d56Sopenharmony_ci                            PyErr_ExceptionMatches(PyExc_RecursionError))
4327db96d56Sopenharmony_ci    ) {
4337db96d56Sopenharmony_ci        PyErr_Clear();
4347db96d56Sopenharmony_ci    }
4357db96d56Sopenharmony_ci
4367db96d56Sopenharmony_ci    Py_XDECREF(literal);
4377db96d56Sopenharmony_ci    Py_DECREF(s);
4387db96d56Sopenharmony_ci    return 0;
4397db96d56Sopenharmony_ci}
4407db96d56Sopenharmony_ci
/* Run one fuzz test on the given input and abort on failure.

   Calls fuzzer with data reinterpreted as const char*. If a Python
   exception is still pending afterwards, it is printed and the process is
   aborted. Otherwise the fuzzer's return value is passed through. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
    const char* bytes = (const char*) data;
    int result = fuzzer(bytes, size);

    if (PyErr_Occurred() == NULL) {
        /* Someday the return value might mean something, propagate it. */
        return result;
    }

    /* Fuzz tests should handle expected errors for themselves.
       This is a last-ditch check in case they didn't. */
    PyErr_Print();
    abort();
}
4537db96d56Sopenharmony_ci
/* Always returns 1. CPython generates a lot of leak warnings for whatever
   reason. NOTE(review): presumably this is the hook LeakSanitizer consults
   to decide whether leak checking is disabled -- confirm. */
int __lsan_is_turned_off(void)
{
    return 1;
}
4567db96d56Sopenharmony_ci
4577db96d56Sopenharmony_ci
4587db96d56Sopenharmony_ciint LLVMFuzzerInitialize(int *argc, char ***argv) {
4597db96d56Sopenharmony_ci    PyConfig config;
4607db96d56Sopenharmony_ci    PyConfig_InitPythonConfig(&config);
4617db96d56Sopenharmony_ci    config.install_signal_handlers = 0;
4627db96d56Sopenharmony_ci    PyStatus status;
4637db96d56Sopenharmony_ci    status = PyConfig_SetBytesString(&config, &config.program_name, *argv[0]);
4647db96d56Sopenharmony_ci    if (PyStatus_Exception(status)) {
4657db96d56Sopenharmony_ci        goto fail;
4667db96d56Sopenharmony_ci    }
4677db96d56Sopenharmony_ci
4687db96d56Sopenharmony_ci    status = Py_InitializeFromConfig(&config);
4697db96d56Sopenharmony_ci    if (PyStatus_Exception(status)) {
4707db96d56Sopenharmony_ci        goto fail;
4717db96d56Sopenharmony_ci    }
4727db96d56Sopenharmony_ci    PyConfig_Clear(&config);
4737db96d56Sopenharmony_ci
4747db96d56Sopenharmony_ci    return 0;
4757db96d56Sopenharmony_ci
4767db96d56Sopenharmony_cifail:
4777db96d56Sopenharmony_ci    PyConfig_Clear(&config);
4787db96d56Sopenharmony_ci    Py_ExitStatusException(status);
4797db96d56Sopenharmony_ci}
4807db96d56Sopenharmony_ci
4817db96d56Sopenharmony_ci/* Fuzz test interface.
4827db96d56Sopenharmony_ci   This returns the bitwise or of all fuzz test's return values.
4837db96d56Sopenharmony_ci
4847db96d56Sopenharmony_ci   All fuzz tests must return 0, as all nonzero return codes are reserved for
4857db96d56Sopenharmony_ci   future use -- we propagate the return values for that future case.
4867db96d56Sopenharmony_ci   (And we bitwise or when running multiple tests to verify that normally we
4877db96d56Sopenharmony_ci   only return 0.) */
4887db96d56Sopenharmony_ciint LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
4897db96d56Sopenharmony_ci    assert(Py_IsInitialized());
4907db96d56Sopenharmony_ci
4917db96d56Sopenharmony_ci    int rv = 0;
4927db96d56Sopenharmony_ci
4937db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
4947db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_builtin_float);
4957db96d56Sopenharmony_ci#endif
4967db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
4977db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_builtin_int);
4987db96d56Sopenharmony_ci#endif
4997db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
5007db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
5017db96d56Sopenharmony_ci#endif
5027db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack)
5037db96d56Sopenharmony_ci    static int STRUCT_UNPACK_INITIALIZED = 0;
5047db96d56Sopenharmony_ci    if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) {
5057db96d56Sopenharmony_ci        PyErr_Print();
5067db96d56Sopenharmony_ci        abort();
5077db96d56Sopenharmony_ci    } else {
5087db96d56Sopenharmony_ci        STRUCT_UNPACK_INITIALIZED = 1;
5097db96d56Sopenharmony_ci    }
5107db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_struct_unpack);
5117db96d56Sopenharmony_ci#endif
5127db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
5137db96d56Sopenharmony_ci    static int JSON_LOADS_INITIALIZED = 0;
5147db96d56Sopenharmony_ci    if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
5157db96d56Sopenharmony_ci        PyErr_Print();
5167db96d56Sopenharmony_ci        abort();
5177db96d56Sopenharmony_ci    } else {
5187db96d56Sopenharmony_ci        JSON_LOADS_INITIALIZED = 1;
5197db96d56Sopenharmony_ci    }
5207db96d56Sopenharmony_ci
5217db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_json_loads);
5227db96d56Sopenharmony_ci#endif
5237db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
5247db96d56Sopenharmony_ci    static int SRE_COMPILE_INITIALIZED = 0;
5257db96d56Sopenharmony_ci    if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
5267db96d56Sopenharmony_ci        PyErr_Print();
5277db96d56Sopenharmony_ci        abort();
5287db96d56Sopenharmony_ci    } else {
5297db96d56Sopenharmony_ci        SRE_COMPILE_INITIALIZED = 1;
5307db96d56Sopenharmony_ci    }
5317db96d56Sopenharmony_ci
5327db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_sre_compile);
5337db96d56Sopenharmony_ci#endif
5347db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
5357db96d56Sopenharmony_ci    static int SRE_MATCH_INITIALIZED = 0;
5367db96d56Sopenharmony_ci    if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
5377db96d56Sopenharmony_ci        PyErr_Print();
5387db96d56Sopenharmony_ci        abort();
5397db96d56Sopenharmony_ci    } else {
5407db96d56Sopenharmony_ci        SRE_MATCH_INITIALIZED = 1;
5417db96d56Sopenharmony_ci    }
5427db96d56Sopenharmony_ci
5437db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_sre_match);
5447db96d56Sopenharmony_ci#endif
5457db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
5467db96d56Sopenharmony_ci    static int CSV_READER_INITIALIZED = 0;
5477db96d56Sopenharmony_ci    if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
5487db96d56Sopenharmony_ci        PyErr_Print();
5497db96d56Sopenharmony_ci        abort();
5507db96d56Sopenharmony_ci    } else {
5517db96d56Sopenharmony_ci        CSV_READER_INITIALIZED = 1;
5527db96d56Sopenharmony_ci    }
5537db96d56Sopenharmony_ci
5547db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_csv_reader);
5557db96d56Sopenharmony_ci#endif
5567db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval)
5577db96d56Sopenharmony_ci    static int AST_LITERAL_EVAL_INITIALIZED = 0;
5587db96d56Sopenharmony_ci    if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) {
5597db96d56Sopenharmony_ci        PyErr_Print();
5607db96d56Sopenharmony_ci        abort();
5617db96d56Sopenharmony_ci    } else {
5627db96d56Sopenharmony_ci        AST_LITERAL_EVAL_INITIALIZED = 1;
5637db96d56Sopenharmony_ci    }
5647db96d56Sopenharmony_ci
5657db96d56Sopenharmony_ci    rv |= _run_fuzz(data, size, fuzz_ast_literal_eval);
5667db96d56Sopenharmony_ci#endif
5677db96d56Sopenharmony_ci  return rv;
5687db96d56Sopenharmony_ci}
569