17db96d56Sopenharmony_ci/* A fuzz test for CPython. 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_ci The only exposed function is LLVMFuzzerTestOneInput, which is called by 47db96d56Sopenharmony_ci fuzzers and by the _fuzz module for smoke tests. 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_ci To build exactly one fuzz test, as when running in oss-fuzz etc., 77db96d56Sopenharmony_ci build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build 87db96d56Sopenharmony_ci LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with 97db96d56Sopenharmony_ci -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_ci See the source code for LLVMFuzzerTestOneInput for details. */ 127db96d56Sopenharmony_ci 137db96d56Sopenharmony_ci#include <Python.h> 147db96d56Sopenharmony_ci#include <stdlib.h> 157db96d56Sopenharmony_ci#include <inttypes.h> 167db96d56Sopenharmony_ci 177db96d56Sopenharmony_ci/* Fuzz PyFloat_FromString as a proxy for float(str). */ 187db96d56Sopenharmony_cistatic int fuzz_builtin_float(const char* data, size_t size) { 197db96d56Sopenharmony_ci PyObject* s = PyBytes_FromStringAndSize(data, size); 207db96d56Sopenharmony_ci if (s == NULL) return 0; 217db96d56Sopenharmony_ci PyObject* f = PyFloat_FromString(s); 227db96d56Sopenharmony_ci if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { 237db96d56Sopenharmony_ci PyErr_Clear(); 247db96d56Sopenharmony_ci } 257db96d56Sopenharmony_ci 267db96d56Sopenharmony_ci Py_XDECREF(f); 277db96d56Sopenharmony_ci Py_DECREF(s); 287db96d56Sopenharmony_ci return 0; 297db96d56Sopenharmony_ci} 307db96d56Sopenharmony_ci 317db96d56Sopenharmony_ci#define MAX_INT_TEST_SIZE 0x10000 327db96d56Sopenharmony_ci 337db96d56Sopenharmony_ci/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */ 347db96d56Sopenharmony_cistatic int fuzz_builtin_int(const char* data, size_t size) { 357db96d56Sopenharmony_ci /* Ignore test cases with very long ints to avoid timeouts 367db96d56Sopenharmony_ci int("9" * 1000000) is not a very interesting test caase */ 377db96d56Sopenharmony_ci if (size > MAX_INT_TEST_SIZE) { 387db96d56Sopenharmony_ci return 0; 397db96d56Sopenharmony_ci } 407db96d56Sopenharmony_ci /* Pick a random valid base. (When the fuzzed function takes extra 417db96d56Sopenharmony_ci parameters, it's somewhat normal to hash the input to generate those 427db96d56Sopenharmony_ci parameters. We want to exercise all code paths, so we do so here.) */ 437db96d56Sopenharmony_ci int base = _Py_HashBytes(data, size) % 37; 447db96d56Sopenharmony_ci if (base == 1) { 457db96d56Sopenharmony_ci // 1 is the only number between 0 and 36 that is not a valid base. 467db96d56Sopenharmony_ci base = 0; 477db96d56Sopenharmony_ci } 487db96d56Sopenharmony_ci if (base == -1) { 497db96d56Sopenharmony_ci return 0; // An error occurred, bail early. 507db96d56Sopenharmony_ci } 517db96d56Sopenharmony_ci if (base < 0) { 527db96d56Sopenharmony_ci base = -base; 537db96d56Sopenharmony_ci } 547db96d56Sopenharmony_ci 557db96d56Sopenharmony_ci PyObject* s = PyUnicode_FromStringAndSize(data, size); 567db96d56Sopenharmony_ci if (s == NULL) { 577db96d56Sopenharmony_ci if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { 587db96d56Sopenharmony_ci PyErr_Clear(); 597db96d56Sopenharmony_ci } 607db96d56Sopenharmony_ci return 0; 617db96d56Sopenharmony_ci } 627db96d56Sopenharmony_ci PyObject* l = PyLong_FromUnicodeObject(s, base); 637db96d56Sopenharmony_ci if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { 647db96d56Sopenharmony_ci PyErr_Clear(); 657db96d56Sopenharmony_ci } 667db96d56Sopenharmony_ci PyErr_Clear(); 677db96d56Sopenharmony_ci Py_XDECREF(l); 687db96d56Sopenharmony_ci Py_DECREF(s); 697db96d56Sopenharmony_ci return 0; 707db96d56Sopenharmony_ci} 717db96d56Sopenharmony_ci 727db96d56Sopenharmony_ci/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */ 737db96d56Sopenharmony_cistatic int fuzz_builtin_unicode(const char* data, size_t size) { 747db96d56Sopenharmony_ci PyObject* s = PyUnicode_FromStringAndSize(data, size); 757db96d56Sopenharmony_ci if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { 767db96d56Sopenharmony_ci PyErr_Clear(); 777db96d56Sopenharmony_ci } 787db96d56Sopenharmony_ci Py_XDECREF(s); 797db96d56Sopenharmony_ci return 0; 807db96d56Sopenharmony_ci} 817db96d56Sopenharmony_ci 827db96d56Sopenharmony_ci 837db96d56Sopenharmony_ciPyObject* struct_unpack_method = NULL; 847db96d56Sopenharmony_ciPyObject* struct_error = NULL; 857db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */ 867db96d56Sopenharmony_cistatic int init_struct_unpack(void) { 877db96d56Sopenharmony_ci /* Import struct.unpack */ 887db96d56Sopenharmony_ci PyObject* struct_module = PyImport_ImportModule("struct"); 897db96d56Sopenharmony_ci if (struct_module == NULL) { 907db96d56Sopenharmony_ci return 0; 917db96d56Sopenharmony_ci } 927db96d56Sopenharmony_ci struct_error = PyObject_GetAttrString(struct_module, "error"); 937db96d56Sopenharmony_ci if (struct_error == NULL) { 947db96d56Sopenharmony_ci return 0; 957db96d56Sopenharmony_ci } 967db96d56Sopenharmony_ci struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack"); 977db96d56Sopenharmony_ci return struct_unpack_method != NULL; 987db96d56Sopenharmony_ci} 997db96d56Sopenharmony_ci/* Fuzz struct.unpack(x, y) */ 1007db96d56Sopenharmony_cistatic int fuzz_struct_unpack(const char* data, size_t size) { 1017db96d56Sopenharmony_ci /* Everything up to the first null byte is considered the 1027db96d56Sopenharmony_ci format. Everything after is the buffer */ 1037db96d56Sopenharmony_ci const char* first_null = memchr(data, '\0', size); 1047db96d56Sopenharmony_ci if (first_null == NULL) { 1057db96d56Sopenharmony_ci return 0; 1067db96d56Sopenharmony_ci } 1077db96d56Sopenharmony_ci 1087db96d56Sopenharmony_ci size_t format_length = first_null - data; 1097db96d56Sopenharmony_ci size_t buffer_length = size - format_length - 1; 1107db96d56Sopenharmony_ci 1117db96d56Sopenharmony_ci PyObject* pattern = PyBytes_FromStringAndSize(data, format_length); 1127db96d56Sopenharmony_ci if (pattern == NULL) { 1137db96d56Sopenharmony_ci return 0; 1147db96d56Sopenharmony_ci } 1157db96d56Sopenharmony_ci PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length); 1167db96d56Sopenharmony_ci if (buffer == NULL) { 1177db96d56Sopenharmony_ci Py_DECREF(pattern); 1187db96d56Sopenharmony_ci return 0; 1197db96d56Sopenharmony_ci } 1207db96d56Sopenharmony_ci 1217db96d56Sopenharmony_ci PyObject* unpacked = PyObject_CallFunctionObjArgs( 1227db96d56Sopenharmony_ci struct_unpack_method, pattern, buffer, NULL); 1237db96d56Sopenharmony_ci /* Ignore any overflow errors, these are easily triggered accidentally */ 1247db96d56Sopenharmony_ci if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) { 1257db96d56Sopenharmony_ci PyErr_Clear(); 1267db96d56Sopenharmony_ci } 1277db96d56Sopenharmony_ci /* The pascal format string will throw a negative size when passing 0 1287db96d56Sopenharmony_ci like: struct.unpack('0p', b'') */ 1297db96d56Sopenharmony_ci if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) { 1307db96d56Sopenharmony_ci PyErr_Clear(); 1317db96d56Sopenharmony_ci } 1327db96d56Sopenharmony_ci /* Ignore any struct.error exceptions, these can be caused by invalid 1337db96d56Sopenharmony_ci formats or incomplete buffers both of which are common. */ 1347db96d56Sopenharmony_ci if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) { 1357db96d56Sopenharmony_ci PyErr_Clear(); 1367db96d56Sopenharmony_ci } 1377db96d56Sopenharmony_ci 1387db96d56Sopenharmony_ci Py_XDECREF(unpacked); 1397db96d56Sopenharmony_ci Py_DECREF(pattern); 1407db96d56Sopenharmony_ci Py_DECREF(buffer); 1417db96d56Sopenharmony_ci return 0; 1427db96d56Sopenharmony_ci} 1437db96d56Sopenharmony_ci 1447db96d56Sopenharmony_ci 1457db96d56Sopenharmony_ci#define MAX_JSON_TEST_SIZE 0x10000 1467db96d56Sopenharmony_ci 1477db96d56Sopenharmony_ciPyObject* json_loads_method = NULL; 1487db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */ 1497db96d56Sopenharmony_cistatic int init_json_loads(void) { 1507db96d56Sopenharmony_ci /* Import json.loads */ 1517db96d56Sopenharmony_ci PyObject* json_module = PyImport_ImportModule("json"); 1527db96d56Sopenharmony_ci if (json_module == NULL) { 1537db96d56Sopenharmony_ci return 0; 1547db96d56Sopenharmony_ci } 1557db96d56Sopenharmony_ci json_loads_method = PyObject_GetAttrString(json_module, "loads"); 1567db96d56Sopenharmony_ci return json_loads_method != NULL; 1577db96d56Sopenharmony_ci} 1587db96d56Sopenharmony_ci/* Fuzz json.loads(x) */ 1597db96d56Sopenharmony_cistatic int fuzz_json_loads(const char* data, size_t size) { 1607db96d56Sopenharmony_ci /* Since python supports arbitrarily large ints in JSON, 1617db96d56Sopenharmony_ci long inputs can lead to timeouts on boring inputs like 1627db96d56Sopenharmony_ci `json.loads("9" * 100000)` */ 1637db96d56Sopenharmony_ci if (size > MAX_JSON_TEST_SIZE) { 1647db96d56Sopenharmony_ci return 0; 1657db96d56Sopenharmony_ci } 1667db96d56Sopenharmony_ci PyObject* input_bytes = PyBytes_FromStringAndSize(data, size); 1677db96d56Sopenharmony_ci if (input_bytes == NULL) { 1687db96d56Sopenharmony_ci return 0; 1697db96d56Sopenharmony_ci } 1707db96d56Sopenharmony_ci PyObject* parsed = PyObject_CallOneArg(json_loads_method, input_bytes); 1717db96d56Sopenharmony_ci if (parsed == NULL) { 1727db96d56Sopenharmony_ci /* Ignore ValueError as the fuzzer will more than likely 1737db96d56Sopenharmony_ci generate some invalid json and values */ 1747db96d56Sopenharmony_ci if (PyErr_ExceptionMatches(PyExc_ValueError) || 1757db96d56Sopenharmony_ci /* Ignore RecursionError as the fuzzer generates long sequences of 1767db96d56Sopenharmony_ci arrays such as `[[[...` */ 1777db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_RecursionError) || 1787db96d56Sopenharmony_ci /* Ignore unicode errors, invalid byte sequences are common */ 1797db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) 1807db96d56Sopenharmony_ci ) { 1817db96d56Sopenharmony_ci PyErr_Clear(); 1827db96d56Sopenharmony_ci } 1837db96d56Sopenharmony_ci } 1847db96d56Sopenharmony_ci Py_DECREF(input_bytes); 1857db96d56Sopenharmony_ci Py_XDECREF(parsed); 1867db96d56Sopenharmony_ci return 0; 1877db96d56Sopenharmony_ci} 1887db96d56Sopenharmony_ci 1897db96d56Sopenharmony_ci#define MAX_RE_TEST_SIZE 0x10000 1907db96d56Sopenharmony_ci 1917db96d56Sopenharmony_ciPyObject* sre_compile_method = NULL; 1927db96d56Sopenharmony_ciPyObject* sre_error_exception = NULL; 1937db96d56Sopenharmony_ciint SRE_FLAG_DEBUG = 0; 1947db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */ 1957db96d56Sopenharmony_cistatic int init_sre_compile(void) { 1967db96d56Sopenharmony_ci /* Import sre_compile.compile and sre.error */ 1977db96d56Sopenharmony_ci PyObject* sre_compile_module = PyImport_ImportModule("sre_compile"); 1987db96d56Sopenharmony_ci if (sre_compile_module == NULL) { 1997db96d56Sopenharmony_ci return 0; 2007db96d56Sopenharmony_ci } 2017db96d56Sopenharmony_ci sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile"); 2027db96d56Sopenharmony_ci if (sre_compile_method == NULL) { 2037db96d56Sopenharmony_ci return 0; 2047db96d56Sopenharmony_ci } 2057db96d56Sopenharmony_ci 2067db96d56Sopenharmony_ci PyObject* sre_constants = PyImport_ImportModule("sre_constants"); 2077db96d56Sopenharmony_ci if (sre_constants == NULL) { 2087db96d56Sopenharmony_ci return 0; 2097db96d56Sopenharmony_ci } 2107db96d56Sopenharmony_ci sre_error_exception = PyObject_GetAttrString(sre_constants, "error"); 2117db96d56Sopenharmony_ci if (sre_error_exception == NULL) { 2127db96d56Sopenharmony_ci return 0; 2137db96d56Sopenharmony_ci } 2147db96d56Sopenharmony_ci PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG"); 2157db96d56Sopenharmony_ci if (debug_flag == NULL) { 2167db96d56Sopenharmony_ci return 0; 2177db96d56Sopenharmony_ci } 2187db96d56Sopenharmony_ci SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag); 2197db96d56Sopenharmony_ci return 1; 2207db96d56Sopenharmony_ci} 2217db96d56Sopenharmony_ci/* Fuzz _sre.compile(x) */ 2227db96d56Sopenharmony_cistatic int fuzz_sre_compile(const char* data, size_t size) { 2237db96d56Sopenharmony_ci /* Ignore really long regex patterns that will timeout the fuzzer */ 2247db96d56Sopenharmony_ci if (size > MAX_RE_TEST_SIZE) { 2257db96d56Sopenharmony_ci return 0; 2267db96d56Sopenharmony_ci } 2277db96d56Sopenharmony_ci /* We treat the first 2 bytes of the input as a number for the flags */ 2287db96d56Sopenharmony_ci if (size < 2) { 2297db96d56Sopenharmony_ci return 0; 2307db96d56Sopenharmony_ci } 2317db96d56Sopenharmony_ci uint16_t flags = ((uint16_t*) data)[0]; 2327db96d56Sopenharmony_ci /* We remove the SRE_FLAG_DEBUG if present. This is because it 2337db96d56Sopenharmony_ci prints to stdout which greatly decreases fuzzing speed */ 2347db96d56Sopenharmony_ci flags &= ~SRE_FLAG_DEBUG; 2357db96d56Sopenharmony_ci 2367db96d56Sopenharmony_ci /* Pull the pattern from the remaining bytes */ 2377db96d56Sopenharmony_ci PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2); 2387db96d56Sopenharmony_ci if (pattern_bytes == NULL) { 2397db96d56Sopenharmony_ci return 0; 2407db96d56Sopenharmony_ci } 2417db96d56Sopenharmony_ci PyObject* flags_obj = PyLong_FromUnsignedLong(flags); 2427db96d56Sopenharmony_ci if (flags_obj == NULL) { 2437db96d56Sopenharmony_ci Py_DECREF(pattern_bytes); 2447db96d56Sopenharmony_ci return 0; 2457db96d56Sopenharmony_ci } 2467db96d56Sopenharmony_ci 2477db96d56Sopenharmony_ci /* compiled = _sre.compile(data[2:], data[0:2] */ 2487db96d56Sopenharmony_ci PyObject* compiled = PyObject_CallFunctionObjArgs( 2497db96d56Sopenharmony_ci sre_compile_method, pattern_bytes, flags_obj, NULL); 2507db96d56Sopenharmony_ci /* Ignore ValueError as the fuzzer will more than likely 2517db96d56Sopenharmony_ci generate some invalid combination of flags */ 2527db96d56Sopenharmony_ci if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { 2537db96d56Sopenharmony_ci PyErr_Clear(); 2547db96d56Sopenharmony_ci } 2557db96d56Sopenharmony_ci /* Ignore some common errors thrown by sre_parse: 2567db96d56Sopenharmony_ci Overflow, Assertion, Recursion and Index */ 2577db96d56Sopenharmony_ci if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) || 2587db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_AssertionError) || 2597db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_RecursionError) || 2607db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_IndexError)) 2617db96d56Sopenharmony_ci ) { 2627db96d56Sopenharmony_ci PyErr_Clear(); 2637db96d56Sopenharmony_ci } 2647db96d56Sopenharmony_ci /* Ignore re.error */ 2657db96d56Sopenharmony_ci if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) { 2667db96d56Sopenharmony_ci PyErr_Clear(); 2677db96d56Sopenharmony_ci } 2687db96d56Sopenharmony_ci 2697db96d56Sopenharmony_ci Py_DECREF(pattern_bytes); 2707db96d56Sopenharmony_ci Py_DECREF(flags_obj); 2717db96d56Sopenharmony_ci Py_XDECREF(compiled); 2727db96d56Sopenharmony_ci return 0; 2737db96d56Sopenharmony_ci} 2747db96d56Sopenharmony_ci 2757db96d56Sopenharmony_ci/* Some random patterns used to test re.match. 2767db96d56Sopenharmony_ci Be careful not to add catostraphically slow regexes here, we want to 2777db96d56Sopenharmony_ci exercise the matching code without causing timeouts.*/ 2787db96d56Sopenharmony_cistatic const char* regex_patterns[] = { 2797db96d56Sopenharmony_ci ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]", 2807db96d56Sopenharmony_ci "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?", 2817db96d56Sopenharmony_ci "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$", 2827db96d56Sopenharmony_ci "(?:a*)*", "a{1,2}?" 2837db96d56Sopenharmony_ci}; 2847db96d56Sopenharmony_ciconst size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]); 2857db96d56Sopenharmony_ciPyObject** compiled_patterns = NULL; 2867db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */ 2877db96d56Sopenharmony_cistatic int init_sre_match(void) { 2887db96d56Sopenharmony_ci PyObject* re_module = PyImport_ImportModule("re"); 2897db96d56Sopenharmony_ci if (re_module == NULL) { 2907db96d56Sopenharmony_ci return 0; 2917db96d56Sopenharmony_ci } 2927db96d56Sopenharmony_ci compiled_patterns = (PyObject**) PyMem_RawMalloc( 2937db96d56Sopenharmony_ci sizeof(PyObject*) * NUM_PATTERNS); 2947db96d56Sopenharmony_ci if (compiled_patterns == NULL) { 2957db96d56Sopenharmony_ci PyErr_NoMemory(); 2967db96d56Sopenharmony_ci return 0; 2977db96d56Sopenharmony_ci } 2987db96d56Sopenharmony_ci 2997db96d56Sopenharmony_ci /* Precompile all the regex patterns on the first run for faster fuzzing */ 3007db96d56Sopenharmony_ci for (size_t i = 0; i < NUM_PATTERNS; i++) { 3017db96d56Sopenharmony_ci PyObject* compiled = PyObject_CallMethod( 3027db96d56Sopenharmony_ci re_module, "compile", "y", regex_patterns[i]); 3037db96d56Sopenharmony_ci /* Bail if any of the patterns fail to compile */ 3047db96d56Sopenharmony_ci if (compiled == NULL) { 3057db96d56Sopenharmony_ci return 0; 3067db96d56Sopenharmony_ci } 3077db96d56Sopenharmony_ci compiled_patterns[i] = compiled; 3087db96d56Sopenharmony_ci } 3097db96d56Sopenharmony_ci return 1; 3107db96d56Sopenharmony_ci} 3117db96d56Sopenharmony_ci/* Fuzz re.match(x) */ 3127db96d56Sopenharmony_cistatic int fuzz_sre_match(const char* data, size_t size) { 3137db96d56Sopenharmony_ci if (size < 1 || size > MAX_RE_TEST_SIZE) { 3147db96d56Sopenharmony_ci return 0; 3157db96d56Sopenharmony_ci } 3167db96d56Sopenharmony_ci /* Use the first byte as a uint8_t specifying the index of the 3177db96d56Sopenharmony_ci regex to use */ 3187db96d56Sopenharmony_ci unsigned char idx = (unsigned char) data[0]; 3197db96d56Sopenharmony_ci idx = idx % NUM_PATTERNS; 3207db96d56Sopenharmony_ci 3217db96d56Sopenharmony_ci /* Pull the string to match from the remaining bytes */ 3227db96d56Sopenharmony_ci PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1); 3237db96d56Sopenharmony_ci if (to_match == NULL) { 3247db96d56Sopenharmony_ci return 0; 3257db96d56Sopenharmony_ci } 3267db96d56Sopenharmony_ci 3277db96d56Sopenharmony_ci PyObject* pattern = compiled_patterns[idx]; 3287db96d56Sopenharmony_ci PyObject* match_callable = PyObject_GetAttrString(pattern, "match"); 3297db96d56Sopenharmony_ci 3307db96d56Sopenharmony_ci PyObject* matches = PyObject_CallOneArg(match_callable, to_match); 3317db96d56Sopenharmony_ci 3327db96d56Sopenharmony_ci Py_XDECREF(matches); 3337db96d56Sopenharmony_ci Py_DECREF(match_callable); 3347db96d56Sopenharmony_ci Py_DECREF(to_match); 3357db96d56Sopenharmony_ci return 0; 3367db96d56Sopenharmony_ci} 3377db96d56Sopenharmony_ci 3387db96d56Sopenharmony_ci#define MAX_CSV_TEST_SIZE 0x10000 3397db96d56Sopenharmony_ciPyObject* csv_module = NULL; 3407db96d56Sopenharmony_ciPyObject* csv_error = NULL; 3417db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */ 3427db96d56Sopenharmony_cistatic int init_csv_reader(void) { 3437db96d56Sopenharmony_ci /* Import csv and csv.Error */ 3447db96d56Sopenharmony_ci csv_module = PyImport_ImportModule("csv"); 3457db96d56Sopenharmony_ci if (csv_module == NULL) { 3467db96d56Sopenharmony_ci return 0; 3477db96d56Sopenharmony_ci } 3487db96d56Sopenharmony_ci csv_error = PyObject_GetAttrString(csv_module, "Error"); 3497db96d56Sopenharmony_ci return csv_error != NULL; 3507db96d56Sopenharmony_ci} 3517db96d56Sopenharmony_ci/* Fuzz csv.reader([x]) */ 3527db96d56Sopenharmony_cistatic int fuzz_csv_reader(const char* data, size_t size) { 3537db96d56Sopenharmony_ci if (size < 1 || size > MAX_CSV_TEST_SIZE) { 3547db96d56Sopenharmony_ci return 0; 3557db96d56Sopenharmony_ci } 3567db96d56Sopenharmony_ci /* Ignore non null-terminated strings since _csv can't handle 3577db96d56Sopenharmony_ci embedded nulls */ 3587db96d56Sopenharmony_ci if (memchr(data, '\0', size) == NULL) { 3597db96d56Sopenharmony_ci return 0; 3607db96d56Sopenharmony_ci } 3617db96d56Sopenharmony_ci 3627db96d56Sopenharmony_ci PyObject* s = PyUnicode_FromString(data); 3637db96d56Sopenharmony_ci /* Ignore exceptions until we have a valid string */ 3647db96d56Sopenharmony_ci if (s == NULL) { 3657db96d56Sopenharmony_ci PyErr_Clear(); 3667db96d56Sopenharmony_ci return 0; 3677db96d56Sopenharmony_ci } 3687db96d56Sopenharmony_ci 3697db96d56Sopenharmony_ci /* Split on \n so we can test multiple lines */ 3707db96d56Sopenharmony_ci PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n"); 3717db96d56Sopenharmony_ci if (lines == NULL) { 3727db96d56Sopenharmony_ci Py_DECREF(s); 3737db96d56Sopenharmony_ci return 0; 3747db96d56Sopenharmony_ci } 3757db96d56Sopenharmony_ci 3767db96d56Sopenharmony_ci PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines); 3777db96d56Sopenharmony_ci if (reader) { 3787db96d56Sopenharmony_ci /* Consume all of the reader as an iterator */ 3797db96d56Sopenharmony_ci PyObject* parsed_line; 3807db96d56Sopenharmony_ci while ((parsed_line = PyIter_Next(reader))) { 3817db96d56Sopenharmony_ci Py_DECREF(parsed_line); 3827db96d56Sopenharmony_ci } 3837db96d56Sopenharmony_ci } 3847db96d56Sopenharmony_ci 3857db96d56Sopenharmony_ci /* Ignore csv.Error because we're probably going to generate 3867db96d56Sopenharmony_ci some bad files (embedded new-lines, unterminated quotes etc) */ 3877db96d56Sopenharmony_ci if (PyErr_ExceptionMatches(csv_error)) { 3887db96d56Sopenharmony_ci PyErr_Clear(); 3897db96d56Sopenharmony_ci } 3907db96d56Sopenharmony_ci 3917db96d56Sopenharmony_ci Py_XDECREF(reader); 3927db96d56Sopenharmony_ci Py_DECREF(s); 3937db96d56Sopenharmony_ci return 0; 3947db96d56Sopenharmony_ci} 3957db96d56Sopenharmony_ci 3967db96d56Sopenharmony_ci#define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x10000 3977db96d56Sopenharmony_ciPyObject* ast_literal_eval_method = NULL; 3987db96d56Sopenharmony_ci/* Called by LLVMFuzzerTestOneInput for initialization */ 3997db96d56Sopenharmony_cistatic int init_ast_literal_eval(void) { 4007db96d56Sopenharmony_ci PyObject* ast_module = PyImport_ImportModule("ast"); 4017db96d56Sopenharmony_ci if (ast_module == NULL) { 4027db96d56Sopenharmony_ci return 0; 4037db96d56Sopenharmony_ci } 4047db96d56Sopenharmony_ci ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval"); 4057db96d56Sopenharmony_ci return ast_literal_eval_method != NULL; 4067db96d56Sopenharmony_ci} 4077db96d56Sopenharmony_ci/* Fuzz ast.literal_eval(x) */ 4087db96d56Sopenharmony_cistatic int fuzz_ast_literal_eval(const char* data, size_t size) { 4097db96d56Sopenharmony_ci if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) { 4107db96d56Sopenharmony_ci return 0; 4117db96d56Sopenharmony_ci } 4127db96d56Sopenharmony_ci /* Ignore non null-terminated strings since ast can't handle 4137db96d56Sopenharmony_ci embedded nulls */ 4147db96d56Sopenharmony_ci if (memchr(data, '\0', size) == NULL) { 4157db96d56Sopenharmony_ci return 0; 4167db96d56Sopenharmony_ci } 4177db96d56Sopenharmony_ci 4187db96d56Sopenharmony_ci PyObject* s = PyUnicode_FromString(data); 4197db96d56Sopenharmony_ci /* Ignore exceptions until we have a valid string */ 4207db96d56Sopenharmony_ci if (s == NULL) { 4217db96d56Sopenharmony_ci PyErr_Clear(); 4227db96d56Sopenharmony_ci return 0; 4237db96d56Sopenharmony_ci } 4247db96d56Sopenharmony_ci 4257db96d56Sopenharmony_ci PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s); 4267db96d56Sopenharmony_ci /* Ignore some common errors thrown by ast.literal_eval */ 4277db96d56Sopenharmony_ci if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) || 4287db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_TypeError) || 4297db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_SyntaxError) || 4307db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_MemoryError) || 4317db96d56Sopenharmony_ci PyErr_ExceptionMatches(PyExc_RecursionError)) 4327db96d56Sopenharmony_ci ) { 4337db96d56Sopenharmony_ci PyErr_Clear(); 4347db96d56Sopenharmony_ci } 4357db96d56Sopenharmony_ci 4367db96d56Sopenharmony_ci Py_XDECREF(literal); 4377db96d56Sopenharmony_ci Py_DECREF(s); 4387db96d56Sopenharmony_ci return 0; 4397db96d56Sopenharmony_ci} 4407db96d56Sopenharmony_ci 4417db96d56Sopenharmony_ci/* Run fuzzer and abort on failure. */ 4427db96d56Sopenharmony_cistatic int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { 4437db96d56Sopenharmony_ci int rv = fuzzer((const char*) data, size); 4447db96d56Sopenharmony_ci if (PyErr_Occurred()) { 4457db96d56Sopenharmony_ci /* Fuzz tests should handle expected errors for themselves. 4467db96d56Sopenharmony_ci This is last-ditch check in case they didn't. */ 4477db96d56Sopenharmony_ci PyErr_Print(); 4487db96d56Sopenharmony_ci abort(); 4497db96d56Sopenharmony_ci } 4507db96d56Sopenharmony_ci /* Someday the return value might mean something, propagate it. */ 4517db96d56Sopenharmony_ci return rv; 4527db96d56Sopenharmony_ci} 4537db96d56Sopenharmony_ci 4547db96d56Sopenharmony_ci/* CPython generates a lot of leak warnings for whatever reason. */ 4557db96d56Sopenharmony_ciint __lsan_is_turned_off(void) { return 1; } 4567db96d56Sopenharmony_ci 4577db96d56Sopenharmony_ci 4587db96d56Sopenharmony_ciint LLVMFuzzerInitialize(int *argc, char ***argv) { 4597db96d56Sopenharmony_ci PyConfig config; 4607db96d56Sopenharmony_ci PyConfig_InitPythonConfig(&config); 4617db96d56Sopenharmony_ci config.install_signal_handlers = 0; 4627db96d56Sopenharmony_ci PyStatus status; 4637db96d56Sopenharmony_ci status = PyConfig_SetBytesString(&config, &config.program_name, *argv[0]); 4647db96d56Sopenharmony_ci if (PyStatus_Exception(status)) { 4657db96d56Sopenharmony_ci goto fail; 4667db96d56Sopenharmony_ci } 4677db96d56Sopenharmony_ci 4687db96d56Sopenharmony_ci status = Py_InitializeFromConfig(&config); 4697db96d56Sopenharmony_ci if (PyStatus_Exception(status)) { 4707db96d56Sopenharmony_ci goto fail; 4717db96d56Sopenharmony_ci } 4727db96d56Sopenharmony_ci PyConfig_Clear(&config); 4737db96d56Sopenharmony_ci 4747db96d56Sopenharmony_ci return 0; 4757db96d56Sopenharmony_ci 4767db96d56Sopenharmony_cifail: 4777db96d56Sopenharmony_ci PyConfig_Clear(&config); 4787db96d56Sopenharmony_ci Py_ExitStatusException(status); 4797db96d56Sopenharmony_ci} 4807db96d56Sopenharmony_ci 4817db96d56Sopenharmony_ci/* Fuzz test interface. 4827db96d56Sopenharmony_ci This returns the bitwise or of all fuzz test's return values. 4837db96d56Sopenharmony_ci 4847db96d56Sopenharmony_ci All fuzz tests must return 0, as all nonzero return codes are reserved for 4857db96d56Sopenharmony_ci future use -- we propagate the return values for that future case. 4867db96d56Sopenharmony_ci (And we bitwise or when running multiple tests to verify that normally we 4877db96d56Sopenharmony_ci only return 0.) */ 4887db96d56Sopenharmony_ciint LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { 4897db96d56Sopenharmony_ci assert(Py_IsInitialized()); 4907db96d56Sopenharmony_ci 4917db96d56Sopenharmony_ci int rv = 0; 4927db96d56Sopenharmony_ci 4937db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float) 4947db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_builtin_float); 4957db96d56Sopenharmony_ci#endif 4967db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int) 4977db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_builtin_int); 4987db96d56Sopenharmony_ci#endif 4997db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode) 5007db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_builtin_unicode); 5017db96d56Sopenharmony_ci#endif 5027db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack) 5037db96d56Sopenharmony_ci static int STRUCT_UNPACK_INITIALIZED = 0; 5047db96d56Sopenharmony_ci if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) { 5057db96d56Sopenharmony_ci PyErr_Print(); 5067db96d56Sopenharmony_ci abort(); 5077db96d56Sopenharmony_ci } else { 5087db96d56Sopenharmony_ci STRUCT_UNPACK_INITIALIZED = 1; 5097db96d56Sopenharmony_ci } 5107db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_struct_unpack); 5117db96d56Sopenharmony_ci#endif 5127db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads) 5137db96d56Sopenharmony_ci static int JSON_LOADS_INITIALIZED = 0; 5147db96d56Sopenharmony_ci if (!JSON_LOADS_INITIALIZED && !init_json_loads()) { 5157db96d56Sopenharmony_ci PyErr_Print(); 5167db96d56Sopenharmony_ci abort(); 5177db96d56Sopenharmony_ci } else { 5187db96d56Sopenharmony_ci JSON_LOADS_INITIALIZED = 1; 5197db96d56Sopenharmony_ci } 5207db96d56Sopenharmony_ci 5217db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_json_loads); 5227db96d56Sopenharmony_ci#endif 5237db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile) 5247db96d56Sopenharmony_ci static int SRE_COMPILE_INITIALIZED = 0; 5257db96d56Sopenharmony_ci if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) { 5267db96d56Sopenharmony_ci PyErr_Print(); 5277db96d56Sopenharmony_ci abort(); 5287db96d56Sopenharmony_ci } else { 5297db96d56Sopenharmony_ci SRE_COMPILE_INITIALIZED = 1; 5307db96d56Sopenharmony_ci } 5317db96d56Sopenharmony_ci 5327db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_sre_compile); 5337db96d56Sopenharmony_ci#endif 5347db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match) 5357db96d56Sopenharmony_ci static int SRE_MATCH_INITIALIZED = 0; 5367db96d56Sopenharmony_ci if (!SRE_MATCH_INITIALIZED && !init_sre_match()) { 5377db96d56Sopenharmony_ci PyErr_Print(); 5387db96d56Sopenharmony_ci abort(); 5397db96d56Sopenharmony_ci } else { 5407db96d56Sopenharmony_ci SRE_MATCH_INITIALIZED = 1; 5417db96d56Sopenharmony_ci } 5427db96d56Sopenharmony_ci 5437db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_sre_match); 5447db96d56Sopenharmony_ci#endif 5457db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader) 5467db96d56Sopenharmony_ci static int CSV_READER_INITIALIZED = 0; 5477db96d56Sopenharmony_ci if (!CSV_READER_INITIALIZED && !init_csv_reader()) { 5487db96d56Sopenharmony_ci PyErr_Print(); 5497db96d56Sopenharmony_ci abort(); 5507db96d56Sopenharmony_ci } else { 5517db96d56Sopenharmony_ci CSV_READER_INITIALIZED = 1; 5527db96d56Sopenharmony_ci } 5537db96d56Sopenharmony_ci 5547db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_csv_reader); 5557db96d56Sopenharmony_ci#endif 5567db96d56Sopenharmony_ci#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval) 5577db96d56Sopenharmony_ci static int AST_LITERAL_EVAL_INITIALIZED = 0; 5587db96d56Sopenharmony_ci if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) { 5597db96d56Sopenharmony_ci PyErr_Print(); 5607db96d56Sopenharmony_ci abort(); 5617db96d56Sopenharmony_ci } else { 5627db96d56Sopenharmony_ci AST_LITERAL_EVAL_INITIALIZED = 1; 5637db96d56Sopenharmony_ci } 5647db96d56Sopenharmony_ci 5657db96d56Sopenharmony_ci rv |= _run_fuzz(data, size, fuzz_ast_literal_eval); 5667db96d56Sopenharmony_ci#endif 5677db96d56Sopenharmony_ci return rv; 5687db96d56Sopenharmony_ci} 569