xref: /third_party/python/Modules/_json.c (revision 7db96d56)
1/* JSON accelerator C extension: _json module.
2 *
3 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
5 * platforms. */
6
7#ifndef Py_BUILD_CORE_BUILTIN
8#  define Py_BUILD_CORE_MODULE 1
9#endif
10#define NEEDS_PY_IDENTIFIER
11
12#include "Python.h"
13#include "pycore_ceval.h"         // _Py_EnterRecursiveCall()
14#include "structmember.h"         // PyMemberDef
15#include "pycore_accu.h"
16
/* Per-module state of the _json module; get_json_state() returns a pointer
 * to this structure for a given module object.
 * NOTE(review): the two members presumably hold the scanner and encoder
 * type objects created at module init -- confirm against the init code. */
17typedef struct {
18    PyObject *PyScannerType;
19    PyObject *PyEncoderType;
20} _jsonmodulestate;
21
22static inline _jsonmodulestate*
23get_json_state(PyObject *module)
24{
25    void *state = PyModule_GetState(module);
26    assert(state != NULL);
27    return (_jsonmodulestate *)state;
28}
29
30
/* Instance layout of the scanner (decoder) object.
 * All PyObject* members are visited by scanner_traverse() and released by
 * scanner_clear(). */
31typedef struct _PyScannerObject {
32    PyObject_HEAD
    /* Forwarded to scanstring_unicode() as its `strict` argument. */
33    signed char strict;
    /* Called with the finished dict unless it is Py_None. */
34    PyObject *object_hook;
    /* When not Py_None, objects are collected as a list of (key, value)
     * tuples and this callable receives that list instead. */
35    PyObject *object_pairs_hook;
    /* NOTE(review): presumably the callables used to build floats/ints from
     * number text -- their use is outside the code visible here. */
36    PyObject *parse_float;
37    PyObject *parse_int;
    /* Called by _parse_constant() with the constant's name as a str. */
38    PyObject *parse_constant;
    /* Dict used by _parse_object_unicode() to share equal key strings. */
39    PyObject *memo;
40} PyScannerObject;
41
42static PyMemberDef scanner_members[] = {
43    {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
44    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
45    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
46    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
47    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
48    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
49    {NULL}
50};
51
/* Instance layout of the encoder object.
 * NOTE(review): the code that fills and uses these members is outside the
 * part of the file visible here; the per-field meaning presumably mirrors
 * the arguments of the Python-level encoder -- confirm against encoder_new. */
52typedef struct _PyEncoderObject {
53    PyObject_HEAD
54    PyObject *markers;
    /* Exposed to Python under the attribute name "default". */
55    PyObject *defaultfn;
56    PyObject *encoder;
57    PyObject *indent;
58    PyObject *key_separator;
59    PyObject *item_separator;
    /* Both flags are exposed as T_BOOL members. */
60    char sort_keys;
61    char skipkeys;
    /* allow_nan and fast_encode have no entry in encoder_members. */
62    int allow_nan;
63    PyCFunction fast_encode;
64} PyEncoderObject;
65
/* Read-only attributes exposed on encoder objects.
 * Every entry uses the attribute name as its doc string.  Note that the
 * Python-visible name "default" maps onto the C field `defaultfn`; the
 * `allow_nan` and `fast_encode` fields are not exposed here. */
66static PyMemberDef encoder_members[] = {
67    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
68    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
69    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
70    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
71    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
72    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
73    {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
74    {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    /* Sentinel entry terminating the table. */
75    {NULL}
76};
77
78/* Forward decls */
79
80static PyObject *
81ascii_escape_unicode(PyObject *pystr);
82static PyObject *
83py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
84static PyObject *
85scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
86static PyObject *
87_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
88static PyObject *
89scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
90static void
91scanner_dealloc(PyObject *self);
92static int
93scanner_clear(PyScannerObject *self);
94static PyObject *
95encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
96static void
97encoder_dealloc(PyObject *self);
98static int
99encoder_clear(PyEncoderObject *self);
100static int
101encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
102static int
103encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
104static int
105encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
106static PyObject *
107_encoded_const(PyObject *obj);
108static void
109raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
110static PyObject *
111encoder_encode_string(PyEncoderObject *s, PyObject *obj);
112static PyObject *
113encoder_encode_float(PyEncoderObject *s, PyObject *obj);
114
/* True when code point c can be copied verbatim into an ASCII-only JSON
 * string: printable ASCII (' ' .. '~') other than backslash and double quote.
 * The argument is fully parenthesized so that expressions such as
 * S_CHAR(x & 0x7f) evaluate as intended.  Like IS_WHITESPACE, the macro
 * evaluates its argument several times, so it must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters JSON allows between tokens. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
117
118static Py_ssize_t
119ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
120{
121    /* Escape unicode code point c to ASCII escape sequences
122    in char *output. output must have at least 12 bytes unused to
123    accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
124    output[chars++] = '\\';
125    switch (c) {
126        case '\\': output[chars++] = c; break;
127        case '"': output[chars++] = c; break;
128        case '\b': output[chars++] = 'b'; break;
129        case '\f': output[chars++] = 'f'; break;
130        case '\n': output[chars++] = 'n'; break;
131        case '\r': output[chars++] = 'r'; break;
132        case '\t': output[chars++] = 't'; break;
133        default:
134            if (c >= 0x10000) {
135                /* UTF-16 surrogate pair */
136                Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
137                output[chars++] = 'u';
138                output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
139                output[chars++] = Py_hexdigits[(v >>  8) & 0xf];
140                output[chars++] = Py_hexdigits[(v >>  4) & 0xf];
141                output[chars++] = Py_hexdigits[(v      ) & 0xf];
142                c = Py_UNICODE_LOW_SURROGATE(c);
143                output[chars++] = '\\';
144            }
145            output[chars++] = 'u';
146            output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
147            output[chars++] = Py_hexdigits[(c >>  8) & 0xf];
148            output[chars++] = Py_hexdigits[(c >>  4) & 0xf];
149            output[chars++] = Py_hexdigits[(c      ) & 0xf];
150    }
151    return chars;
152}
153
154static PyObject *
155ascii_escape_unicode(PyObject *pystr)
156{
157    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
158    Py_ssize_t i;
159    Py_ssize_t input_chars;
160    Py_ssize_t output_size;
161    Py_ssize_t chars;
162    PyObject *rval;
163    const void *input;
164    Py_UCS1 *output;
165    int kind;
166
167    if (PyUnicode_READY(pystr) == -1)
168        return NULL;
169
170    input_chars = PyUnicode_GET_LENGTH(pystr);
171    input = PyUnicode_DATA(pystr);
172    kind = PyUnicode_KIND(pystr);
173
174    /* Compute the output size */
175    for (i = 0, output_size = 2; i < input_chars; i++) {
176        Py_UCS4 c = PyUnicode_READ(kind, input, i);
177        Py_ssize_t d;
178        if (S_CHAR(c)) {
179            d = 1;
180        }
181        else {
182            switch(c) {
183            case '\\': case '"': case '\b': case '\f':
184            case '\n': case '\r': case '\t':
185                d = 2; break;
186            default:
187                d = c >= 0x10000 ? 12 : 6;
188            }
189        }
190        if (output_size > PY_SSIZE_T_MAX - d) {
191            PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
192            return NULL;
193        }
194        output_size += d;
195    }
196
197    rval = PyUnicode_New(output_size, 127);
198    if (rval == NULL) {
199        return NULL;
200    }
201    output = PyUnicode_1BYTE_DATA(rval);
202    chars = 0;
203    output[chars++] = '"';
204    for (i = 0; i < input_chars; i++) {
205        Py_UCS4 c = PyUnicode_READ(kind, input, i);
206        if (S_CHAR(c)) {
207            output[chars++] = c;
208        }
209        else {
210            chars = ascii_escape_unichar(c, output, chars);
211        }
212    }
213    output[chars++] = '"';
214#ifdef Py_DEBUG
215    assert(_PyUnicode_CheckConsistency(rval, 1));
216#endif
217    return rval;
218}
219
220static PyObject *
221escape_unicode(PyObject *pystr)
222{
223    /* Take a PyUnicode pystr and return a new escaped PyUnicode */
224    Py_ssize_t i;
225    Py_ssize_t input_chars;
226    Py_ssize_t output_size;
227    Py_ssize_t chars;
228    PyObject *rval;
229    const void *input;
230    int kind;
231    Py_UCS4 maxchar;
232
233    if (PyUnicode_READY(pystr) == -1)
234        return NULL;
235
236    maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
237    input_chars = PyUnicode_GET_LENGTH(pystr);
238    input = PyUnicode_DATA(pystr);
239    kind = PyUnicode_KIND(pystr);
240
241    /* Compute the output size */
242    for (i = 0, output_size = 2; i < input_chars; i++) {
243        Py_UCS4 c = PyUnicode_READ(kind, input, i);
244        Py_ssize_t d;
245        switch (c) {
246        case '\\': case '"': case '\b': case '\f':
247        case '\n': case '\r': case '\t':
248            d = 2;
249            break;
250        default:
251            if (c <= 0x1f)
252                d = 6;
253            else
254                d = 1;
255        }
256        if (output_size > PY_SSIZE_T_MAX - d) {
257            PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
258            return NULL;
259        }
260        output_size += d;
261    }
262
263    rval = PyUnicode_New(output_size, maxchar);
264    if (rval == NULL)
265        return NULL;
266
267    kind = PyUnicode_KIND(rval);
268
269#define ENCODE_OUTPUT do { \
270        chars = 0; \
271        output[chars++] = '"'; \
272        for (i = 0; i < input_chars; i++) { \
273            Py_UCS4 c = PyUnicode_READ(kind, input, i); \
274            switch (c) { \
275            case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
276            case '"':  output[chars++] = '\\'; output[chars++] = c; break; \
277            case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
278            case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
279            case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
280            case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
281            case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
282            default: \
283                if (c <= 0x1f) { \
284                    output[chars++] = '\\'; \
285                    output[chars++] = 'u'; \
286                    output[chars++] = '0'; \
287                    output[chars++] = '0'; \
288                    output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
289                    output[chars++] = Py_hexdigits[(c     ) & 0xf]; \
290                } else { \
291                    output[chars++] = c; \
292                } \
293            } \
294        } \
295        output[chars++] = '"'; \
296    } while (0)
297
298    if (kind == PyUnicode_1BYTE_KIND) {
299        Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
300        ENCODE_OUTPUT;
301    } else if (kind == PyUnicode_2BYTE_KIND) {
302        Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
303        ENCODE_OUTPUT;
304    } else {
305        Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
306        assert(kind == PyUnicode_4BYTE_KIND);
307        ENCODE_OUTPUT;
308    }
309#undef ENCODE_OUTPUT
310
311#ifdef Py_DEBUG
312    assert(_PyUnicode_CheckConsistency(rval, 1));
313#endif
314    return rval;
315}
316
317static void
318raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
319{
320    /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
321    _Py_static_string(PyId_decoder, "json.decoder");
322    PyObject *decoder = _PyImport_GetModuleId(&PyId_decoder);
323    if (decoder == NULL) {
324        return;
325    }
326
327    _Py_IDENTIFIER(JSONDecodeError);
328    PyObject *JSONDecodeError = _PyObject_GetAttrId(decoder, &PyId_JSONDecodeError);
329    Py_DECREF(decoder);
330    if (JSONDecodeError == NULL) {
331        return;
332    }
333
334    PyObject *exc;
335    exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
336    Py_DECREF(JSONDecodeError);
337    if (exc) {
338        PyErr_SetObject(JSONDecodeError, exc);
339        Py_DECREF(exc);
340    }
341}
342
343static void
344raise_stop_iteration(Py_ssize_t idx)
345{
346    PyObject *value = PyLong_FromSsize_t(idx);
347    if (value != NULL) {
348        PyErr_SetObject(PyExc_StopIteration, value);
349        Py_DECREF(value);
350    }
351}
352
353static PyObject *
354_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
355    /* return (rval, idx) tuple, stealing reference to rval */
356    PyObject *tpl;
357    PyObject *pyidx;
358    /*
359    steal a reference to rval, returns (rval, idx)
360    */
361    if (rval == NULL) {
362        return NULL;
363    }
364    pyidx = PyLong_FromSsize_t(idx);
365    if (pyidx == NULL) {
366        Py_DECREF(rval);
367        return NULL;
368    }
369    tpl = PyTuple_New(2);
370    if (tpl == NULL) {
371        Py_DECREF(pyidx);
372        Py_DECREF(rval);
373        return NULL;
374    }
375    PyTuple_SET_ITEM(tpl, 0, rval);
376    PyTuple_SET_ITEM(tpl, 1, pyidx);
377    return tpl;
378}
379
380static PyObject *
381scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
382{
383    /* Read the JSON string from PyUnicode pystr.
384    end is the index of the first character after the quote.
385    if strict is zero then literal control characters are allowed
386    *next_end_ptr is a return-by-reference index of the character
387        after the end quote
388
389    Return value is a new PyUnicode
390    */
391    PyObject *rval = NULL;
392    Py_ssize_t len;
393    Py_ssize_t begin = end - 1;
394    Py_ssize_t next /* = begin */;
395    const void *buf;
396    int kind;
397
398    if (PyUnicode_READY(pystr) == -1)
399        return 0;
400
401    _PyUnicodeWriter writer;
402    _PyUnicodeWriter_Init(&writer);
403    writer.overallocate = 1;
404
405    len = PyUnicode_GET_LENGTH(pystr);
406    buf = PyUnicode_DATA(pystr);
407    kind = PyUnicode_KIND(pystr);
408
409    if (end < 0 || len < end) {
410        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
411        goto bail;
412    }
413    while (1) {
414        /* Find the end of the string or the next escape */
415        Py_UCS4 c;
416        {
417            // Use tight scope variable to help register allocation.
418            Py_UCS4 d = 0;
419            for (next = end; next < len; next++) {
420                d = PyUnicode_READ(kind, buf, next);
421                if (d == '"' || d == '\\') {
422                    break;
423                }
424                if (d <= 0x1f && strict) {
425                    raise_errmsg("Invalid control character at", pystr, next);
426                    goto bail;
427                }
428            }
429            c = d;
430        }
431
432        if (c == '"') {
433            // Fast path for simple case.
434            if (writer.buffer == NULL) {
435                PyObject *ret = PyUnicode_Substring(pystr, end, next);
436                if (ret == NULL) {
437                    goto bail;
438                }
439                *next_end_ptr = next + 1;;
440                return ret;
441            }
442        }
443        else if (c != '\\') {
444            raise_errmsg("Unterminated string starting at", pystr, begin);
445            goto bail;
446        }
447
448        /* Pick up this chunk if it's not zero length */
449        if (next != end) {
450            if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
451                goto bail;
452            }
453        }
454        next++;
455        if (c == '"') {
456            end = next;
457            break;
458        }
459        if (next == len) {
460            raise_errmsg("Unterminated string starting at", pystr, begin);
461            goto bail;
462        }
463        c = PyUnicode_READ(kind, buf, next);
464        if (c != 'u') {
465            /* Non-unicode backslash escapes */
466            end = next + 1;
467            switch (c) {
468                case '"': break;
469                case '\\': break;
470                case '/': break;
471                case 'b': c = '\b'; break;
472                case 'f': c = '\f'; break;
473                case 'n': c = '\n'; break;
474                case 'r': c = '\r'; break;
475                case 't': c = '\t'; break;
476                default: c = 0;
477            }
478            if (c == 0) {
479                raise_errmsg("Invalid \\escape", pystr, end - 2);
480                goto bail;
481            }
482        }
483        else {
484            c = 0;
485            next++;
486            end = next + 4;
487            if (end >= len) {
488                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
489                goto bail;
490            }
491            /* Decode 4 hex digits */
492            for (; next < end; next++) {
493                Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
494                c <<= 4;
495                switch (digit) {
496                    case '0': case '1': case '2': case '3': case '4':
497                    case '5': case '6': case '7': case '8': case '9':
498                        c |= (digit - '0'); break;
499                    case 'a': case 'b': case 'c': case 'd': case 'e':
500                    case 'f':
501                        c |= (digit - 'a' + 10); break;
502                    case 'A': case 'B': case 'C': case 'D': case 'E':
503                    case 'F':
504                        c |= (digit - 'A' + 10); break;
505                    default:
506                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
507                        goto bail;
508                }
509            }
510            /* Surrogate pair */
511            if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
512                PyUnicode_READ(kind, buf, next++) == '\\' &&
513                PyUnicode_READ(kind, buf, next++) == 'u') {
514                Py_UCS4 c2 = 0;
515                end += 6;
516                /* Decode 4 hex digits */
517                for (; next < end; next++) {
518                    Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
519                    c2 <<= 4;
520                    switch (digit) {
521                        case '0': case '1': case '2': case '3': case '4':
522                        case '5': case '6': case '7': case '8': case '9':
523                            c2 |= (digit - '0'); break;
524                        case 'a': case 'b': case 'c': case 'd': case 'e':
525                        case 'f':
526                            c2 |= (digit - 'a' + 10); break;
527                        case 'A': case 'B': case 'C': case 'D': case 'E':
528                        case 'F':
529                            c2 |= (digit - 'A' + 10); break;
530                        default:
531                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
532                            goto bail;
533                    }
534                }
535                if (Py_UNICODE_IS_LOW_SURROGATE(c2))
536                    c = Py_UNICODE_JOIN_SURROGATES(c, c2);
537                else
538                    end -= 6;
539            }
540        }
541        if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
542            goto bail;
543        }
544    }
545
546    rval = _PyUnicodeWriter_Finish(&writer);
547    *next_end_ptr = end;
548    return rval;
549
550bail:
551    *next_end_ptr = -1;
552    _PyUnicodeWriter_Dealloc(&writer);
553    return NULL;
554}
555
556PyDoc_STRVAR(pydoc_scanstring,
557    "scanstring(string, end, strict=True) -> (string, end)\n"
558    "\n"
559    "Scan the string s for a JSON string. End is the index of the\n"
560    "character in s after the quote that started the JSON string.\n"
561    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
562    "on attempt to decode an invalid string. If strict is False then literal\n"
563    "control characters are allowed in the string.\n"
564    "\n"
565    "Returns a tuple of the decoded string and the index of the character in s\n"
566    "after the end quote."
567);
568
569static PyObject *
570py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
571{
572    PyObject *pystr;
573    PyObject *rval;
574    Py_ssize_t end;
575    Py_ssize_t next_end = -1;
576    int strict = 1;
577    if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
578        return NULL;
579    }
580    if (PyUnicode_Check(pystr)) {
581        rval = scanstring_unicode(pystr, end, strict, &next_end);
582    }
583    else {
584        PyErr_Format(PyExc_TypeError,
585                     "first argument must be a string, not %.80s",
586                     Py_TYPE(pystr)->tp_name);
587        return NULL;
588    }
589    return _build_rval_index_tuple(rval, next_end);
590}
591
592PyDoc_STRVAR(pydoc_encode_basestring_ascii,
593    "encode_basestring_ascii(string) -> string\n"
594    "\n"
595    "Return an ASCII-only JSON representation of a Python string"
596);
597
598static PyObject *
599py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
600{
601    PyObject *rval;
602    /* Return an ASCII-only JSON representation of a Python string */
603    /* METH_O */
604    if (PyUnicode_Check(pystr)) {
605        rval = ascii_escape_unicode(pystr);
606    }
607    else {
608        PyErr_Format(PyExc_TypeError,
609                     "first argument must be a string, not %.80s",
610                     Py_TYPE(pystr)->tp_name);
611        return NULL;
612    }
613    return rval;
614}
615
616
617PyDoc_STRVAR(pydoc_encode_basestring,
618    "encode_basestring(string) -> string\n"
619    "\n"
620    "Return a JSON representation of a Python string"
621);
622
623static PyObject *
624py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
625{
626    PyObject *rval;
627    /* Return a JSON representation of a Python string */
628    /* METH_O */
629    if (PyUnicode_Check(pystr)) {
630        rval = escape_unicode(pystr);
631    }
632    else {
633        PyErr_Format(PyExc_TypeError,
634                     "first argument must be a string, not %.80s",
635                     Py_TYPE(pystr)->tp_name);
636        return NULL;
637    }
638    return rval;
639}
640
641static void
642scanner_dealloc(PyObject *self)
643{
644    PyTypeObject *tp = Py_TYPE(self);
645    /* bpo-31095: UnTrack is needed before calling any callbacks */
646    PyObject_GC_UnTrack(self);
647    scanner_clear((PyScannerObject *)self);
648    tp->tp_free(self);
649    Py_DECREF(tp);
650}
651
652static int
653scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
654{
655    Py_VISIT(Py_TYPE(self));
656    Py_VISIT(self->object_hook);
657    Py_VISIT(self->object_pairs_hook);
658    Py_VISIT(self->parse_float);
659    Py_VISIT(self->parse_int);
660    Py_VISIT(self->parse_constant);
661    Py_VISIT(self->memo);
662    return 0;
663}
664
665static int
666scanner_clear(PyScannerObject *self)
667{
668    Py_CLEAR(self->object_hook);
669    Py_CLEAR(self->object_pairs_hook);
670    Py_CLEAR(self->parse_float);
671    Py_CLEAR(self->parse_int);
672    Py_CLEAR(self->parse_constant);
673    Py_CLEAR(self->memo);
674    return 0;
675}
676
/* Parse one JSON object whose opening '{' has already been consumed.
 *
 * Pairs are accumulated in a dict, or in a list of (key, value) tuples when
 * s->object_pairs_hook is not Py_None.  On success *next_idx_ptr receives
 * the index just after the closing '}' and the accumulated container is
 * returned, after being passed through object_pairs_hook or object_hook if
 * one of them is set (object_pairs_hook takes precedence).
 * Returns NULL with an exception set on failure; key, val and rval are
 * released on the bail path. */
677static PyObject *
678_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
679{
680    /* Read a JSON object from PyUnicode pystr.
681    idx is the index of the first character after the opening curly brace.
682    *next_idx_ptr is a return-by-reference index to the first character after
683        the closing curly brace.
684
685    Returns a new PyObject (usually a dict, but object_hook can change that)
686    */
687    const void *str;
688    int kind;
689    Py_ssize_t end_idx;
690    PyObject *val = NULL;
691    PyObject *rval = NULL;
692    PyObject *key = NULL;
693    int has_pairs_hook = (s->object_pairs_hook != Py_None);
694    Py_ssize_t next_idx;
695
696    if (PyUnicode_READY(pystr) == -1)
697        return NULL;
698
699    str = PyUnicode_DATA(pystr);
700    kind = PyUnicode_KIND(pystr);
    /* end_idx is the index of the last character, not the length. */
701    end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
702
703    if (has_pairs_hook)
704        rval = PyList_New(0);
705    else
706        rval = PyDict_New();
707    if (rval == NULL)
708        return NULL;
709
710    /* skip whitespace after { */
711    while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
712
713    /* only loop if the object is non-empty */
714    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
715        while (1) {
716            PyObject *memokey;
717
718            /* read key */
719            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
720                raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
721                goto bail;
722            }
723            key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
724            if (key == NULL)
725                goto bail;
            /* Replace key by the equal string already stored in the memo
               dict (or store it there), so repeated keys share one object.
               The incref/decref pair below swaps our owned reference from
               the fresh string to the memoized one. */
726            memokey = PyDict_SetDefault(s->memo, key, key);
727            if (memokey == NULL) {
728                goto bail;
729            }
730            Py_INCREF(memokey);
731            Py_DECREF(key);
732            key = memokey;
733            idx = next_idx;
734
735            /* skip whitespace between key and : delimiter, read :, skip whitespace */
736            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
737            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
738                raise_errmsg("Expecting ':' delimiter", pystr, idx);
739                goto bail;
740            }
741            idx++;
742            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
743
744            /* read any JSON term */
745            val = scan_once_unicode(s, pystr, idx, &next_idx);
746            if (val == NULL)
747                goto bail;
748
749            if (has_pairs_hook) {
750                PyObject *item = PyTuple_Pack(2, key, val);
751                if (item == NULL)
752                    goto bail;
                /* key and val are cleared before the append so that the
                   bail path does not release them a second time. */
753                Py_CLEAR(key);
754                Py_CLEAR(val);
755                if (PyList_Append(rval, item) == -1) {
756                    Py_DECREF(item);
757                    goto bail;
758                }
759                Py_DECREF(item);
760            }
761            else {
762                if (PyDict_SetItem(rval, key, val) < 0)
763                    goto bail;
764                Py_CLEAR(key);
765                Py_CLEAR(val);
766            }
767            idx = next_idx;
768
769            /* skip whitespace before } or , */
770            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
771
772            /* bail if the object is closed or we didn't get the , delimiter */
773            if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
774                break;
775            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
776                raise_errmsg("Expecting ',' delimiter", pystr, idx);
777                goto bail;
778            }
779            idx++;
780
781            /* skip whitespace after , delimiter */
782            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
783        }
784    }
785
    /* idx is on the closing '}' here.  The out-parameter is written before
       the hooks run, so it is set even if a hook call fails. */
786    *next_idx_ptr = idx + 1;
787
788    if (has_pairs_hook) {
789        val = PyObject_CallOneArg(s->object_pairs_hook, rval);
790        Py_DECREF(rval);
791        return val;
792    }
793
794    /* if object_hook is not None: rval = object_hook(rval) */
795    if (s->object_hook != Py_None) {
796        val = PyObject_CallOneArg(s->object_hook, rval);
797        Py_DECREF(rval);
798        return val;
799    }
800    return rval;
801bail:
802    Py_XDECREF(key);
803    Py_XDECREF(val);
804    Py_XDECREF(rval);
805    return NULL;
806}
807
808static PyObject *
809_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
810    /* Read a JSON array from PyUnicode pystr.
811    idx is the index of the first character after the opening brace.
812    *next_idx_ptr is a return-by-reference index to the first character after
813        the closing brace.
814
815    Returns a new PyList
816    */
817    const void *str;
818    int kind;
819    Py_ssize_t end_idx;
820    PyObject *val = NULL;
821    PyObject *rval;
822    Py_ssize_t next_idx;
823
824    if (PyUnicode_READY(pystr) == -1)
825        return NULL;
826
827    rval = PyList_New(0);
828    if (rval == NULL)
829        return NULL;
830
831    str = PyUnicode_DATA(pystr);
832    kind = PyUnicode_KIND(pystr);
833    end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
834
835    /* skip whitespace after [ */
836    while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
837
838    /* only loop if the array is non-empty */
839    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
840        while (1) {
841
842            /* read any JSON term  */
843            val = scan_once_unicode(s, pystr, idx, &next_idx);
844            if (val == NULL)
845                goto bail;
846
847            if (PyList_Append(rval, val) == -1)
848                goto bail;
849
850            Py_CLEAR(val);
851            idx = next_idx;
852
853            /* skip whitespace between term and , */
854            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
855
856            /* bail if the array is closed or we didn't get the , delimiter */
857            if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
858                break;
859            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
860                raise_errmsg("Expecting ',' delimiter", pystr, idx);
861                goto bail;
862            }
863            idx++;
864
865            /* skip whitespace after , */
866            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
867        }
868    }
869
870    /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
871    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
872        raise_errmsg("Expecting value", pystr, end_idx);
873        goto bail;
874    }
875    *next_idx_ptr = idx + 1;
876    return rval;
877bail:
878    Py_XDECREF(val);
879    Py_DECREF(rval);
880    return NULL;
881}
882
883static PyObject *
884_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
885    /* Read a JSON constant.
886    constant is the constant string that was found
887        ("NaN", "Infinity", "-Infinity").
888    idx is the index of the first character of the constant
889    *next_idx_ptr is a return-by-reference index to the first character after
890        the constant.
891
892    Returns the result of parse_constant
893    */
894    PyObject *cstr;
895    PyObject *rval;
896    /* constant is "NaN", "Infinity", or "-Infinity" */
897    cstr = PyUnicode_InternFromString(constant);
898    if (cstr == NULL)
899        return NULL;
900
901    /* rval = parse_constant(constant) */
902    rval = PyObject_CallOneArg(s->parse_constant, cstr);
903    idx += PyUnicode_GET_LENGTH(cstr);
904    Py_DECREF(cstr);
905    *next_idx_ptr = idx;
906    return rval;
907}
908
909static PyObject *
910_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
911    /* Read a JSON number from PyUnicode pystr.
912    idx is the index of the first character of the number
913    *next_idx_ptr is a return-by-reference index to the first character after
914        the number.
915
916    Returns a new PyObject representation of that number:
917        PyLong, or PyFloat.
918        May return other types if parse_int or parse_float are set
919    */
920    const void *str;
921    int kind;
922    Py_ssize_t end_idx;
923    Py_ssize_t idx = start;
924    int is_float = 0;
925    PyObject *rval;
926    PyObject *numstr = NULL;
927    PyObject *custom_func;
928
929    if (PyUnicode_READY(pystr) == -1)
930        return NULL;
931
932    str = PyUnicode_DATA(pystr);
933    kind = PyUnicode_KIND(pystr);
934    end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
935
936    /* read a sign if it's there, make sure it's not the end of the string */
937    if (PyUnicode_READ(kind, str, idx) == '-') {
938        idx++;
939        if (idx > end_idx) {
940            raise_stop_iteration(start);
941            return NULL;
942        }
943    }
944
945    /* read as many integer digits as we find as long as it doesn't start with 0 */
946    if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
947        idx++;
948        while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
949    }
950    /* if it starts with 0 we only expect one integer digit */
951    else if (PyUnicode_READ(kind, str, idx) == '0') {
952        idx++;
953    }
954    /* no integer digits, error */
955    else {
956        raise_stop_iteration(start);
957        return NULL;
958    }
959
960    /* if the next char is '.' followed by a digit then read all float digits */
961    if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
962        is_float = 1;
963        idx += 2;
964        while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
965    }
966
967    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
968    if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
969        Py_ssize_t e_start = idx;
970        idx++;
971
972        /* read an exponent sign if present */
973        if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
974
975        /* read all digits */
976        while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
977
978        /* if we got a digit, then parse as float. if not, backtrack */
979        if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
980            is_float = 1;
981        }
982        else {
983            idx = e_start;
984        }
985    }
986
987    if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
988        custom_func = s->parse_float;
989    else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
990        custom_func = s->parse_int;
991    else
992        custom_func = NULL;
993
994    if (custom_func) {
995        /* copy the section we determined to be a number */
996        numstr = PyUnicode_FromKindAndData(kind,
997                                           (char*)str + kind * start,
998                                           idx - start);
999        if (numstr == NULL)
1000            return NULL;
1001        rval = PyObject_CallOneArg(custom_func, numstr);
1002    }
1003    else {
1004        Py_ssize_t i, n;
1005        char *buf;
1006        /* Straight conversion to ASCII, to avoid costly conversion of
1007           decimal unicode digits (which cannot appear here) */
1008        n = idx - start;
1009        numstr = PyBytes_FromStringAndSize(NULL, n);
1010        if (numstr == NULL)
1011            return NULL;
1012        buf = PyBytes_AS_STRING(numstr);
1013        for (i = 0; i < n; i++) {
1014            buf[i] = (char) PyUnicode_READ(kind, str, i + start);
1015        }
1016        if (is_float)
1017            rval = PyFloat_FromString(numstr);
1018        else
1019            rval = PyLong_FromString(buf, NULL, 10);
1020    }
1021    Py_DECREF(numstr);
1022    *next_idx_ptr = idx;
1023    return rval;
1024}
1025
1026static PyObject *
1027scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1028{
1029    /* Read one JSON term (of any kind) from PyUnicode pystr.
1030    idx is the index of the first character of the term
1031    *next_idx_ptr is a return-by-reference index to the first character after
1032        the number.
1033
1034    Returns a new PyObject representation of the term.
1035    */
1036    PyObject *res;
1037    const void *str;
1038    int kind;
1039    Py_ssize_t length;
1040
1041    if (PyUnicode_READY(pystr) == -1)
1042        return NULL;
1043
1044    str = PyUnicode_DATA(pystr);
1045    kind = PyUnicode_KIND(pystr);
1046    length = PyUnicode_GET_LENGTH(pystr);
1047
1048    if (idx < 0) {
1049        PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1050        return NULL;
1051    }
1052    if (idx >= length) {
1053        raise_stop_iteration(idx);
1054        return NULL;
1055    }
1056
1057    switch (PyUnicode_READ(kind, str, idx)) {
1058        case '"':
1059            /* string */
1060            return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1061        case '{':
1062            /* object */
1063            if (_Py_EnterRecursiveCall(" while decoding a JSON object "
1064                                       "from a unicode string"))
1065                return NULL;
1066            res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1067            _Py_LeaveRecursiveCall();
1068            return res;
1069        case '[':
1070            /* array */
1071            if (_Py_EnterRecursiveCall(" while decoding a JSON array "
1072                                       "from a unicode string"))
1073                return NULL;
1074            res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1075            _Py_LeaveRecursiveCall();
1076            return res;
1077        case 'n':
1078            /* null */
1079            if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1080                *next_idx_ptr = idx + 4;
1081                Py_RETURN_NONE;
1082            }
1083            break;
1084        case 't':
1085            /* true */
1086            if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1087                *next_idx_ptr = idx + 4;
1088                Py_RETURN_TRUE;
1089            }
1090            break;
1091        case 'f':
1092            /* false */
1093            if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1094                PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1095                PyUnicode_READ(kind, str, idx + 3) == 's' &&
1096                PyUnicode_READ(kind, str, idx + 4) == 'e') {
1097                *next_idx_ptr = idx + 5;
1098                Py_RETURN_FALSE;
1099            }
1100            break;
1101        case 'N':
1102            /* NaN */
1103            if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1104                PyUnicode_READ(kind, str, idx + 2) == 'N') {
1105                return _parse_constant(s, "NaN", idx, next_idx_ptr);
1106            }
1107            break;
1108        case 'I':
1109            /* Infinity */
1110            if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1111                PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1112                PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1113                PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1114                PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1115                PyUnicode_READ(kind, str, idx + 6) == 't' &&
1116                PyUnicode_READ(kind, str, idx + 7) == 'y') {
1117                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1118            }
1119            break;
1120        case '-':
1121            /* -Infinity */
1122            if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1123                PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1124                PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1125                PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1126                PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1127                PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1128                PyUnicode_READ(kind, str, idx + 7) == 't' &&
1129                PyUnicode_READ(kind, str, idx + 8) == 'y') {
1130                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1131            }
1132            break;
1133    }
1134    /* Didn't find a string, object, array, or named constant. Look for a number. */
1135    return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1136}
1137
1138static PyObject *
1139scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
1140{
1141    /* Python callable interface to scan_once_{str,unicode} */
1142    PyObject *pystr;
1143    PyObject *rval;
1144    Py_ssize_t idx;
1145    Py_ssize_t next_idx = -1;
1146    static char *kwlist[] = {"string", "idx", NULL};
1147    if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1148        return NULL;
1149
1150    if (PyUnicode_Check(pystr)) {
1151        rval = scan_once_unicode(self, pystr, idx, &next_idx);
1152    }
1153    else {
1154        PyErr_Format(PyExc_TypeError,
1155                 "first argument must be a string, not %.80s",
1156                 Py_TYPE(pystr)->tp_name);
1157        return NULL;
1158    }
1159    PyDict_Clear(self->memo);
1160    if (rval == NULL)
1161        return NULL;
1162    return _build_rval_index_tuple(rval, next_idx);
1163}
1164
1165static PyObject *
1166scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1167{
1168    PyScannerObject *s;
1169    PyObject *ctx;
1170    PyObject *strict;
1171    static char *kwlist[] = {"context", NULL};
1172
1173    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1174        return NULL;
1175
1176    s = (PyScannerObject *)type->tp_alloc(type, 0);
1177    if (s == NULL) {
1178        return NULL;
1179    }
1180
1181    s->memo = PyDict_New();
1182    if (s->memo == NULL)
1183        goto bail;
1184
1185    /* All of these will fail "gracefully" so we don't need to verify them */
1186    strict = PyObject_GetAttrString(ctx, "strict");
1187    if (strict == NULL)
1188        goto bail;
1189    s->strict = PyObject_IsTrue(strict);
1190    Py_DECREF(strict);
1191    if (s->strict < 0)
1192        goto bail;
1193    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1194    if (s->object_hook == NULL)
1195        goto bail;
1196    s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1197    if (s->object_pairs_hook == NULL)
1198        goto bail;
1199    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1200    if (s->parse_float == NULL)
1201        goto bail;
1202    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1203    if (s->parse_int == NULL)
1204        goto bail;
1205    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1206    if (s->parse_constant == NULL)
1207        goto bail;
1208
1209    return (PyObject *)s;
1210
1211bail:
1212    Py_DECREF(s);
1213    return NULL;
1214}
1215
1216PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1217
1218static PyType_Slot PyScannerType_slots[] = {
1219    {Py_tp_doc, (void *)scanner_doc},
1220    {Py_tp_dealloc, scanner_dealloc},
1221    {Py_tp_call, scanner_call},
1222    {Py_tp_traverse, scanner_traverse},
1223    {Py_tp_clear, scanner_clear},
1224    {Py_tp_members, scanner_members},
1225    {Py_tp_new, scanner_new},
1226    {0, 0}
1227};
1228
1229static PyType_Spec PyScannerType_spec = {
1230    .name = "_json.Scanner",
1231    .basicsize = sizeof(PyScannerObject),
1232    .itemsize = 0,
1233    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1234    .slots = PyScannerType_slots,
1235};
1236
1237static PyObject *
1238encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1239{
1240    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1241
1242    PyEncoderObject *s;
1243    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1244    PyObject *item_separator;
1245    int sort_keys, skipkeys, allow_nan;
1246
1247    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1248        &markers, &defaultfn, &encoder, &indent,
1249        &key_separator, &item_separator,
1250        &sort_keys, &skipkeys, &allow_nan))
1251        return NULL;
1252
1253    if (markers != Py_None && !PyDict_Check(markers)) {
1254        PyErr_Format(PyExc_TypeError,
1255                     "make_encoder() argument 1 must be dict or None, "
1256                     "not %.200s", Py_TYPE(markers)->tp_name);
1257        return NULL;
1258    }
1259
1260    s = (PyEncoderObject *)type->tp_alloc(type, 0);
1261    if (s == NULL)
1262        return NULL;
1263
1264    s->markers = markers;
1265    s->defaultfn = defaultfn;
1266    s->encoder = encoder;
1267    s->indent = indent;
1268    s->key_separator = key_separator;
1269    s->item_separator = item_separator;
1270    s->sort_keys = sort_keys;
1271    s->skipkeys = skipkeys;
1272    s->allow_nan = allow_nan;
1273    s->fast_encode = NULL;
1274    if (PyCFunction_Check(s->encoder)) {
1275        PyCFunction f = PyCFunction_GetFunction(s->encoder);
1276        if (f == (PyCFunction)py_encode_basestring_ascii ||
1277                f == (PyCFunction)py_encode_basestring) {
1278            s->fast_encode = f;
1279        }
1280    }
1281
1282    Py_INCREF(s->markers);
1283    Py_INCREF(s->defaultfn);
1284    Py_INCREF(s->encoder);
1285    Py_INCREF(s->indent);
1286    Py_INCREF(s->key_separator);
1287    Py_INCREF(s->item_separator);
1288    return (PyObject *)s;
1289}
1290
1291static PyObject *
1292encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
1293{
1294    /* Python callable interface to encode_listencode_obj */
1295    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1296    PyObject *obj;
1297    Py_ssize_t indent_level;
1298    _PyAccu acc;
1299    if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1300        &obj, &indent_level))
1301        return NULL;
1302    if (_PyAccu_Init(&acc))
1303        return NULL;
1304    if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
1305        _PyAccu_Destroy(&acc);
1306        return NULL;
1307    }
1308    return _PyAccu_FinishAsList(&acc);
1309}
1310
1311static PyObject *
1312_encoded_const(PyObject *obj)
1313{
1314    /* Return the JSON string representation of None, True, False */
1315    if (obj == Py_None) {
1316        _Py_static_string(PyId_null, "null");
1317        PyObject *s_null = _PyUnicode_FromId(&PyId_null);
1318        if (s_null == NULL) {
1319            return NULL;
1320        }
1321        return Py_NewRef(s_null);
1322    }
1323    else if (obj == Py_True) {
1324        _Py_static_string(PyId_true, "true");
1325        PyObject *s_true = _PyUnicode_FromId(&PyId_true);
1326        if (s_true == NULL) {
1327            return NULL;
1328        }
1329        return Py_NewRef(s_true);
1330    }
1331    else if (obj == Py_False) {
1332        _Py_static_string(PyId_false, "false");
1333        PyObject *s_false = _PyUnicode_FromId(&PyId_false);
1334        if (s_false == NULL) {
1335            return NULL;
1336        }
1337        return Py_NewRef(s_false);
1338    }
1339    else {
1340        PyErr_SetString(PyExc_ValueError, "not a const");
1341        return NULL;
1342    }
1343}
1344
1345static PyObject *
1346encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1347{
1348    /* Return the JSON representation of a PyFloat. */
1349    double i = PyFloat_AS_DOUBLE(obj);
1350    if (!Py_IS_FINITE(i)) {
1351        if (!s->allow_nan) {
1352            PyErr_SetString(
1353                    PyExc_ValueError,
1354                    "Out of range float values are not JSON compliant"
1355                    );
1356            return NULL;
1357        }
1358        if (i > 0) {
1359            return PyUnicode_FromString("Infinity");
1360        }
1361        else if (i < 0) {
1362            return PyUnicode_FromString("-Infinity");
1363        }
1364        else {
1365            return PyUnicode_FromString("NaN");
1366        }
1367    }
1368    return PyFloat_Type.tp_repr(obj);
1369}
1370
1371static PyObject *
1372encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1373{
1374    /* Return the JSON representation of a string */
1375    PyObject *encoded;
1376
1377    if (s->fast_encode) {
1378        return s->fast_encode(NULL, obj);
1379    }
1380    encoded = PyObject_CallOneArg(s->encoder, obj);
1381    if (encoded != NULL && !PyUnicode_Check(encoded)) {
1382        PyErr_Format(PyExc_TypeError,
1383                     "encoder() must return a string, not %.80s",
1384                     Py_TYPE(encoded)->tp_name);
1385        Py_DECREF(encoded);
1386        return NULL;
1387    }
1388    return encoded;
1389}
1390
1391static int
1392_steal_accumulate(_PyAccu *acc, PyObject *stolen)
1393{
1394    /* Append stolen and then decrement its reference count */
1395    int rval = _PyAccu_Accumulate(acc, stolen);
1396    Py_DECREF(stolen);
1397    return rval;
1398}
1399
1400static int
1401encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
1402                       PyObject *obj, Py_ssize_t indent_level)
1403{
1404    /* Encode Python object obj to a JSON term */
1405    PyObject *newobj;
1406    int rv;
1407
1408    if (obj == Py_None || obj == Py_True || obj == Py_False) {
1409        PyObject *cstr = _encoded_const(obj);
1410        if (cstr == NULL)
1411            return -1;
1412        return _steal_accumulate(acc, cstr);
1413    }
1414    else if (PyUnicode_Check(obj))
1415    {
1416        PyObject *encoded = encoder_encode_string(s, obj);
1417        if (encoded == NULL)
1418            return -1;
1419        return _steal_accumulate(acc, encoded);
1420    }
1421    else if (PyLong_Check(obj)) {
1422        PyObject *encoded = PyLong_Type.tp_repr(obj);
1423        if (encoded == NULL)
1424            return -1;
1425        return _steal_accumulate(acc, encoded);
1426    }
1427    else if (PyFloat_Check(obj)) {
1428        PyObject *encoded = encoder_encode_float(s, obj);
1429        if (encoded == NULL)
1430            return -1;
1431        return _steal_accumulate(acc, encoded);
1432    }
1433    else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1434        if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1435            return -1;
1436        rv = encoder_listencode_list(s, acc, obj, indent_level);
1437        _Py_LeaveRecursiveCall();
1438        return rv;
1439    }
1440    else if (PyDict_Check(obj)) {
1441        if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1442            return -1;
1443        rv = encoder_listencode_dict(s, acc, obj, indent_level);
1444        _Py_LeaveRecursiveCall();
1445        return rv;
1446    }
1447    else {
1448        PyObject *ident = NULL;
1449        if (s->markers != Py_None) {
1450            int has_key;
1451            ident = PyLong_FromVoidPtr(obj);
1452            if (ident == NULL)
1453                return -1;
1454            has_key = PyDict_Contains(s->markers, ident);
1455            if (has_key) {
1456                if (has_key != -1)
1457                    PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1458                Py_DECREF(ident);
1459                return -1;
1460            }
1461            if (PyDict_SetItem(s->markers, ident, obj)) {
1462                Py_DECREF(ident);
1463                return -1;
1464            }
1465        }
1466        newobj = PyObject_CallOneArg(s->defaultfn, obj);
1467        if (newobj == NULL) {
1468            Py_XDECREF(ident);
1469            return -1;
1470        }
1471
1472        if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
1473            Py_DECREF(newobj);
1474            Py_XDECREF(ident);
1475            return -1;
1476        }
1477        rv = encoder_listencode_obj(s, acc, newobj, indent_level);
1478        _Py_LeaveRecursiveCall();
1479
1480        Py_DECREF(newobj);
1481        if (rv) {
1482            Py_XDECREF(ident);
1483            return -1;
1484        }
1485        if (ident != NULL) {
1486            if (PyDict_DelItem(s->markers, ident)) {
1487                Py_XDECREF(ident);
1488                return -1;
1489            }
1490            Py_XDECREF(ident);
1491        }
1492        return rv;
1493    }
1494}
1495
1496static int
1497encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
1498                        PyObject *dct, Py_ssize_t indent_level)
1499{
1500    /* Encode Python dict dct a JSON term */
1501    _Py_static_string(PyId_open_dict, "{");
1502    _Py_static_string(PyId_close_dict, "}");
1503    _Py_static_string(PyId_empty_dict, "{}");
1504    PyObject *open_dict = _PyUnicode_FromId(&PyId_open_dict);    // borrowed ref
1505    PyObject *close_dict = _PyUnicode_FromId(&PyId_close_dict);  // borrowed ref
1506    PyObject *empty_dict = _PyUnicode_FromId(&PyId_empty_dict);  // borrowed ref
1507    PyObject *kstr = NULL;
1508    PyObject *ident = NULL;
1509    PyObject *it = NULL;
1510    PyObject *items;
1511    PyObject *item = NULL;
1512    Py_ssize_t idx;
1513
1514    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1515        return -1;
1516    }
1517    if (PyDict_GET_SIZE(dct) == 0)  /* Fast path */
1518        return _PyAccu_Accumulate(acc, empty_dict);
1519
1520    if (s->markers != Py_None) {
1521        int has_key;
1522        ident = PyLong_FromVoidPtr(dct);
1523        if (ident == NULL)
1524            goto bail;
1525        has_key = PyDict_Contains(s->markers, ident);
1526        if (has_key) {
1527            if (has_key != -1)
1528                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1529            goto bail;
1530        }
1531        if (PyDict_SetItem(s->markers, ident, dct)) {
1532            goto bail;
1533        }
1534    }
1535
1536    if (_PyAccu_Accumulate(acc, open_dict))
1537        goto bail;
1538
1539    if (s->indent != Py_None) {
1540        /* TODO: DOES NOT RUN */
1541        indent_level += 1;
1542        /*
1543            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1544            separator = _item_separator + newline_indent
1545            buf += newline_indent
1546        */
1547    }
1548
1549    items = PyMapping_Items(dct);
1550    if (items == NULL)
1551        goto bail;
1552    if (s->sort_keys && PyList_Sort(items) < 0) {
1553        Py_DECREF(items);
1554        goto bail;
1555    }
1556    it = PyObject_GetIter(items);
1557    Py_DECREF(items);
1558    if (it == NULL)
1559        goto bail;
1560    idx = 0;
1561    while ((item = PyIter_Next(it)) != NULL) {
1562        PyObject *encoded, *key, *value;
1563        if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1564            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1565            goto bail;
1566        }
1567        key = PyTuple_GET_ITEM(item, 0);
1568        if (PyUnicode_Check(key)) {
1569            Py_INCREF(key);
1570            kstr = key;
1571        }
1572        else if (PyFloat_Check(key)) {
1573            kstr = encoder_encode_float(s, key);
1574            if (kstr == NULL)
1575                goto bail;
1576        }
1577        else if (key == Py_True || key == Py_False || key == Py_None) {
1578                        /* This must come before the PyLong_Check because
1579                           True and False are also 1 and 0.*/
1580            kstr = _encoded_const(key);
1581            if (kstr == NULL)
1582                goto bail;
1583        }
1584        else if (PyLong_Check(key)) {
1585            kstr = PyLong_Type.tp_repr(key);
1586            if (kstr == NULL) {
1587                goto bail;
1588            }
1589        }
1590        else if (s->skipkeys) {
1591            Py_DECREF(item);
1592            continue;
1593        }
1594        else {
1595            PyErr_Format(PyExc_TypeError,
1596                         "keys must be str, int, float, bool or None, "
1597                         "not %.100s", Py_TYPE(key)->tp_name);
1598            goto bail;
1599        }
1600
1601        if (idx) {
1602            if (_PyAccu_Accumulate(acc, s->item_separator))
1603                goto bail;
1604        }
1605
1606        encoded = encoder_encode_string(s, kstr);
1607        Py_CLEAR(kstr);
1608        if (encoded == NULL)
1609            goto bail;
1610        if (_PyAccu_Accumulate(acc, encoded)) {
1611            Py_DECREF(encoded);
1612            goto bail;
1613        }
1614        Py_DECREF(encoded);
1615        if (_PyAccu_Accumulate(acc, s->key_separator))
1616            goto bail;
1617
1618        value = PyTuple_GET_ITEM(item, 1);
1619        if (encoder_listencode_obj(s, acc, value, indent_level))
1620            goto bail;
1621        idx += 1;
1622        Py_DECREF(item);
1623    }
1624    if (PyErr_Occurred())
1625        goto bail;
1626    Py_CLEAR(it);
1627
1628    if (ident != NULL) {
1629        if (PyDict_DelItem(s->markers, ident))
1630            goto bail;
1631        Py_CLEAR(ident);
1632    }
1633    /* TODO DOES NOT RUN; dead code
1634    if (s->indent != Py_None) {
1635        indent_level -= 1;
1636
1637        yield '\n' + (' ' * (_indent * _current_indent_level))
1638    }*/
1639    if (_PyAccu_Accumulate(acc, close_dict))
1640        goto bail;
1641    return 0;
1642
1643bail:
1644    Py_XDECREF(it);
1645    Py_XDECREF(item);
1646    Py_XDECREF(kstr);
1647    Py_XDECREF(ident);
1648    return -1;
1649}
1650
1651
1652static int
1653encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
1654                        PyObject *seq, Py_ssize_t indent_level)
1655{
1656    /* Encode Python list seq to a JSON term */
1657    _Py_static_string(PyId_open_array, "[");
1658    _Py_static_string(PyId_close_array, "]");
1659    _Py_static_string(PyId_empty_array, "[]");
1660    PyObject *open_array = _PyUnicode_FromId(&PyId_open_array);   // borrowed ref
1661    PyObject *close_array = _PyUnicode_FromId(&PyId_close_array); // borrowed ref
1662    PyObject *empty_array = _PyUnicode_FromId(&PyId_empty_array); // borrowed ref
1663    PyObject *ident = NULL;
1664    PyObject *s_fast = NULL;
1665    Py_ssize_t i;
1666
1667    if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1668        return -1;
1669    }
1670    ident = NULL;
1671    s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1672    if (s_fast == NULL)
1673        return -1;
1674    if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1675        Py_DECREF(s_fast);
1676        return _PyAccu_Accumulate(acc, empty_array);
1677    }
1678
1679    if (s->markers != Py_None) {
1680        int has_key;
1681        ident = PyLong_FromVoidPtr(seq);
1682        if (ident == NULL)
1683            goto bail;
1684        has_key = PyDict_Contains(s->markers, ident);
1685        if (has_key) {
1686            if (has_key != -1)
1687                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1688            goto bail;
1689        }
1690        if (PyDict_SetItem(s->markers, ident, seq)) {
1691            goto bail;
1692        }
1693    }
1694
1695    if (_PyAccu_Accumulate(acc, open_array))
1696        goto bail;
1697    if (s->indent != Py_None) {
1698        /* TODO: DOES NOT RUN */
1699        indent_level += 1;
1700        /*
1701            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1702            separator = _item_separator + newline_indent
1703            buf += newline_indent
1704        */
1705    }
1706    for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1707        PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1708        if (i) {
1709            if (_PyAccu_Accumulate(acc, s->item_separator))
1710                goto bail;
1711        }
1712        if (encoder_listencode_obj(s, acc, obj, indent_level))
1713            goto bail;
1714    }
1715    if (ident != NULL) {
1716        if (PyDict_DelItem(s->markers, ident))
1717            goto bail;
1718        Py_CLEAR(ident);
1719    }
1720
1721    /* TODO: DOES NOT RUN
1722    if (s->indent != Py_None) {
1723        indent_level -= 1;
1724
1725        yield '\n' + (' ' * (_indent * _current_indent_level))
1726    }*/
1727    if (_PyAccu_Accumulate(acc, close_array))
1728        goto bail;
1729    Py_DECREF(s_fast);
1730    return 0;
1731
1732bail:
1733    Py_XDECREF(ident);
1734    Py_DECREF(s_fast);
1735    return -1;
1736}
1737
1738static void
1739encoder_dealloc(PyObject *self)
1740{
1741    PyTypeObject *tp = Py_TYPE(self);
1742    /* bpo-31095: UnTrack is needed before calling any callbacks */
1743    PyObject_GC_UnTrack(self);
1744    encoder_clear((PyEncoderObject *)self);
1745    tp->tp_free(self);
1746    Py_DECREF(tp);
1747}
1748
1749static int
1750encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
1751{
1752    Py_VISIT(Py_TYPE(self));
1753    Py_VISIT(self->markers);
1754    Py_VISIT(self->defaultfn);
1755    Py_VISIT(self->encoder);
1756    Py_VISIT(self->indent);
1757    Py_VISIT(self->key_separator);
1758    Py_VISIT(self->item_separator);
1759    return 0;
1760}
1761
1762static int
1763encoder_clear(PyEncoderObject *self)
1764{
1765    /* Deallocate Encoder */
1766    Py_CLEAR(self->markers);
1767    Py_CLEAR(self->defaultfn);
1768    Py_CLEAR(self->encoder);
1769    Py_CLEAR(self->indent);
1770    Py_CLEAR(self->key_separator);
1771    Py_CLEAR(self->item_separator);
1772    return 0;
1773}
1774
1775PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1776
1777static PyType_Slot PyEncoderType_slots[] = {
1778    {Py_tp_doc, (void *)encoder_doc},
1779    {Py_tp_dealloc, encoder_dealloc},
1780    {Py_tp_call, encoder_call},
1781    {Py_tp_traverse, encoder_traverse},
1782    {Py_tp_clear, encoder_clear},
1783    {Py_tp_members, encoder_members},
1784    {Py_tp_new, encoder_new},
1785    {0, 0}
1786};
1787
1788static PyType_Spec PyEncoderType_spec = {
1789    .name = "_json.Encoder",
1790    .basicsize = sizeof(PyEncoderObject),
1791    .itemsize = 0,
1792    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1793    .slots = PyEncoderType_slots
1794};
1795
1796static PyMethodDef speedups_methods[] = {
1797    {"encode_basestring_ascii",
1798        (PyCFunction)py_encode_basestring_ascii,
1799        METH_O,
1800        pydoc_encode_basestring_ascii},
1801    {"encode_basestring",
1802        (PyCFunction)py_encode_basestring,
1803        METH_O,
1804        pydoc_encode_basestring},
1805    {"scanstring",
1806        (PyCFunction)py_scanstring,
1807        METH_VARARGS,
1808        pydoc_scanstring},
1809    {NULL, NULL, 0, NULL}
1810};
1811
1812PyDoc_STRVAR(module_doc,
1813"json speedups\n");
1814
1815static int
1816_json_exec(PyObject *module)
1817{
1818    _jsonmodulestate *state = get_json_state(module);
1819
1820    state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1821    if (state->PyScannerType == NULL) {
1822        return -1;
1823    }
1824    Py_INCREF(state->PyScannerType);
1825    if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
1826        Py_DECREF(state->PyScannerType);
1827        return -1;
1828    }
1829
1830    state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1831    if (state->PyEncoderType == NULL) {
1832        return -1;
1833    }
1834    Py_INCREF(state->PyEncoderType);
1835    if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
1836        Py_DECREF(state->PyEncoderType);
1837        return -1;
1838    }
1839
1840    return 0;
1841}
1842
1843static int
1844_jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
1845{
1846    _jsonmodulestate *state = get_json_state(module);
1847    Py_VISIT(state->PyScannerType);
1848    Py_VISIT(state->PyEncoderType);
1849    return 0;
1850}
1851
1852static int
1853_jsonmodule_clear(PyObject *module)
1854{
1855    _jsonmodulestate *state = get_json_state(module);
1856    Py_CLEAR(state->PyScannerType);
1857    Py_CLEAR(state->PyEncoderType);
1858    return 0;
1859}
1860
1861static void
1862_jsonmodule_free(void *module)
1863{
1864    _jsonmodule_clear((PyObject *)module);
1865}
1866
1867static PyModuleDef_Slot _json_slots[] = {
1868    {Py_mod_exec, _json_exec},
1869    {0, NULL}
1870};
1871
1872static struct PyModuleDef jsonmodule = {
1873        PyModuleDef_HEAD_INIT,
1874        "_json",
1875        module_doc,
1876        sizeof(_jsonmodulestate),
1877        speedups_methods,
1878        _json_slots,
1879        _jsonmodule_traverse,
1880        _jsonmodule_clear,
1881        _jsonmodule_free,
1882};
1883
1884PyMODINIT_FUNC
1885PyInit__json(void)
1886{
1887    return PyModuleDef_Init(&jsonmodule);
1888}
1889