xref: /third_party/python/Modules/_io/textio.c (revision 7db96d56)
1/*
2    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "pycore_interp.h"        // PyInterpreterState.fs_codec
12#include "pycore_long.h"          // _PyLong_GetZero()
13#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
14#include "pycore_object.h"
15#include "pycore_pystate.h"       // _PyInterpreterState_GET()
16#include "structmember.h"         // PyMemberDef
17#include "_iomodule.h"
18
19/*[clinic input]
20module _io
21class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
22class _io.TextIOWrapper "textio *" "&TextIOWrapper_Type"
23[clinic start generated code]*/
24/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ed072384f8aada2c]*/
25
26/* TextIOBase */
27
28PyDoc_STRVAR(textiobase_doc,
29    "Base class for text I/O.\n"
30    "\n"
31    "This class provides a character and line based interface to stream\n"
32    "I/O. There is no readinto method because Python's character strings\n"
33    "are immutable.\n"
34    );
35
36static PyObject *
37_unsupported(const char *message)
38{
39    _PyIO_State *state = IO_STATE();
40    if (state != NULL)
41        PyErr_SetString(state->unsupported_operation, message);
42    return NULL;
43}
44
45PyDoc_STRVAR(textiobase_detach_doc,
46    "Separate the underlying buffer from the TextIOBase and return it.\n"
47    "\n"
48    "After the underlying buffer has been detached, the TextIO is in an\n"
49    "unusable state.\n"
50    );
51
52static PyObject *
53textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
54{
55    return _unsupported("detach");
56}
57
58PyDoc_STRVAR(textiobase_read_doc,
59    "Read at most n characters from stream.\n"
60    "\n"
61    "Read from underlying buffer until we have n characters or we hit EOF.\n"
62    "If n is negative or omitted, read until EOF.\n"
63    );
64
65static PyObject *
66textiobase_read(PyObject *self, PyObject *args)
67{
68    return _unsupported("read");
69}
70
71PyDoc_STRVAR(textiobase_readline_doc,
72    "Read until newline or EOF.\n"
73    "\n"
74    "Returns an empty string if EOF is hit immediately.\n"
75    );
76
77static PyObject *
78textiobase_readline(PyObject *self, PyObject *args)
79{
80    return _unsupported("readline");
81}
82
83PyDoc_STRVAR(textiobase_write_doc,
84    "Write string to stream.\n"
85    "Returns the number of characters written (which is always equal to\n"
86    "the length of the string).\n"
87    );
88
89static PyObject *
90textiobase_write(PyObject *self, PyObject *args)
91{
92    return _unsupported("write");
93}
94
95PyDoc_STRVAR(textiobase_encoding_doc,
96    "Encoding of the text stream.\n"
97    "\n"
98    "Subclasses should override.\n"
99    );
100
101static PyObject *
102textiobase_encoding_get(PyObject *self, void *context)
103{
104    Py_RETURN_NONE;
105}
106
107PyDoc_STRVAR(textiobase_newlines_doc,
108    "Line endings translated so far.\n"
109    "\n"
110    "Only line endings translated during reading are considered.\n"
111    "\n"
112    "Subclasses should override.\n"
113    );
114
115static PyObject *
116textiobase_newlines_get(PyObject *self, void *context)
117{
118    Py_RETURN_NONE;
119}
120
121PyDoc_STRVAR(textiobase_errors_doc,
122    "The error setting of the decoder or encoder.\n"
123    "\n"
124    "Subclasses should override.\n"
125    );
126
127static PyObject *
128textiobase_errors_get(PyObject *self, void *context)
129{
130    Py_RETURN_NONE;
131}
132
133
134static PyMethodDef textiobase_methods[] = {
135    {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
136    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
137    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
138    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
139    {NULL, NULL}
140};
141
142static PyGetSetDef textiobase_getset[] = {
143    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
144    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
145    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
146    {NULL}
147};
148
149PyTypeObject PyTextIOBase_Type = {
150    PyVarObject_HEAD_INIT(NULL, 0)
151    "_io._TextIOBase",          /*tp_name*/
152    0,                          /*tp_basicsize*/
153    0,                          /*tp_itemsize*/
154    0,                          /*tp_dealloc*/
155    0,                          /*tp_vectorcall_offset*/
156    0,                          /*tp_getattr*/
157    0,                          /*tp_setattr*/
158    0,                          /*tp_as_async*/
159    0,                          /*tp_repr*/
160    0,                          /*tp_as_number*/
161    0,                          /*tp_as_sequence*/
162    0,                          /*tp_as_mapping*/
163    0,                          /*tp_hash */
164    0,                          /*tp_call*/
165    0,                          /*tp_str*/
166    0,                          /*tp_getattro*/
167    0,                          /*tp_setattro*/
168    0,                          /*tp_as_buffer*/
169    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
170    textiobase_doc,             /* tp_doc */
171    0,                          /* tp_traverse */
172    0,                          /* tp_clear */
173    0,                          /* tp_richcompare */
174    0,                          /* tp_weaklistoffset */
175    0,                          /* tp_iter */
176    0,                          /* tp_iternext */
177    textiobase_methods,         /* tp_methods */
178    0,                          /* tp_members */
179    textiobase_getset,          /* tp_getset */
180    &PyIOBase_Type,             /* tp_base */
181    0,                          /* tp_dict */
182    0,                          /* tp_descr_get */
183    0,                          /* tp_descr_set */
184    0,                          /* tp_dictoffset */
185    0,                          /* tp_init */
186    0,                          /* tp_alloc */
187    0,                          /* tp_new */
188    0,                          /* tp_free */
189    0,                          /* tp_is_gc */
190    0,                          /* tp_bases */
191    0,                          /* tp_mro */
192    0,                          /* tp_cache */
193    0,                          /* tp_subclasses */
194    0,                          /* tp_weaklist */
195    0,                          /* tp_del */
196    0,                          /* tp_version_tag */
197    0,                          /* tp_finalize */
198};
199
200
201/* IncrementalNewlineDecoder */
202
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; compared against None by the methods */
    PyObject *errors;           /* set by __init__(); NULL means "not initialized" */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by decode() */
    unsigned int translate: 1;  /* rewrite "\r\n" and "\r" to "\n" while decoding */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
211
212/*[clinic input]
213_io.IncrementalNewlineDecoder.__init__
214    decoder: object
215    translate: int
216    errors: object(c_default="NULL") = "strict"
217
218Codec used when reading a file in universal newlines mode.
219
220It wraps another incremental decoder, translating \r\n and \r into \n.
221It also records the types of newlines encountered.  When used with
222translate=False, it ensures that the newline sequence is returned in
223one piece. When used with decoder=None, it expects unicode strings as
224decode input and translates newlines without first invoking an external
225decoder.
226[clinic start generated code]*/
227
228static int
229_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
230                                            PyObject *decoder, int translate,
231                                            PyObject *errors)
232/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
233{
234
235    if (errors == NULL) {
236        errors = Py_NewRef(&_Py_ID(strict));
237    }
238    else {
239        errors = Py_NewRef(errors);
240    }
241
242    Py_XSETREF(self->errors, errors);
243    Py_XSETREF(self->decoder, Py_NewRef(decoder));
244    self->translate = translate ? 1 : 0;
245    self->seennl = 0;
246    self->pendingcr = 0;
247
248    return 0;
249}
250
251static void
252incrementalnewlinedecoder_dealloc(nldecoder_object *self)
253{
254    Py_CLEAR(self->decoder);
255    Py_CLEAR(self->errors);
256    Py_TYPE(self)->tp_free((PyObject *)self);
257}
258
259static int
260check_decoded(PyObject *decoded)
261{
262    if (decoded == NULL)
263        return -1;
264    if (!PyUnicode_Check(decoded)) {
265        PyErr_Format(PyExc_TypeError,
266                     "decoder should return a string result, not '%.200s'",
267                     Py_TYPE(decoded)->tp_name);
268        Py_DECREF(decoded);
269        return -1;
270    }
271    if (PyUnicode_READY(decoded) < 0) {
272        Py_DECREF(decoded);
273        return -1;
274    }
275    return 0;
276}
277
/* Guard for IncrementalNewlineDecoder methods: when __init__() has not run
   (self->errors is still NULL), set ValueError and return NULL from the
   *calling* function.

   The body is wrapped in do { } while (0) so the expansion is a single
   statement that cannot capture a following `else`, and the argument is
   parenthesized.  Use it with a trailing semicolon. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
284
/* Bit flags stored in nldecoder_object.seennl: which newline conventions
   decode() has encountered so far. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
289
290PyObject *
291_PyIncrementalNewlineDecoder_decode(PyObject *myself,
292                                    PyObject *input, int final)
293{
294    PyObject *output;
295    Py_ssize_t output_len;
296    nldecoder_object *self = (nldecoder_object *) myself;
297
298    CHECK_INITIALIZED_DECODER(self);
299
300    /* decode input (with the eventual \r from a previous pass) */
301    if (self->decoder != Py_None) {
302        output = PyObject_CallMethodObjArgs(self->decoder,
303            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
304    }
305    else {
306        output = input;
307        Py_INCREF(output);
308    }
309
310    if (check_decoded(output) < 0)
311        return NULL;
312
313    output_len = PyUnicode_GET_LENGTH(output);
314    if (self->pendingcr && (final || output_len > 0)) {
315        /* Prefix output with CR */
316        int kind;
317        PyObject *modified;
318        char *out;
319
320        modified = PyUnicode_New(output_len + 1,
321                                 PyUnicode_MAX_CHAR_VALUE(output));
322        if (modified == NULL)
323            goto error;
324        kind = PyUnicode_KIND(modified);
325        out = PyUnicode_DATA(modified);
326        PyUnicode_WRITE(kind, out, 0, '\r');
327        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
328        Py_DECREF(output);
329        output = modified; /* output remains ready */
330        self->pendingcr = 0;
331        output_len++;
332    }
333
334    /* retain last \r even when not translating data:
335     * then readline() is sure to get \r\n in one pass
336     */
337    if (!final) {
338        if (output_len > 0
339            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
340        {
341            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
342            if (modified == NULL)
343                goto error;
344            Py_DECREF(output);
345            output = modified;
346            self->pendingcr = 1;
347        }
348    }
349
350    /* Record which newlines are read and do newline translation if desired,
351       all in one pass. */
352    {
353        const void *in_str;
354        Py_ssize_t len;
355        int seennl = self->seennl;
356        int only_lf = 0;
357        int kind;
358
359        in_str = PyUnicode_DATA(output);
360        len = PyUnicode_GET_LENGTH(output);
361        kind = PyUnicode_KIND(output);
362
363        if (len == 0)
364            return output;
365
366        /* If, up to now, newlines are consistently \n, do a quick check
367           for the \r *byte* with the libc's optimized memchr.
368           */
369        if (seennl == SEEN_LF || seennl == 0) {
370            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
371        }
372
373        if (only_lf) {
374            /* If not already seen, quick scan for a possible "\n" character.
375               (there's nothing else to be done, even when in translation mode)
376            */
377            if (seennl == 0 &&
378                memchr(in_str, '\n', kind * len) != NULL) {
379                if (kind == PyUnicode_1BYTE_KIND)
380                    seennl |= SEEN_LF;
381                else {
382                    Py_ssize_t i = 0;
383                    for (;;) {
384                        Py_UCS4 c;
385                        /* Fast loop for non-control characters */
386                        while (PyUnicode_READ(kind, in_str, i) > '\n')
387                            i++;
388                        c = PyUnicode_READ(kind, in_str, i++);
389                        if (c == '\n') {
390                            seennl |= SEEN_LF;
391                            break;
392                        }
393                        if (i >= len)
394                            break;
395                    }
396                }
397            }
398            /* Finished: we have scanned for newlines, and none of them
399               need translating */
400        }
401        else if (!self->translate) {
402            Py_ssize_t i = 0;
403            /* We have already seen all newline types, no need to scan again */
404            if (seennl == SEEN_ALL)
405                goto endscan;
406            for (;;) {
407                Py_UCS4 c;
408                /* Fast loop for non-control characters */
409                while (PyUnicode_READ(kind, in_str, i) > '\r')
410                    i++;
411                c = PyUnicode_READ(kind, in_str, i++);
412                if (c == '\n')
413                    seennl |= SEEN_LF;
414                else if (c == '\r') {
415                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
416                        seennl |= SEEN_CRLF;
417                        i++;
418                    }
419                    else
420                        seennl |= SEEN_CR;
421                }
422                if (i >= len)
423                    break;
424                if (seennl == SEEN_ALL)
425                    break;
426            }
427        endscan:
428            ;
429        }
430        else {
431            void *translated;
432            int kind = PyUnicode_KIND(output);
433            const void *in_str = PyUnicode_DATA(output);
434            Py_ssize_t in, out;
435            /* XXX: Previous in-place translation here is disabled as
436               resizing is not possible anymore */
437            /* We could try to optimize this so that we only do a copy
438               when there is something to translate. On the other hand,
439               we already know there is a \r byte, so chances are high
440               that something needs to be done. */
441            translated = PyMem_Malloc(kind * len);
442            if (translated == NULL) {
443                PyErr_NoMemory();
444                goto error;
445            }
446            in = out = 0;
447            for (;;) {
448                Py_UCS4 c;
449                /* Fast loop for non-control characters */
450                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
451                    PyUnicode_WRITE(kind, translated, out++, c);
452                if (c == '\n') {
453                    PyUnicode_WRITE(kind, translated, out++, c);
454                    seennl |= SEEN_LF;
455                    continue;
456                }
457                if (c == '\r') {
458                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
459                        in++;
460                        seennl |= SEEN_CRLF;
461                    }
462                    else
463                        seennl |= SEEN_CR;
464                    PyUnicode_WRITE(kind, translated, out++, '\n');
465                    continue;
466                }
467                if (in > len)
468                    break;
469                PyUnicode_WRITE(kind, translated, out++, c);
470            }
471            Py_DECREF(output);
472            output = PyUnicode_FromKindAndData(kind, translated, out);
473            PyMem_Free(translated);
474            if (!output)
475                return NULL;
476        }
477        self->seennl |= seennl;
478    }
479
480    return output;
481
482  error:
483    Py_DECREF(output);
484    return NULL;
485}
486
487/*[clinic input]
488_io.IncrementalNewlineDecoder.decode
489    input: object
490    final: bool(accept={int}) = False
491[clinic start generated code]*/
492
493static PyObject *
494_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
495                                          PyObject *input, int final)
496/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
497{
498    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
499}
500
501/*[clinic input]
502_io.IncrementalNewlineDecoder.getstate
503[clinic start generated code]*/
504
505static PyObject *
506_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
507/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
508{
509    PyObject *buffer;
510    unsigned long long flag;
511
512    CHECK_INITIALIZED_DECODER(self);
513
514    if (self->decoder != Py_None) {
515        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
516           &_Py_ID(getstate));
517        if (state == NULL)
518            return NULL;
519        if (!PyTuple_Check(state)) {
520            PyErr_SetString(PyExc_TypeError,
521                            "illegal decoder state");
522            Py_DECREF(state);
523            return NULL;
524        }
525        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
526                              &buffer, &flag))
527        {
528            Py_DECREF(state);
529            return NULL;
530        }
531        Py_INCREF(buffer);
532        Py_DECREF(state);
533    }
534    else {
535        buffer = PyBytes_FromString("");
536        flag = 0;
537    }
538    flag <<= 1;
539    if (self->pendingcr)
540        flag |= 1;
541    return Py_BuildValue("NK", buffer, flag);
542}
543
544/*[clinic input]
545_io.IncrementalNewlineDecoder.setstate
546    state: object
547    /
548[clinic start generated code]*/
549
550static PyObject *
551_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
552                                       PyObject *state)
553/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
554{
555    PyObject *buffer;
556    unsigned long long flag;
557
558    CHECK_INITIALIZED_DECODER(self);
559
560    if (!PyTuple_Check(state)) {
561        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
562        return NULL;
563    }
564    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
565                          &buffer, &flag))
566    {
567        return NULL;
568    }
569
570    self->pendingcr = (int) (flag & 1);
571    flag >>= 1;
572
573    if (self->decoder != Py_None) {
574        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
575                                    "((OK))", buffer, flag);
576    }
577    else {
578        Py_RETURN_NONE;
579    }
580}
581
582/*[clinic input]
583_io.IncrementalNewlineDecoder.reset
584[clinic start generated code]*/
585
586static PyObject *
587_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
588/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
589{
590    CHECK_INITIALIZED_DECODER(self);
591
592    self->seennl = 0;
593    self->pendingcr = 0;
594    if (self->decoder != Py_None)
595        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
596    else
597        Py_RETURN_NONE;
598}
599
600static PyObject *
601incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
602{
603    CHECK_INITIALIZED_DECODER(self);
604
605    switch (self->seennl) {
606    case SEEN_CR:
607        return PyUnicode_FromString("\r");
608    case SEEN_LF:
609        return PyUnicode_FromString("\n");
610    case SEEN_CRLF:
611        return PyUnicode_FromString("\r\n");
612    case SEEN_CR | SEEN_LF:
613        return Py_BuildValue("ss", "\r", "\n");
614    case SEEN_CR | SEEN_CRLF:
615        return Py_BuildValue("ss", "\r", "\r\n");
616    case SEEN_LF | SEEN_CRLF:
617        return Py_BuildValue("ss", "\n", "\r\n");
618    case SEEN_CR | SEEN_LF | SEEN_CRLF:
619        return Py_BuildValue("sss", "\r", "\n", "\r\n");
620    default:
621        Py_RETURN_NONE;
622   }
623
624}
625
626/* TextIOWrapper */
627
628typedef PyObject *
629        (*encodefunc_t)(PyObject *, PyObject *);
630
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;       /* underlying buffer object (readable()/writable()/tell() are called on it) */
    PyObject *encoding;
    PyObject *encoder;      /* incremental encoder; set only when the buffer is writable */
    PyObject *decoder;      /* incremental decoder; set only when the buffer is readable */
    PyObject *readnl;       /* str built from the newline argument, or NULL */
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;     /* newline argument was NULL or "" */
    char readtranslate;     /* newline argument was NULL */
    char writetranslate;    /* newline argument was NULL or non-empty */
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
689
690static void
691textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
692
693/* A couple of specialized cases in order to bypass the slow incremental
694   encoding methods for the most popular encodings. */
695
696static PyObject *
697ascii_encode(textio *self, PyObject *text)
698{
699    return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
700}
701
702static PyObject *
703utf16be_encode(textio *self, PyObject *text)
704{
705    return _PyUnicode_EncodeUTF16(text,
706                                  PyUnicode_AsUTF8(self->errors), 1);
707}
708
709static PyObject *
710utf16le_encode(textio *self, PyObject *text)
711{
712    return _PyUnicode_EncodeUTF16(text,
713                                  PyUnicode_AsUTF8(self->errors), -1);
714}
715
716static PyObject *
717utf16_encode(textio *self, PyObject *text)
718{
719    if (!self->encoding_start_of_stream) {
720        /* Skip the BOM and use native byte ordering */
721#if PY_BIG_ENDIAN
722        return utf16be_encode(self, text);
723#else
724        return utf16le_encode(self, text);
725#endif
726    }
727    return _PyUnicode_EncodeUTF16(text,
728                                  PyUnicode_AsUTF8(self->errors), 0);
729}
730
731static PyObject *
732utf32be_encode(textio *self, PyObject *text)
733{
734    return _PyUnicode_EncodeUTF32(text,
735                                  PyUnicode_AsUTF8(self->errors), 1);
736}
737
738static PyObject *
739utf32le_encode(textio *self, PyObject *text)
740{
741    return _PyUnicode_EncodeUTF32(text,
742                                  PyUnicode_AsUTF8(self->errors), -1);
743}
744
745static PyObject *
746utf32_encode(textio *self, PyObject *text)
747{
748    if (!self->encoding_start_of_stream) {
749        /* Skip the BOM and use native byte ordering */
750#if PY_BIG_ENDIAN
751        return utf32be_encode(self, text);
752#else
753        return utf32le_encode(self, text);
754#endif
755    }
756    return _PyUnicode_EncodeUTF32(text,
757                                  PyUnicode_AsUTF8(self->errors), 0);
758}
759
760static PyObject *
761utf8_encode(textio *self, PyObject *text)
762{
763    return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
764}
765
766static PyObject *
767latin1_encode(textio *self, PyObject *text)
768{
769    return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
770}
771
772// Return true when encoding can be skipped when text is ascii.
773static inline int
774is_asciicompat_encoding(encodefunc_t f)
775{
776    return f == (encodefunc_t) ascii_encode
777        || f == (encodefunc_t) latin1_encode
778        || f == (encodefunc_t) utf8_encode;
779}
780
781/* Map normalized encoding names onto the specialized encoding funcs */
782
/* One row of the encodefuncs lookup table. */
typedef struct {
    const char *name;          /* normalized codec name; NULL in the sentinel row */
    encodefunc_t encodefunc;   /* specialized encoder used for that codec */
} encodefuncentry;
787
788static const encodefuncentry encodefuncs[] = {
789    {"ascii",       (encodefunc_t) ascii_encode},
790    {"iso8859-1",   (encodefunc_t) latin1_encode},
791    {"utf-8",       (encodefunc_t) utf8_encode},
792    {"utf-16-be",   (encodefunc_t) utf16be_encode},
793    {"utf-16-le",   (encodefunc_t) utf16le_encode},
794    {"utf-16",      (encodefunc_t) utf16_encode},
795    {"utf-32-be",   (encodefunc_t) utf32be_encode},
796    {"utf-32-le",   (encodefunc_t) utf32le_encode},
797    {"utf-32",      (encodefunc_t) utf32_encode},
798    {NULL, NULL}
799};
800
801static int
802validate_newline(const char *newline)
803{
804    if (newline && newline[0] != '\0'
805        && !(newline[0] == '\n' && newline[1] == '\0')
806        && !(newline[0] == '\r' && newline[1] == '\0')
807        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
808        PyErr_Format(PyExc_ValueError,
809                     "illegal newline value: %s", newline);
810        return -1;
811    }
812    return 0;
813}
814
815static int
816set_newline(textio *self, const char *newline)
817{
818    PyObject *old = self->readnl;
819    if (newline == NULL) {
820        self->readnl = NULL;
821    }
822    else {
823        self->readnl = PyUnicode_FromString(newline);
824        if (self->readnl == NULL) {
825            self->readnl = old;
826            return -1;
827        }
828    }
829    self->readuniversal = (newline == NULL || newline[0] == '\0');
830    self->readtranslate = (newline == NULL);
831    self->writetranslate = (newline == NULL || newline[0] != '\0');
832    if (!self->readuniversal && self->readnl != NULL) {
833        // validate_newline() accepts only ASCII newlines.
834        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
835        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
836        if (strcmp(self->writenl, "\n") == 0) {
837            self->writenl = NULL;
838        }
839    }
840    else {
841#ifdef MS_WINDOWS
842        self->writenl = "\r\n";
843#else
844        self->writenl = NULL;
845#endif
846    }
847    Py_XDECREF(old);
848    return 0;
849}
850
851static int
852_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
853                           const char *errors)
854{
855    PyObject *res;
856    int r;
857
858    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
859    if (res == NULL)
860        return -1;
861
862    r = PyObject_IsTrue(res);
863    Py_DECREF(res);
864    if (r == -1)
865        return -1;
866
867    if (r != 1)
868        return 0;
869
870    Py_CLEAR(self->decoder);
871    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
872    if (self->decoder == NULL)
873        return -1;
874
875    if (self->readuniversal) {
876        PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
877            (PyObject *)&PyIncrementalNewlineDecoder_Type,
878            self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
879        if (incrementalDecoder == NULL)
880            return -1;
881        Py_CLEAR(self->decoder);
882        self->decoder = incrementalDecoder;
883    }
884
885    return 0;
886}
887
888static PyObject*
889_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
890{
891    PyObject *chars;
892
893    if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
894        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
895    else
896        chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
897                                           eof ? Py_True : Py_False, NULL);
898
899    if (check_decoded(chars) < 0)
900        // check_decoded already decreases refcount
901        return NULL;
902
903    return chars;
904}
905
906static int
907_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
908                           const char *errors)
909{
910    PyObject *res;
911    int r;
912
913    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
914    if (res == NULL)
915        return -1;
916
917    r = PyObject_IsTrue(res);
918    Py_DECREF(res);
919    if (r == -1)
920        return -1;
921
922    if (r != 1)
923        return 0;
924
925    Py_CLEAR(self->encoder);
926    self->encodefunc = NULL;
927    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
928    if (self->encoder == NULL)
929        return -1;
930
931    /* Get the normalized named of the codec */
932    if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) {
933        return -1;
934    }
935    if (res != NULL && PyUnicode_Check(res)) {
936        const encodefuncentry *e = encodefuncs;
937        while (e->name != NULL) {
938            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
939                self->encodefunc = e->encodefunc;
940                break;
941            }
942            e++;
943        }
944    }
945    Py_XDECREF(res);
946
947    return 0;
948}
949
950static int
951_textiowrapper_fix_encoder_state(textio *self)
952{
953    if (!self->seekable || !self->encoder) {
954        return 0;
955    }
956
957    self->encoding_start_of_stream = 1;
958
959    PyObject *cookieObj = PyObject_CallMethodNoArgs(
960        self->buffer, &_Py_ID(tell));
961    if (cookieObj == NULL) {
962        return -1;
963    }
964
965    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
966    Py_DECREF(cookieObj);
967    if (cmp < 0) {
968        return -1;
969    }
970
971    if (cmp == 0) {
972        self->encoding_start_of_stream = 0;
973        PyObject *res = PyObject_CallMethodOneArg(
974            self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
975        if (res == NULL) {
976            return -1;
977        }
978        Py_DECREF(res);
979    }
980
981    return 0;
982}
983
984static int
985io_check_errors(PyObject *errors)
986{
987    assert(errors != NULL && errors != Py_None);
988
989    PyInterpreterState *interp = _PyInterpreterState_GET();
990#ifndef Py_DEBUG
991    /* In release mode, only check in development mode (-X dev) */
992    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
993        return 0;
994    }
995#else
996    /* Always check in debug mode */
997#endif
998
999    /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1000       before_PyUnicode_InitEncodings() is called. */
1001    if (!interp->unicode.fs_codec.encoding) {
1002        return 0;
1003    }
1004
1005    Py_ssize_t name_length;
1006    const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1007    if (name == NULL) {
1008        return -1;
1009    }
1010    if (strlen(name) != (size_t)name_length) {
1011        PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1012        return -1;
1013    }
1014    PyObject *handler = PyCodec_LookupError(name);
1015    if (handler != NULL) {
1016        Py_DECREF(handler);
1017        return 0;
1018    }
1019    return -1;
1020}
1021
1022
1023
1024/*[clinic input]
1025_io.TextIOWrapper.__init__
1026    buffer: object
1027    encoding: str(accept={str, NoneType}) = None
1028    errors: object = None
1029    newline: str(accept={str, NoneType}) = None
1030    line_buffering: bool(accept={int}) = False
1031    write_through: bool(accept={int}) = False
1032
1033Character and line based layer over a BufferedIOBase object, buffer.
1034
1035encoding gives the name of the encoding that the stream will be
1036decoded or encoded with. It defaults to locale.getencoding().
1037
1038errors determines the strictness of encoding and decoding (see
1039help(codecs.Codec) or the documentation for codecs.register) and
1040defaults to "strict".
1041
1042newline controls how line endings are handled. It can be None, '',
1043'\n', '\r', and '\r\n'.  It works as follows:
1044
1045* On input, if newline is None, universal newlines mode is
1046  enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1047  these are translated into '\n' before being returned to the
1048  caller. If it is '', universal newline mode is enabled, but line
1049  endings are returned to the caller untranslated. If it has any of
1050  the other legal values, input lines are only terminated by the given
1051  string, and the line ending is returned to the caller untranslated.
1052
1053* On output, if newline is None, any '\n' characters written are
1054  translated to the system default line separator, os.linesep. If
1055  newline is '' or '\n', no translation takes place. If newline is any
1056  of the other legal values, any '\n' characters written are translated
1057  to the given string.
1058
1059If line_buffering is True, a call to flush is implied when a call to
1060write contains a newline character.
1061[clinic start generated code]*/
1062
1063static int
1064_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1065                                const char *encoding, PyObject *errors,
1066                                const char *newline, int line_buffering,
1067                                int write_through)
1068/*[clinic end generated code: output=72267c0c01032ed2 input=72590963698f289b]*/
1069{
1070    PyObject *raw, *codec_info = NULL;
1071    PyObject *res;
1072    int r;
1073
1074    self->ok = 0;
1075    self->detached = 0;
1076
1077    if (encoding == NULL) {
1078        PyInterpreterState *interp = _PyInterpreterState_GET();
1079        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1080            if (PyErr_WarnEx(PyExc_EncodingWarning,
1081                             "'encoding' argument not specified", 1)) {
1082                return -1;
1083            }
1084        }
1085    }
1086
1087    if (errors == Py_None) {
1088        errors = &_Py_ID(strict);
1089    }
1090    else if (!PyUnicode_Check(errors)) {
1091        // Check 'errors' argument here because Argument Clinic doesn't support
1092        // 'str(accept={str, NoneType})' converter.
1093        PyErr_Format(
1094            PyExc_TypeError,
1095            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1096            Py_TYPE(errors)->tp_name);
1097        return -1;
1098    }
1099    else if (io_check_errors(errors)) {
1100        return -1;
1101    }
1102
1103    if (validate_newline(newline) < 0) {
1104        return -1;
1105    }
1106
1107    Py_CLEAR(self->buffer);
1108    Py_CLEAR(self->encoding);
1109    Py_CLEAR(self->encoder);
1110    Py_CLEAR(self->decoder);
1111    Py_CLEAR(self->readnl);
1112    Py_CLEAR(self->decoded_chars);
1113    Py_CLEAR(self->pending_bytes);
1114    Py_CLEAR(self->snapshot);
1115    Py_CLEAR(self->errors);
1116    Py_CLEAR(self->raw);
1117    self->decoded_chars_used = 0;
1118    self->pending_bytes_count = 0;
1119    self->encodefunc = NULL;
1120    self->b2cratio = 0.0;
1121
1122    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1123        _Py_DECLARE_STR(utf_8, "utf-8");
1124        self->encoding = Py_NewRef(&_Py_STR(utf_8));
1125    }
1126    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1127        self->encoding = _Py_GetLocaleEncodingObject();
1128        if (self->encoding == NULL) {
1129            goto error;
1130        }
1131        assert(PyUnicode_Check(self->encoding));
1132    }
1133
1134    if (self->encoding != NULL) {
1135        encoding = PyUnicode_AsUTF8(self->encoding);
1136        if (encoding == NULL)
1137            goto error;
1138    }
1139    else if (encoding != NULL) {
1140        self->encoding = PyUnicode_FromString(encoding);
1141        if (self->encoding == NULL)
1142            goto error;
1143    }
1144    else {
1145        PyErr_SetString(PyExc_OSError,
1146                        "could not determine default encoding");
1147        goto error;
1148    }
1149
1150    /* Check we have been asked for a real text encoding */
1151    codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1152    if (codec_info == NULL) {
1153        Py_CLEAR(self->encoding);
1154        goto error;
1155    }
1156
1157    /* XXX: Failures beyond this point have the potential to leak elements
1158     * of the partially constructed object (like self->encoding)
1159     */
1160
1161    Py_INCREF(errors);
1162    self->errors = errors;
1163    self->chunk_size = 8192;
1164    self->line_buffering = line_buffering;
1165    self->write_through = write_through;
1166    if (set_newline(self, newline) < 0) {
1167        goto error;
1168    }
1169
1170    self->buffer = buffer;
1171    Py_INCREF(buffer);
1172
1173    /* Build the decoder object */
1174    if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1175        goto error;
1176
1177    /* Build the encoder object */
1178    if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1179        goto error;
1180
1181    /* Finished sorting out the codec details */
1182    Py_CLEAR(codec_info);
1183
1184    if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1185        Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1186        Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
1187    {
1188        if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
1189            goto error;
1190        /* Cache the raw FileIO object to speed up 'closed' checks */
1191        if (raw != NULL) {
1192            if (Py_IS_TYPE(raw, &PyFileIO_Type))
1193                self->raw = raw;
1194            else
1195                Py_DECREF(raw);
1196        }
1197    }
1198
1199    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1200    if (res == NULL)
1201        goto error;
1202    r = PyObject_IsTrue(res);
1203    Py_DECREF(res);
1204    if (r < 0)
1205        goto error;
1206    self->seekable = self->telling = r;
1207
1208    r = _PyObject_LookupAttr(buffer, &_Py_ID(read1), &res);
1209    if (r < 0) {
1210        goto error;
1211    }
1212    Py_XDECREF(res);
1213    self->has_read1 = r;
1214
1215    self->encoding_start_of_stream = 0;
1216    if (_textiowrapper_fix_encoder_state(self) < 0) {
1217        goto error;
1218    }
1219
1220    self->ok = 1;
1221    return 0;
1222
1223  error:
1224    Py_XDECREF(codec_info);
1225    return -1;
1226}
1227
1228/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1229 * -1 on error.
1230 */
1231static int
1232convert_optional_bool(PyObject *obj, int default_value)
1233{
1234    long v;
1235    if (obj == Py_None) {
1236        v = default_value;
1237    }
1238    else {
1239        v = PyLong_AsLong(obj);
1240        if (v == -1 && PyErr_Occurred())
1241            return -1;
1242    }
1243    return v != 0;
1244}
1245
1246static int
1247textiowrapper_change_encoding(textio *self, PyObject *encoding,
1248                              PyObject *errors, int newline_changed)
1249{
1250    /* Use existing settings where new settings are not specified */
1251    if (encoding == Py_None && errors == Py_None && !newline_changed) {
1252        return 0;  // no change
1253    }
1254
1255    if (encoding == Py_None) {
1256        encoding = self->encoding;
1257        if (errors == Py_None) {
1258            errors = self->errors;
1259        }
1260        Py_INCREF(encoding);
1261    }
1262    else {
1263        if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1264            encoding = _Py_GetLocaleEncodingObject();
1265            if (encoding == NULL) {
1266                return -1;
1267            }
1268        } else {
1269            Py_INCREF(encoding);
1270        }
1271        if (errors == Py_None) {
1272            errors = &_Py_ID(strict);
1273        }
1274    }
1275
1276    const char *c_errors = PyUnicode_AsUTF8(errors);
1277    if (c_errors == NULL) {
1278        Py_DECREF(encoding);
1279        return -1;
1280    }
1281
1282    // Create new encoder & decoder
1283    PyObject *codec_info = _PyCodec_LookupTextEncoding(
1284        PyUnicode_AsUTF8(encoding), "codecs.open()");
1285    if (codec_info == NULL) {
1286        Py_DECREF(encoding);
1287        return -1;
1288    }
1289    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1290            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1291        Py_DECREF(codec_info);
1292        Py_DECREF(encoding);
1293        return -1;
1294    }
1295    Py_DECREF(codec_info);
1296
1297    Py_INCREF(errors);
1298    Py_SETREF(self->encoding, encoding);
1299    Py_SETREF(self->errors, errors);
1300
1301    return _textiowrapper_fix_encoder_state(self);
1302}
1303
1304/*[clinic input]
1305_io.TextIOWrapper.reconfigure
1306    *
1307    encoding: object = None
1308    errors: object = None
1309    newline as newline_obj: object(c_default="NULL") = None
1310    line_buffering as line_buffering_obj: object = None
1311    write_through as write_through_obj: object = None
1312
1313Reconfigure the text stream with new parameters.
1314
1315This also does an implicit stream flush.
1316
1317[clinic start generated code]*/
1318
1319static PyObject *
1320_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1321                                   PyObject *errors, PyObject *newline_obj,
1322                                   PyObject *line_buffering_obj,
1323                                   PyObject *write_through_obj)
1324/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1325{
1326    int line_buffering;
1327    int write_through;
1328    const char *newline = NULL;
1329
1330    /* Check if something is in the read buffer */
1331    if (self->decoded_chars != NULL) {
1332        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1333            _unsupported("It is not possible to set the encoding or newline "
1334                         "of stream after the first read");
1335            return NULL;
1336        }
1337    }
1338
1339    if (newline_obj != NULL && newline_obj != Py_None) {
1340        newline = PyUnicode_AsUTF8(newline_obj);
1341        if (newline == NULL || validate_newline(newline) < 0) {
1342            return NULL;
1343        }
1344    }
1345
1346    line_buffering = convert_optional_bool(line_buffering_obj,
1347                                           self->line_buffering);
1348    write_through = convert_optional_bool(write_through_obj,
1349                                          self->write_through);
1350    if (line_buffering < 0 || write_through < 0) {
1351        return NULL;
1352    }
1353
1354    PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1355    if (res == NULL) {
1356        return NULL;
1357    }
1358    Py_DECREF(res);
1359    self->b2cratio = 0;
1360
1361    if (newline_obj != NULL && set_newline(self, newline) < 0) {
1362        return NULL;
1363    }
1364
1365    if (textiowrapper_change_encoding(
1366            self, encoding, errors, newline_obj != NULL) < 0) {
1367        return NULL;
1368    }
1369
1370    self->line_buffering = line_buffering;
1371    self->write_through = write_through;
1372    Py_RETURN_NONE;
1373}
1374
1375static int
1376textiowrapper_clear(textio *self)
1377{
1378    self->ok = 0;
1379    Py_CLEAR(self->buffer);
1380    Py_CLEAR(self->encoding);
1381    Py_CLEAR(self->encoder);
1382    Py_CLEAR(self->decoder);
1383    Py_CLEAR(self->readnl);
1384    Py_CLEAR(self->decoded_chars);
1385    Py_CLEAR(self->pending_bytes);
1386    Py_CLEAR(self->snapshot);
1387    Py_CLEAR(self->errors);
1388    Py_CLEAR(self->raw);
1389
1390    Py_CLEAR(self->dict);
1391    return 0;
1392}
1393
1394static void
1395textiowrapper_dealloc(textio *self)
1396{
1397    self->finalizing = 1;
1398    if (_PyIOBase_finalize((PyObject *) self) < 0)
1399        return;
1400    self->ok = 0;
1401    _PyObject_GC_UNTRACK(self);
1402    if (self->weakreflist != NULL)
1403        PyObject_ClearWeakRefs((PyObject *)self);
1404    textiowrapper_clear(self);
1405    Py_TYPE(self)->tp_free((PyObject *)self);
1406}
1407
1408static int
1409textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1410{
1411    Py_VISIT(self->buffer);
1412    Py_VISIT(self->encoding);
1413    Py_VISIT(self->encoder);
1414    Py_VISIT(self->decoder);
1415    Py_VISIT(self->readnl);
1416    Py_VISIT(self->decoded_chars);
1417    Py_VISIT(self->pending_bytes);
1418    Py_VISIT(self->snapshot);
1419    Py_VISIT(self->errors);
1420    Py_VISIT(self->raw);
1421
1422    Py_VISIT(self->dict);
1423    return 0;
1424}
1425
1426static PyObject *
1427textiowrapper_closed_get(textio *self, void *context);
1428
/* Guard macros used at the top of TextIOWrapper methods.
 *
 * Each one expands to a single statement (do { ... } while (0)), so it must
 * be followed by a semicolon and is safe inside an unbraced if/else.  On
 * failure they set ValueError and return from the *calling* function.
 */

/* Return NULL with ValueError set when the stream is closed.
   This macro takes some shortcuts to make the common case faster: for an
   exact TextIOWrapper with a cached raw object, the raw object is queried
   directly instead of going through the `closed` getter. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_IS_TYPE((self), &PyTextIOWrapper_Type)) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)

/* Return NULL with ValueError set when initialisation did not complete. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Return NULL with ValueError set when the object is uninitialised or its
   buffer has been detached. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)

/* Same checks as CHECK_ATTACHED, for functions that report errors by
   returning -1. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1481
1482
1483/*[clinic input]
1484_io.TextIOWrapper.detach
1485[clinic start generated code]*/
1486
1487static PyObject *
1488_io_TextIOWrapper_detach_impl(textio *self)
1489/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1490{
1491    PyObject *buffer, *res;
1492    CHECK_ATTACHED(self);
1493    res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1494    if (res == NULL)
1495        return NULL;
1496    Py_DECREF(res);
1497    buffer = self->buffer;
1498    self->buffer = NULL;
1499    self->detached = 1;
1500    return buffer;
1501}
1502
1503/* Flush the internal write buffer. This doesn't explicitly flush the
1504   underlying buffered object, though. */
1505static int
1506_textiowrapper_writeflush(textio *self)
1507{
1508    if (self->pending_bytes == NULL)
1509        return 0;
1510
1511    PyObject *pending = self->pending_bytes;
1512    PyObject *b;
1513
1514    if (PyBytes_Check(pending)) {
1515        b = pending;
1516        Py_INCREF(b);
1517    }
1518    else if (PyUnicode_Check(pending)) {
1519        assert(PyUnicode_IS_ASCII(pending));
1520        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1521        b = PyBytes_FromStringAndSize(
1522                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1523        if (b == NULL) {
1524            return -1;
1525        }
1526    }
1527    else {
1528        assert(PyList_Check(pending));
1529        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1530        if (b == NULL) {
1531            return -1;
1532        }
1533
1534        char *buf = PyBytes_AsString(b);
1535        Py_ssize_t pos = 0;
1536
1537        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1538            PyObject *obj = PyList_GET_ITEM(pending, i);
1539            char *src;
1540            Py_ssize_t len;
1541            if (PyUnicode_Check(obj)) {
1542                assert(PyUnicode_IS_ASCII(obj));
1543                src = PyUnicode_DATA(obj);
1544                len = PyUnicode_GET_LENGTH(obj);
1545            }
1546            else {
1547                assert(PyBytes_Check(obj));
1548                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1549                    Py_DECREF(b);
1550                    return -1;
1551                }
1552            }
1553            memcpy(buf + pos, src, len);
1554            pos += len;
1555        }
1556        assert(pos == self->pending_bytes_count);
1557    }
1558
1559    self->pending_bytes_count = 0;
1560    self->pending_bytes = NULL;
1561    Py_DECREF(pending);
1562
1563    PyObject *ret;
1564    do {
1565        ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1566    } while (ret == NULL && _PyIO_trap_eintr());
1567    Py_DECREF(b);
1568    // NOTE: We cleared buffer but we don't know how many bytes are actually written
1569    // when an error occurred.
1570    if (ret == NULL)
1571        return -1;
1572    Py_DECREF(ret);
1573    return 0;
1574}
1575
1576/*[clinic input]
1577_io.TextIOWrapper.write
1578    text: unicode
1579    /
1580[clinic start generated code]*/
1581
1582static PyObject *
1583_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1584/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1585{
1586    PyObject *ret;
1587    PyObject *b;
1588    Py_ssize_t textlen;
1589    int haslf = 0;
1590    int needflush = 0, text_needflush = 0;
1591
1592    if (PyUnicode_READY(text) == -1)
1593        return NULL;
1594
1595    CHECK_ATTACHED(self);
1596    CHECK_CLOSED(self);
1597
1598    if (self->encoder == NULL)
1599        return _unsupported("not writable");
1600
1601    Py_INCREF(text);
1602
1603    textlen = PyUnicode_GET_LENGTH(text);
1604
1605    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1606        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1607            haslf = 1;
1608
1609    if (haslf && self->writetranslate && self->writenl != NULL) {
1610        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1611                                                 "ss", "\n", self->writenl);
1612        Py_DECREF(text);
1613        if (newtext == NULL)
1614            return NULL;
1615        text = newtext;
1616    }
1617
1618    if (self->write_through)
1619        text_needflush = 1;
1620    if (self->line_buffering &&
1621        (haslf ||
1622         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1623        needflush = 1;
1624
1625    /* XXX What if we were just reading? */
1626    if (self->encodefunc != NULL) {
1627        if (PyUnicode_IS_ASCII(text) &&
1628                // See bpo-43260
1629                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1630                is_asciicompat_encoding(self->encodefunc)) {
1631            b = text;
1632            Py_INCREF(b);
1633        }
1634        else {
1635            b = (*self->encodefunc)((PyObject *) self, text);
1636        }
1637        self->encoding_start_of_stream = 0;
1638    }
1639    else {
1640        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1641    }
1642
1643    Py_DECREF(text);
1644    if (b == NULL)
1645        return NULL;
1646    if (b != text && !PyBytes_Check(b)) {
1647        PyErr_Format(PyExc_TypeError,
1648                     "encoder should return a bytes object, not '%.200s'",
1649                     Py_TYPE(b)->tp_name);
1650        Py_DECREF(b);
1651        return NULL;
1652    }
1653
1654    Py_ssize_t bytes_len;
1655    if (b == text) {
1656        bytes_len = PyUnicode_GET_LENGTH(b);
1657    }
1658    else {
1659        bytes_len = PyBytes_GET_SIZE(b);
1660    }
1661
1662    if (self->pending_bytes == NULL) {
1663        self->pending_bytes_count = 0;
1664        self->pending_bytes = b;
1665    }
1666    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1667        // Prevent to concatenate more than chunk_size data.
1668        if (_textiowrapper_writeflush(self) < 0) {
1669            Py_DECREF(b);
1670            return NULL;
1671        }
1672        self->pending_bytes = b;
1673    }
1674    else if (!PyList_CheckExact(self->pending_bytes)) {
1675        PyObject *list = PyList_New(2);
1676        if (list == NULL) {
1677            Py_DECREF(b);
1678            return NULL;
1679        }
1680        PyList_SET_ITEM(list, 0, self->pending_bytes);
1681        PyList_SET_ITEM(list, 1, b);
1682        self->pending_bytes = list;
1683    }
1684    else {
1685        if (PyList_Append(self->pending_bytes, b) < 0) {
1686            Py_DECREF(b);
1687            return NULL;
1688        }
1689        Py_DECREF(b);
1690    }
1691
1692    self->pending_bytes_count += bytes_len;
1693    if (self->pending_bytes_count >= self->chunk_size || needflush ||
1694        text_needflush) {
1695        if (_textiowrapper_writeflush(self) < 0)
1696            return NULL;
1697    }
1698
1699    if (needflush) {
1700        ret = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
1701        if (ret == NULL)
1702            return NULL;
1703        Py_DECREF(ret);
1704    }
1705
1706    textiowrapper_set_decoded_chars(self, NULL);
1707    Py_CLEAR(self->snapshot);
1708
1709    if (self->decoder) {
1710        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1711        if (ret == NULL)
1712            return NULL;
1713        Py_DECREF(ret);
1714    }
1715
1716    return PyLong_FromSsize_t(textlen);
1717}
1718
1719/* Steal a reference to chars and store it in the decoded_char buffer;
1720 */
1721static void
1722textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1723{
1724    Py_XSETREF(self->decoded_chars, chars);
1725    self->decoded_chars_used = 0;
1726}
1727
1728static PyObject *
1729textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1730{
1731    PyObject *chars;
1732    Py_ssize_t avail;
1733
1734    if (self->decoded_chars == NULL)
1735        return PyUnicode_FromStringAndSize(NULL, 0);
1736
1737    /* decoded_chars is guaranteed to be "ready". */
1738    avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1739             - self->decoded_chars_used);
1740
1741    assert(avail >= 0);
1742
1743    if (n < 0 || n > avail)
1744        n = avail;
1745
1746    if (self->decoded_chars_used > 0 || n < avail) {
1747        chars = PyUnicode_Substring(self->decoded_chars,
1748                                    self->decoded_chars_used,
1749                                    self->decoded_chars_used + n);
1750        if (chars == NULL)
1751            return NULL;
1752    }
1753    else {
1754        chars = self->decoded_chars;
1755        Py_INCREF(chars);
1756    }
1757
1758    self->decoded_chars_used += n;
1759    return chars;
1760}
1761
1762/* Read and decode the next chunk of data from the BufferedReader.
1763 */
1764static int
1765textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1766{
1767    PyObject *dec_buffer = NULL;
1768    PyObject *dec_flags = NULL;
1769    PyObject *input_chunk = NULL;
1770    Py_buffer input_chunk_buf;
1771    PyObject *decoded_chars, *chunk_size;
1772    Py_ssize_t nbytes, nchars;
1773    int eof;
1774
1775    /* The return value is True unless EOF was reached.  The decoded string is
1776     * placed in self._decoded_chars (replacing its previous value).  The
1777     * entire input chunk is sent to the decoder, though some of it may remain
1778     * buffered in the decoder, yet to be converted.
1779     */
1780
1781    if (self->decoder == NULL) {
1782        _unsupported("not readable");
1783        return -1;
1784    }
1785
1786    if (self->telling) {
1787        /* To prepare for tell(), we need to snapshot a point in the file
1788         * where the decoder's input buffer is empty.
1789         */
1790        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1791                                                     &_Py_ID(getstate));
1792        if (state == NULL)
1793            return -1;
1794        /* Given this, we know there was a valid snapshot point
1795         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1796         */
1797        if (!PyTuple_Check(state)) {
1798            PyErr_SetString(PyExc_TypeError,
1799                            "illegal decoder state");
1800            Py_DECREF(state);
1801            return -1;
1802        }
1803        if (!PyArg_ParseTuple(state,
1804                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
1805        {
1806            Py_DECREF(state);
1807            return -1;
1808        }
1809
1810        if (!PyBytes_Check(dec_buffer)) {
1811            PyErr_Format(PyExc_TypeError,
1812                         "illegal decoder state: the first item should be a "
1813                         "bytes object, not '%.200s'",
1814                         Py_TYPE(dec_buffer)->tp_name);
1815            Py_DECREF(state);
1816            return -1;
1817        }
1818        Py_INCREF(dec_buffer);
1819        Py_INCREF(dec_flags);
1820        Py_DECREF(state);
1821    }
1822
1823    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1824    if (size_hint > 0) {
1825        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1826    }
1827    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1828    if (chunk_size == NULL)
1829        goto fail;
1830
1831    input_chunk = PyObject_CallMethodOneArg(self->buffer,
1832        (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1833        chunk_size);
1834    Py_DECREF(chunk_size);
1835    if (input_chunk == NULL)
1836        goto fail;
1837
1838    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1839        PyErr_Format(PyExc_TypeError,
1840                     "underlying %s() should have returned a bytes-like object, "
1841                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
1842                     Py_TYPE(input_chunk)->tp_name);
1843        goto fail;
1844    }
1845
1846    nbytes = input_chunk_buf.len;
1847    eof = (nbytes == 0);
1848
1849    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1850    PyBuffer_Release(&input_chunk_buf);
1851    if (decoded_chars == NULL)
1852        goto fail;
1853
1854    textiowrapper_set_decoded_chars(self, decoded_chars);
1855    nchars = PyUnicode_GET_LENGTH(decoded_chars);
1856    if (nchars > 0)
1857        self->b2cratio = (double) nbytes / nchars;
1858    else
1859        self->b2cratio = 0.0;
1860    if (nchars > 0)
1861        eof = 0;
1862
1863    if (self->telling) {
1864        /* At the snapshot point, len(dec_buffer) bytes before the read, the
1865         * next input to be decoded is dec_buffer + input_chunk.
1866         */
1867        PyObject *next_input = dec_buffer;
1868        PyBytes_Concat(&next_input, input_chunk);
1869        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1870        if (next_input == NULL) {
1871            goto fail;
1872        }
1873        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1874        if (snapshot == NULL) {
1875            dec_flags = NULL;
1876            goto fail;
1877        }
1878        Py_XSETREF(self->snapshot, snapshot);
1879    }
1880    Py_DECREF(input_chunk);
1881
1882    return (eof == 0);
1883
1884  fail:
1885    Py_XDECREF(dec_buffer);
1886    Py_XDECREF(dec_flags);
1887    Py_XDECREF(input_chunk);
1888    return -1;
1889}
1890
1891/*[clinic input]
1892_io.TextIOWrapper.read
1893    size as n: Py_ssize_t(accept={int, NoneType}) = -1
1894    /
1895[clinic start generated code]*/
1896
1897static PyObject *
1898_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1899/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1900{
1901    PyObject *result = NULL, *chunks = NULL;
1902
1903    CHECK_ATTACHED(self);
1904    CHECK_CLOSED(self);
1905
1906    if (self->decoder == NULL)
1907        return _unsupported("not readable");
1908
1909    if (_textiowrapper_writeflush(self) < 0)
1910        return NULL;
1911
1912    if (n < 0) {
1913        /* Read everything */
1914        PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
1915        PyObject *decoded;
1916        if (bytes == NULL)
1917            goto fail;
1918
1919        if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
1920            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1921                                                          bytes, 1);
1922        else
1923            decoded = PyObject_CallMethodObjArgs(
1924                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
1925        Py_DECREF(bytes);
1926        if (check_decoded(decoded) < 0)
1927            goto fail;
1928
1929        result = textiowrapper_get_decoded_chars(self, -1);
1930
1931        if (result == NULL) {
1932            Py_DECREF(decoded);
1933            return NULL;
1934        }
1935
1936        PyUnicode_AppendAndDel(&result, decoded);
1937        if (result == NULL)
1938            goto fail;
1939
1940        textiowrapper_set_decoded_chars(self, NULL);
1941        Py_CLEAR(self->snapshot);
1942        return result;
1943    }
1944    else {
1945        int res = 1;
1946        Py_ssize_t remaining = n;
1947
1948        result = textiowrapper_get_decoded_chars(self, n);
1949        if (result == NULL)
1950            goto fail;
1951        if (PyUnicode_READY(result) == -1)
1952            goto fail;
1953        remaining -= PyUnicode_GET_LENGTH(result);
1954
1955        /* Keep reading chunks until we have n characters to return */
1956        while (remaining > 0) {
1957            res = textiowrapper_read_chunk(self, remaining);
1958            if (res < 0) {
1959                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1960                   when EINTR occurs so we needn't do it ourselves. */
1961                if (_PyIO_trap_eintr()) {
1962                    continue;
1963                }
1964                goto fail;
1965            }
1966            if (res == 0)  /* EOF */
1967                break;
1968            if (chunks == NULL) {
1969                chunks = PyList_New(0);
1970                if (chunks == NULL)
1971                    goto fail;
1972            }
1973            if (PyUnicode_GET_LENGTH(result) > 0 &&
1974                PyList_Append(chunks, result) < 0)
1975                goto fail;
1976            Py_DECREF(result);
1977            result = textiowrapper_get_decoded_chars(self, remaining);
1978            if (result == NULL)
1979                goto fail;
1980            remaining -= PyUnicode_GET_LENGTH(result);
1981        }
1982        if (chunks != NULL) {
1983            if (result != NULL && PyList_Append(chunks, result) < 0)
1984                goto fail;
1985            _Py_DECLARE_STR(empty, "");
1986            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
1987            if (result == NULL)
1988                goto fail;
1989            Py_CLEAR(chunks);
1990        }
1991        return result;
1992    }
1993  fail:
1994    Py_XDECREF(result);
1995    Py_XDECREF(chunks);
1996    return NULL;
1997}
1998
1999
2000/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2001   that is to the NUL character. Otherwise the function will produce
2002   incorrect results. */
2003static const char *
2004find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2005{
2006    if (kind == PyUnicode_1BYTE_KIND) {
2007        assert(ch < 256);
2008        return (char *) memchr((const void *) s, (char) ch, end - s);
2009    }
2010    for (;;) {
2011        while (PyUnicode_READ(kind, s, 0) > ch)
2012            s += kind;
2013        if (PyUnicode_READ(kind, s, 0) == ch)
2014            return s;
2015        if (s == end)
2016            return NULL;
2017        s += kind;
2018    }
2019}
2020
2021Py_ssize_t
2022_PyIO_find_line_ending(
2023    int translated, int universal, PyObject *readnl,
2024    int kind, const char *start, const char *end, Py_ssize_t *consumed)
2025{
2026    Py_ssize_t len = (end - start)/kind;
2027
2028    if (translated) {
2029        /* Newlines are already translated, only search for \n */
2030        const char *pos = find_control_char(kind, start, end, '\n');
2031        if (pos != NULL)
2032            return (pos - start)/kind + 1;
2033        else {
2034            *consumed = len;
2035            return -1;
2036        }
2037    }
2038    else if (universal) {
2039        /* Universal newline search. Find any of \r, \r\n, \n
2040         * The decoder ensures that \r\n are not split in two pieces
2041         */
2042        const char *s = start;
2043        for (;;) {
2044            Py_UCS4 ch;
2045            /* Fast path for non-control chars. The loop always ends
2046               since the Unicode string is NUL-terminated. */
2047            while (PyUnicode_READ(kind, s, 0) > '\r')
2048                s += kind;
2049            if (s >= end) {
2050                *consumed = len;
2051                return -1;
2052            }
2053            ch = PyUnicode_READ(kind, s, 0);
2054            s += kind;
2055            if (ch == '\n')
2056                return (s - start)/kind;
2057            if (ch == '\r') {
2058                if (PyUnicode_READ(kind, s, 0) == '\n')
2059                    return (s - start)/kind + 1;
2060                else
2061                    return (s - start)/kind;
2062            }
2063        }
2064    }
2065    else {
2066        /* Non-universal mode. */
2067        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2068        const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2069        /* Assume that readnl is an ASCII character. */
2070        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2071        if (readnl_len == 1) {
2072            const char *pos = find_control_char(kind, start, end, nl[0]);
2073            if (pos != NULL)
2074                return (pos - start)/kind + 1;
2075            *consumed = len;
2076            return -1;
2077        }
2078        else {
2079            const char *s = start;
2080            const char *e = end - (readnl_len - 1)*kind;
2081            const char *pos;
2082            if (e < s)
2083                e = s;
2084            while (s < e) {
2085                Py_ssize_t i;
2086                const char *pos = find_control_char(kind, s, end, nl[0]);
2087                if (pos == NULL || pos >= e)
2088                    break;
2089                for (i = 1; i < readnl_len; i++) {
2090                    if (PyUnicode_READ(kind, pos, i) != nl[i])
2091                        break;
2092                }
2093                if (i == readnl_len)
2094                    return (pos - start)/kind + readnl_len;
2095                s = pos + kind;
2096            }
2097            pos = find_control_char(kind, e, end, nl[0]);
2098            if (pos == NULL)
2099                *consumed = len;
2100            else
2101                *consumed = (pos - start)/kind;
2102            return -1;
2103        }
2104    }
2105}
2106
/* Read and return one line as a new str.

   limit: maximum number of characters to return; a negative value means
          no limit.

   The returned line includes its line ending when one was found.  An empty
   string is returned when no data is left.  Returns NULL with an exception
   set on failure. */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    /* line:      the string currently being searched (owned reference).
       chunks:    list of pieces already put aside, created lazily.
       remaining: tail of the previous buffer that was not put aside; it is
                  prepended to the next chunk of decoded data. */
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    /* start/endpos are indexes into `line`; chunked is the number of
       characters already stored in `chunks`; offset_to_buffer is the
       length of the `remaining` prefix that was prepended to `line`. */
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        const char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search the decoded buffer directly, starting at the first
               character that has not been used yet. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover from the previous buffer so a line
               ending that straddles two chunks can be matched. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Line ending found; convert to an index into `line` and
               apply the length limit if there is one. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* endpos indexes `line`; subtracting the prepended prefix length
           gives the position inside self->decoded_chars. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* Leftover text that was never followed by more data is part of
           the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece, if any. */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(&_Py_STR(empty), chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        /* Nothing was read at all: return the empty string. */
        line = Py_NewRef(&_Py_STR(empty));
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2268
2269/*[clinic input]
2270_io.TextIOWrapper.readline
2271    size: Py_ssize_t = -1
2272    /
2273[clinic start generated code]*/
2274
2275static PyObject *
2276_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2277/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2278{
2279    CHECK_ATTACHED(self);
2280    return _textiowrapper_readline(self, size);
2281}
2282
2283/* Seek and Tell */
2284
/* Unpacked form of the integer cookie exchanged between tell() and seek().
   It is packed into / unpacked from a Python int by
   textiowrapper_build_cookie() and textiowrapper_parse_cookie(). */
typedef struct {
    Py_off_t start_pos;     /* position seek() passes to buffer.seek() */
    int dec_flags;          /* flags part of the decoder state given to setstate() */
    int bytes_to_feed;      /* size seek() passes to buffer.read() before decoding */
    int chars_to_skip;      /* decoded characters seek() marks as already used */
    char need_eof;          /* nonzero: seek() decodes with final=True */
} cookie_type;
2292
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() (packing) or
   _PyLong_AsByteArray() (unpacking).
   The following macros define the offsets at which the various cookie_type
   fields are stored in that intermediate buffer.
 */
2299
/* Size in bytes of the packed cookie: one Py_off_t (start_pos), three ints
   (dec_flags, bytes_to_feed, chars_to_skip) and one char (need_eof). */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

/* Byte offset of each field inside the buffer (big-endian layout). */
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

/* Byte offset of each field inside the buffer (little-endian layout). */
# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2324
2325static int
2326textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2327{
2328    unsigned char buffer[COOKIE_BUF_LEN];
2329    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2330    if (cookieLong == NULL)
2331        return -1;
2332
2333    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2334                            PY_LITTLE_ENDIAN, 0) < 0) {
2335        Py_DECREF(cookieLong);
2336        return -1;
2337    }
2338    Py_DECREF(cookieLong);
2339
2340    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2341    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2342    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2343    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2344    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2345
2346    return 0;
2347}
2348
2349static PyObject *
2350textiowrapper_build_cookie(cookie_type *cookie)
2351{
2352    unsigned char buffer[COOKIE_BUF_LEN];
2353
2354    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2355    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2356    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2357    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2358    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2359
2360    return _PyLong_FromByteArray(buffer, sizeof(buffer),
2361                                 PY_LITTLE_ENDIAN, 0);
2362}
2363
2364static int
2365_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2366{
2367    PyObject *res;
2368    /* When seeking to the start of the stream, we call decoder.reset()
2369       rather than decoder.getstate().
2370       This is for a few decoders such as utf-16 for which the state value
2371       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2372       utf-16, that we are expecting a BOM).
2373    */
2374    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2375        res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2376    }
2377    else {
2378        res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2379                                   "((yi))", "", cookie->dec_flags);
2380    }
2381    if (res == NULL) {
2382        return -1;
2383    }
2384    Py_DECREF(res);
2385    return 0;
2386}
2387
2388static int
2389_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2390{
2391    PyObject *res;
2392    if (start_of_stream) {
2393        res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2394        self->encoding_start_of_stream = 1;
2395    }
2396    else {
2397        res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2398                                        _PyLong_GetZero());
2399        self->encoding_start_of_stream = 0;
2400    }
2401    if (res == NULL)
2402        return -1;
2403    Py_DECREF(res);
2404    return 0;
2405}
2406
2407static int
2408_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2409{
2410    /* Same as _textiowrapper_decoder_setstate() above. */
2411    return _textiowrapper_encoder_reset(
2412        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2413}
2414
2415/*[clinic input]
2416_io.TextIOWrapper.seek
2417    cookie as cookieObj: object
2418    whence: int = 0
2419    /
2420[clinic start generated code]*/
2421
2422static PyObject *
2423_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2424/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2425{
2426    PyObject *posobj;
2427    cookie_type cookie;
2428    PyObject *res;
2429    int cmp;
2430    PyObject *snapshot;
2431
2432    CHECK_ATTACHED(self);
2433    CHECK_CLOSED(self);
2434
2435    Py_INCREF(cookieObj);
2436
2437    if (!self->seekable) {
2438        _unsupported("underlying stream is not seekable");
2439        goto fail;
2440    }
2441
2442    PyObject *zero = _PyLong_GetZero();  // borrowed reference
2443
2444    switch (whence) {
2445    case SEEK_CUR:
2446        /* seek relative to current position */
2447        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2448        if (cmp < 0)
2449            goto fail;
2450
2451        if (cmp == 0) {
2452            _unsupported("can't do nonzero cur-relative seeks");
2453            goto fail;
2454        }
2455
2456        /* Seeking to the current position should attempt to
2457         * sync the underlying buffer with the current position.
2458         */
2459        Py_DECREF(cookieObj);
2460        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2461        if (cookieObj == NULL)
2462            goto fail;
2463        break;
2464
2465    case SEEK_END:
2466        /* seek relative to end of file */
2467        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2468        if (cmp < 0)
2469            goto fail;
2470
2471        if (cmp == 0) {
2472            _unsupported("can't do nonzero end-relative seeks");
2473            goto fail;
2474        }
2475
2476        res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2477        if (res == NULL)
2478            goto fail;
2479        Py_DECREF(res);
2480
2481        textiowrapper_set_decoded_chars(self, NULL);
2482        Py_CLEAR(self->snapshot);
2483        if (self->decoder) {
2484            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2485            if (res == NULL)
2486                goto fail;
2487            Py_DECREF(res);
2488        }
2489
2490        res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2491        Py_CLEAR(cookieObj);
2492        if (res == NULL)
2493            goto fail;
2494        if (self->encoder) {
2495            /* If seek() == 0, we are at the start of stream, otherwise not */
2496            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2497            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2498                Py_DECREF(res);
2499                goto fail;
2500            }
2501        }
2502        return res;
2503
2504    case SEEK_SET:
2505        break;
2506
2507    default:
2508        PyErr_Format(PyExc_ValueError,
2509                     "invalid whence (%d, should be %d, %d or %d)", whence,
2510                     SEEK_SET, SEEK_CUR, SEEK_END);
2511        goto fail;
2512    }
2513
2514    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2515    if (cmp < 0)
2516        goto fail;
2517
2518    if (cmp == 1) {
2519        PyErr_Format(PyExc_ValueError,
2520                     "negative seek position %R", cookieObj);
2521        goto fail;
2522    }
2523
2524    res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2525    if (res == NULL)
2526        goto fail;
2527    Py_DECREF(res);
2528
2529    /* The strategy of seek() is to go back to the safe start point
2530     * and replay the effect of read(chars_to_skip) from there.
2531     */
2532    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2533        goto fail;
2534
2535    /* Seek back to the safe start point. */
2536    posobj = PyLong_FromOff_t(cookie.start_pos);
2537    if (posobj == NULL)
2538        goto fail;
2539    res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2540    Py_DECREF(posobj);
2541    if (res == NULL)
2542        goto fail;
2543    Py_DECREF(res);
2544
2545    textiowrapper_set_decoded_chars(self, NULL);
2546    Py_CLEAR(self->snapshot);
2547
2548    /* Restore the decoder to its state from the safe start point. */
2549    if (self->decoder) {
2550        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2551            goto fail;
2552    }
2553
2554    if (cookie.chars_to_skip) {
2555        /* Just like _read_chunk, feed the decoder and save a snapshot. */
2556        PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2557                                                     "i", cookie.bytes_to_feed);
2558        PyObject *decoded;
2559
2560        if (input_chunk == NULL)
2561            goto fail;
2562
2563        if (!PyBytes_Check(input_chunk)) {
2564            PyErr_Format(PyExc_TypeError,
2565                         "underlying read() should have returned a bytes "
2566                         "object, not '%.200s'",
2567                         Py_TYPE(input_chunk)->tp_name);
2568            Py_DECREF(input_chunk);
2569            goto fail;
2570        }
2571
2572        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2573        if (snapshot == NULL) {
2574            goto fail;
2575        }
2576        Py_XSETREF(self->snapshot, snapshot);
2577
2578        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2579            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2580
2581        if (check_decoded(decoded) < 0)
2582            goto fail;
2583
2584        textiowrapper_set_decoded_chars(self, decoded);
2585
2586        /* Skip chars_to_skip of the decoded characters. */
2587        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2588            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2589            goto fail;
2590        }
2591        self->decoded_chars_used = cookie.chars_to_skip;
2592    }
2593    else {
2594        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2595        if (snapshot == NULL)
2596            goto fail;
2597        Py_XSETREF(self->snapshot, snapshot);
2598    }
2599
2600    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2601    if (self->encoder) {
2602        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2603            goto fail;
2604    }
2605    return cookieObj;
2606  fail:
2607    Py_XDECREF(cookieObj);
2608    return NULL;
2609
2610}
2611
2612/*[clinic input]
2613_io.TextIOWrapper.tell
2614[clinic start generated code]*/
2615
2616static PyObject *
2617_io_TextIOWrapper_tell_impl(textio *self)
2618/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2619{
2620    PyObject *res;
2621    PyObject *posobj = NULL;
2622    cookie_type cookie = {0,0,0,0,0};
2623    PyObject *next_input;
2624    Py_ssize_t chars_to_skip, chars_decoded;
2625    Py_ssize_t skip_bytes, skip_back;
2626    PyObject *saved_state = NULL;
2627    const char *input, *input_end;
2628    Py_ssize_t dec_buffer_len;
2629    int dec_flags;
2630
2631    CHECK_ATTACHED(self);
2632    CHECK_CLOSED(self);
2633
2634    if (!self->seekable) {
2635        _unsupported("underlying stream is not seekable");
2636        goto fail;
2637    }
2638    if (!self->telling) {
2639        PyErr_SetString(PyExc_OSError,
2640                        "telling position disabled by next() call");
2641        goto fail;
2642    }
2643
2644    if (_textiowrapper_writeflush(self) < 0)
2645        return NULL;
2646    res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2647    if (res == NULL)
2648        goto fail;
2649    Py_DECREF(res);
2650
2651    posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2652    if (posobj == NULL)
2653        goto fail;
2654
2655    if (self->decoder == NULL || self->snapshot == NULL) {
2656        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2657        return posobj;
2658    }
2659
2660#if defined(HAVE_LARGEFILE_SUPPORT)
2661    cookie.start_pos = PyLong_AsLongLong(posobj);
2662#else
2663    cookie.start_pos = PyLong_AsLong(posobj);
2664#endif
2665    Py_DECREF(posobj);
2666    if (PyErr_Occurred())
2667        goto fail;
2668
2669    /* Skip backward to the snapshot point (see _read_chunk). */
2670    assert(PyTuple_Check(self->snapshot));
2671    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2672        goto fail;
2673
2674    assert (PyBytes_Check(next_input));
2675
2676    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2677
2678    /* How many decoded characters have been used up since the snapshot? */
2679    if (self->decoded_chars_used == 0)  {
2680        /* We haven't moved from the snapshot point. */
2681        return textiowrapper_build_cookie(&cookie);
2682    }
2683
2684    chars_to_skip = self->decoded_chars_used;
2685
2686    /* Decoder state will be restored at the end */
2687    saved_state = PyObject_CallMethodNoArgs(self->decoder,
2688                                             &_Py_ID(getstate));
2689    if (saved_state == NULL)
2690        goto fail;
2691
2692#define DECODER_GETSTATE() do { \
2693        PyObject *dec_buffer; \
2694        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2695            &_Py_ID(getstate)); \
2696        if (_state == NULL) \
2697            goto fail; \
2698        if (!PyTuple_Check(_state)) { \
2699            PyErr_SetString(PyExc_TypeError, \
2700                            "illegal decoder state"); \
2701            Py_DECREF(_state); \
2702            goto fail; \
2703        } \
2704        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2705                              &dec_buffer, &dec_flags)) \
2706        { \
2707            Py_DECREF(_state); \
2708            goto fail; \
2709        } \
2710        if (!PyBytes_Check(dec_buffer)) { \
2711            PyErr_Format(PyExc_TypeError, \
2712                         "illegal decoder state: the first item should be a " \
2713                         "bytes object, not '%.200s'", \
2714                         Py_TYPE(dec_buffer)->tp_name); \
2715            Py_DECREF(_state); \
2716            goto fail; \
2717        } \
2718        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2719        Py_DECREF(_state); \
2720    } while (0)
2721
2722#define DECODER_DECODE(start, len, res) do { \
2723        PyObject *_decoded = _PyObject_CallMethod( \
2724            self->decoder, &_Py_ID(decode), "y#", start, len); \
2725        if (check_decoded(_decoded) < 0) \
2726            goto fail; \
2727        res = PyUnicode_GET_LENGTH(_decoded); \
2728        Py_DECREF(_decoded); \
2729    } while (0)
2730
2731    /* Fast search for an acceptable start point, close to our
2732       current pos */
2733    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2734    skip_back = 1;
2735    assert(skip_back <= PyBytes_GET_SIZE(next_input));
2736    input = PyBytes_AS_STRING(next_input);
2737    while (skip_bytes > 0) {
2738        /* Decode up to temptative start point */
2739        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2740            goto fail;
2741        DECODER_DECODE(input, skip_bytes, chars_decoded);
2742        if (chars_decoded <= chars_to_skip) {
2743            DECODER_GETSTATE();
2744            if (dec_buffer_len == 0) {
2745                /* Before pos and no bytes buffered in decoder => OK */
2746                cookie.dec_flags = dec_flags;
2747                chars_to_skip -= chars_decoded;
2748                break;
2749            }
2750            /* Skip back by buffered amount and reset heuristic */
2751            skip_bytes -= dec_buffer_len;
2752            skip_back = 1;
2753        }
2754        else {
2755            /* We're too far ahead, skip back a bit */
2756            skip_bytes -= skip_back;
2757            skip_back *= 2;
2758        }
2759    }
2760    if (skip_bytes <= 0) {
2761        skip_bytes = 0;
2762        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2763            goto fail;
2764    }
2765
2766    /* Note our initial start point. */
2767    cookie.start_pos += skip_bytes;
2768    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2769    if (chars_to_skip == 0)
2770        goto finally;
2771
2772    /* We should be close to the desired position.  Now feed the decoder one
2773     * byte at a time until we reach the `chars_to_skip` target.
2774     * As we go, note the nearest "safe start point" before the current
2775     * location (a point where the decoder has nothing buffered, so seek()
2776     * can safely start from there and advance to this location).
2777     */
2778    chars_decoded = 0;
2779    input = PyBytes_AS_STRING(next_input);
2780    input_end = input + PyBytes_GET_SIZE(next_input);
2781    input += skip_bytes;
2782    while (input < input_end) {
2783        Py_ssize_t n;
2784
2785        DECODER_DECODE(input, (Py_ssize_t)1, n);
2786        /* We got n chars for 1 byte */
2787        chars_decoded += n;
2788        cookie.bytes_to_feed += 1;
2789        DECODER_GETSTATE();
2790
2791        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2792            /* Decoder buffer is empty, so this is a safe start point. */
2793            cookie.start_pos += cookie.bytes_to_feed;
2794            chars_to_skip -= chars_decoded;
2795            cookie.dec_flags = dec_flags;
2796            cookie.bytes_to_feed = 0;
2797            chars_decoded = 0;
2798        }
2799        if (chars_decoded >= chars_to_skip)
2800            break;
2801        input++;
2802    }
2803    if (input == input_end) {
2804        /* We didn't get enough decoded data; signal EOF to get more. */
2805        PyObject *decoded = _PyObject_CallMethod(
2806            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2807        if (check_decoded(decoded) < 0)
2808            goto fail;
2809        chars_decoded += PyUnicode_GET_LENGTH(decoded);
2810        Py_DECREF(decoded);
2811        cookie.need_eof = 1;
2812
2813        if (chars_decoded < chars_to_skip) {
2814            PyErr_SetString(PyExc_OSError,
2815                            "can't reconstruct logical file position");
2816            goto fail;
2817        }
2818    }
2819
2820finally:
2821    res = PyObject_CallMethodOneArg(
2822            self->decoder, &_Py_ID(setstate), saved_state);
2823    Py_DECREF(saved_state);
2824    if (res == NULL)
2825        return NULL;
2826    Py_DECREF(res);
2827
2828    /* The returned cookie corresponds to the last safe start point. */
2829    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2830    return textiowrapper_build_cookie(&cookie);
2831
2832fail:
2833    if (saved_state) {
2834        PyObject *type, *value, *traceback;
2835        PyErr_Fetch(&type, &value, &traceback);
2836        res = PyObject_CallMethodOneArg(
2837                self->decoder, &_Py_ID(setstate), saved_state);
2838        _PyErr_ChainExceptions(type, value, traceback);
2839        Py_DECREF(saved_state);
2840        Py_XDECREF(res);
2841    }
2842    return NULL;
2843}
2844
2845/*[clinic input]
2846_io.TextIOWrapper.truncate
2847    pos: object = None
2848    /
2849[clinic start generated code]*/
2850
2851static PyObject *
2852_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2853/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2854{
2855    PyObject *res;
2856
2857    CHECK_ATTACHED(self)
2858
2859    res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2860    if (res == NULL)
2861        return NULL;
2862    Py_DECREF(res);
2863
2864    return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2865}
2866
2867static PyObject *
2868textiowrapper_repr(textio *self)
2869{
2870    PyObject *nameobj, *modeobj, *res, *s;
2871    int status;
2872
2873    CHECK_INITIALIZED(self);
2874
2875    res = PyUnicode_FromString("<_io.TextIOWrapper");
2876    if (res == NULL)
2877        return NULL;
2878
2879    status = Py_ReprEnter((PyObject *)self);
2880    if (status != 0) {
2881        if (status > 0) {
2882            PyErr_Format(PyExc_RuntimeError,
2883                         "reentrant call inside %s.__repr__",
2884                         Py_TYPE(self)->tp_name);
2885        }
2886        goto error;
2887    }
2888    if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
2889        if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2890            goto error;
2891        }
2892        /* Ignore ValueError raised if the underlying stream was detached */
2893        PyErr_Clear();
2894    }
2895    if (nameobj != NULL) {
2896        s = PyUnicode_FromFormat(" name=%R", nameobj);
2897        Py_DECREF(nameobj);
2898        if (s == NULL)
2899            goto error;
2900        PyUnicode_AppendAndDel(&res, s);
2901        if (res == NULL)
2902            goto error;
2903    }
2904    if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
2905        goto error;
2906    }
2907    if (modeobj != NULL) {
2908        s = PyUnicode_FromFormat(" mode=%R", modeobj);
2909        Py_DECREF(modeobj);
2910        if (s == NULL)
2911            goto error;
2912        PyUnicode_AppendAndDel(&res, s);
2913        if (res == NULL)
2914            goto error;
2915    }
2916    s = PyUnicode_FromFormat("%U encoding=%R>",
2917                             res, self->encoding);
2918    Py_DECREF(res);
2919    if (status == 0) {
2920        Py_ReprLeave((PyObject *)self);
2921    }
2922    return s;
2923
2924  error:
2925    Py_XDECREF(res);
2926    if (status == 0) {
2927        Py_ReprLeave((PyObject *)self);
2928    }
2929    return NULL;
2930}
2931
2932
2933/* Inquiries */
2934
2935/*[clinic input]
2936_io.TextIOWrapper.fileno
2937[clinic start generated code]*/
2938
2939static PyObject *
2940_io_TextIOWrapper_fileno_impl(textio *self)
2941/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2942{
2943    CHECK_ATTACHED(self);
2944    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
2945}
2946
2947/*[clinic input]
2948_io.TextIOWrapper.seekable
2949[clinic start generated code]*/
2950
2951static PyObject *
2952_io_TextIOWrapper_seekable_impl(textio *self)
2953/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2954{
2955    CHECK_ATTACHED(self);
2956    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
2957}
2958
2959/*[clinic input]
2960_io.TextIOWrapper.readable
2961[clinic start generated code]*/
2962
2963static PyObject *
2964_io_TextIOWrapper_readable_impl(textio *self)
2965/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2966{
2967    CHECK_ATTACHED(self);
2968    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
2969}
2970
2971/*[clinic input]
2972_io.TextIOWrapper.writable
2973[clinic start generated code]*/
2974
2975static PyObject *
2976_io_TextIOWrapper_writable_impl(textio *self)
2977/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2978{
2979    CHECK_ATTACHED(self);
2980    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
2981}
2982
2983/*[clinic input]
2984_io.TextIOWrapper.isatty
2985[clinic start generated code]*/
2986
2987static PyObject *
2988_io_TextIOWrapper_isatty_impl(textio *self)
2989/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2990{
2991    CHECK_ATTACHED(self);
2992    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
2993}
2994
2995/*[clinic input]
2996_io.TextIOWrapper.flush
2997[clinic start generated code]*/
2998
2999static PyObject *
3000_io_TextIOWrapper_flush_impl(textio *self)
3001/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3002{
3003    CHECK_ATTACHED(self);
3004    CHECK_CLOSED(self);
3005    self->telling = self->seekable;
3006    if (_textiowrapper_writeflush(self) < 0)
3007        return NULL;
3008    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3009}
3010
3011/*[clinic input]
3012_io.TextIOWrapper.close
3013[clinic start generated code]*/
3014
3015static PyObject *
3016_io_TextIOWrapper_close_impl(textio *self)
3017/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3018{
3019    PyObject *res;
3020    int r;
3021    CHECK_ATTACHED(self);
3022
3023    res = textiowrapper_closed_get(self, NULL);
3024    if (res == NULL)
3025        return NULL;
3026    r = PyObject_IsTrue(res);
3027    Py_DECREF(res);
3028    if (r < 0)
3029        return NULL;
3030
3031    if (r > 0) {
3032        Py_RETURN_NONE; /* stream already closed */
3033    }
3034    else {
3035        PyObject *exc = NULL, *val, *tb;
3036        if (self->finalizing) {
3037            res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3038                                            (PyObject *)self);
3039            if (res)
3040                Py_DECREF(res);
3041            else
3042                PyErr_Clear();
3043        }
3044        res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
3045        if (res == NULL)
3046            PyErr_Fetch(&exc, &val, &tb);
3047        else
3048            Py_DECREF(res);
3049
3050        res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3051        if (exc != NULL) {
3052            _PyErr_ChainExceptions(exc, val, tb);
3053            Py_CLEAR(res);
3054        }
3055        return res;
3056    }
3057}
3058
3059static PyObject *
3060textiowrapper_iternext(textio *self)
3061{
3062    PyObject *line;
3063
3064    CHECK_ATTACHED(self);
3065
3066    self->telling = 0;
3067    if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
3068        /* Skip method call overhead for speed */
3069        line = _textiowrapper_readline(self, -1);
3070    }
3071    else {
3072        line = PyObject_CallMethodNoArgs((PyObject *)self,
3073                                          &_Py_ID(readline));
3074        if (line && !PyUnicode_Check(line)) {
3075            PyErr_Format(PyExc_OSError,
3076                         "readline() should have returned a str object, "
3077                         "not '%.200s'", Py_TYPE(line)->tp_name);
3078            Py_DECREF(line);
3079            return NULL;
3080        }
3081    }
3082
3083    if (line == NULL || PyUnicode_READY(line) == -1)
3084        return NULL;
3085
3086    if (PyUnicode_GET_LENGTH(line) == 0) {
3087        /* Reached EOF or would have blocked */
3088        Py_DECREF(line);
3089        Py_CLEAR(self->snapshot);
3090        self->telling = self->seekable;
3091        return NULL;
3092    }
3093
3094    return line;
3095}
3096
3097static PyObject *
3098textiowrapper_name_get(textio *self, void *context)
3099{
3100    CHECK_ATTACHED(self);
3101    return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3102}
3103
3104static PyObject *
3105textiowrapper_closed_get(textio *self, void *context)
3106{
3107    CHECK_ATTACHED(self);
3108    return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3109}
3110
3111static PyObject *
3112textiowrapper_newlines_get(textio *self, void *context)
3113{
3114    PyObject *res;
3115    CHECK_ATTACHED(self);
3116    if (self->decoder == NULL ||
3117        _PyObject_LookupAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3118    {
3119        Py_RETURN_NONE;
3120    }
3121    return res;
3122}
3123
3124static PyObject *
3125textiowrapper_errors_get(textio *self, void *context)
3126{
3127    CHECK_INITIALIZED(self);
3128    Py_INCREF(self->errors);
3129    return self->errors;
3130}
3131
3132static PyObject *
3133textiowrapper_chunk_size_get(textio *self, void *context)
3134{
3135    CHECK_ATTACHED(self);
3136    return PyLong_FromSsize_t(self->chunk_size);
3137}
3138
3139static int
3140textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3141{
3142    Py_ssize_t n;
3143    CHECK_ATTACHED_INT(self);
3144    if (arg == NULL) {
3145        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3146        return -1;
3147    }
3148    n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3149    if (n == -1 && PyErr_Occurred())
3150        return -1;
3151    if (n <= 0) {
3152        PyErr_SetString(PyExc_ValueError,
3153                        "a strictly positive integer is required");
3154        return -1;
3155    }
3156    self->chunk_size = n;
3157    return 0;
3158}
3159
3160#include "clinic/textio.c.h"
3161
3162static PyMethodDef incrementalnewlinedecoder_methods[] = {
3163    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3164    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3165    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3166    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3167    {NULL}
3168};
3169
3170static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3171    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3172    {NULL}
3173};
3174
3175PyTypeObject PyIncrementalNewlineDecoder_Type = {
3176    PyVarObject_HEAD_INIT(NULL, 0)
3177    "_io.IncrementalNewlineDecoder", /*tp_name*/
3178    sizeof(nldecoder_object), /*tp_basicsize*/
3179    0,                          /*tp_itemsize*/
3180    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3181    0,                          /*tp_vectorcall_offset*/
3182    0,                          /*tp_getattr*/
3183    0,                          /*tp_setattr*/
3184    0,                          /*tp_as_async*/
3185    0,                          /*tp_repr*/
3186    0,                          /*tp_as_number*/
3187    0,                          /*tp_as_sequence*/
3188    0,                          /*tp_as_mapping*/
3189    0,                          /*tp_hash */
3190    0,                          /*tp_call*/
3191    0,                          /*tp_str*/
3192    0,                          /*tp_getattro*/
3193    0,                          /*tp_setattro*/
3194    0,                          /*tp_as_buffer*/
3195    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
3196    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3197    0,                          /* tp_traverse */
3198    0,                          /* tp_clear */
3199    0,                          /* tp_richcompare */
3200    0,                          /*tp_weaklistoffset*/
3201    0,                          /* tp_iter */
3202    0,                          /* tp_iternext */
3203    incrementalnewlinedecoder_methods, /* tp_methods */
3204    0,                          /* tp_members */
3205    incrementalnewlinedecoder_getset, /* tp_getset */
3206    0,                          /* tp_base */
3207    0,                          /* tp_dict */
3208    0,                          /* tp_descr_get */
3209    0,                          /* tp_descr_set */
3210    0,                          /* tp_dictoffset */
3211    _io_IncrementalNewlineDecoder___init__, /* tp_init */
3212    0,                          /* tp_alloc */
3213    PyType_GenericNew,          /* tp_new */
3214};
3215
3216
3217static PyMethodDef textiowrapper_methods[] = {
3218    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3219    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3220    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3221    _IO_TEXTIOWRAPPER_READ_METHODDEF
3222    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3223    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3224    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3225
3226    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3227    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3228    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3229    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3230    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3231
3232    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3233    _IO_TEXTIOWRAPPER_TELL_METHODDEF
3234    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3235    {NULL, NULL}
3236};
3237
3238static PyMemberDef textiowrapper_members[] = {
3239    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3240    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3241    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3242    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3243    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3244    {NULL}
3245};
3246
3247static PyGetSetDef textiowrapper_getset[] = {
3248    {"name", (getter)textiowrapper_name_get, NULL, NULL},
3249    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3250/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3251*/
3252    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3253    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3254    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3255                    (setter)textiowrapper_chunk_size_set, NULL},
3256    {NULL}
3257};
3258
3259PyTypeObject PyTextIOWrapper_Type = {
3260    PyVarObject_HEAD_INIT(NULL, 0)
3261    "_io.TextIOWrapper",        /*tp_name*/
3262    sizeof(textio), /*tp_basicsize*/
3263    0,                          /*tp_itemsize*/
3264    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3265    0,                          /*tp_vectorcall_offset*/
3266    0,                          /*tp_getattr*/
3267    0,                          /*tps_etattr*/
3268    0,                          /*tp_as_async*/
3269    (reprfunc)textiowrapper_repr,/*tp_repr*/
3270    0,                          /*tp_as_number*/
3271    0,                          /*tp_as_sequence*/
3272    0,                          /*tp_as_mapping*/
3273    0,                          /*tp_hash */
3274    0,                          /*tp_call*/
3275    0,                          /*tp_str*/
3276    0,                          /*tp_getattro*/
3277    0,                          /*tp_setattro*/
3278    0,                          /*tp_as_buffer*/
3279    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3280        | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
3281    _io_TextIOWrapper___init____doc__, /* tp_doc */
3282    (traverseproc)textiowrapper_traverse, /* tp_traverse */
3283    (inquiry)textiowrapper_clear, /* tp_clear */
3284    0,                          /* tp_richcompare */
3285    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3286    0,                          /* tp_iter */
3287    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3288    textiowrapper_methods,      /* tp_methods */
3289    textiowrapper_members,      /* tp_members */
3290    textiowrapper_getset,       /* tp_getset */
3291    0,                          /* tp_base */
3292    0,                          /* tp_dict */
3293    0,                          /* tp_descr_get */
3294    0,                          /* tp_descr_set */
3295    offsetof(textio, dict), /*tp_dictoffset*/
3296    _io_TextIOWrapper___init__, /* tp_init */
3297    0,                          /* tp_alloc */
3298    PyType_GenericNew,          /* tp_new */
3299    0,                          /* tp_free */
3300    0,                          /* tp_is_gc */
3301    0,                          /* tp_bases */
3302    0,                          /* tp_mro */
3303    0,                          /* tp_cache */
3304    0,                          /* tp_subclasses */
3305    0,                          /* tp_weaklist */
3306    0,                          /* tp_del */
3307    0,                          /* tp_version_tag */
3308    0,                          /* tp_finalize */
3309};
3310