xref: /third_party/python/Modules/_io/bytesio.c (revision 7db96d56)
1#include "Python.h"
2#include "pycore_object.h"
3#include <stddef.h>               // offsetof()
4#include "_iomodule.h"
5
6/*[clinic input]
7module _io
8class _io.BytesIO "bytesio *" "&PyBytesIO_Type"
9[clinic start generated code]*/
10/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7f50ec034f5c0b26]*/
11
12typedef struct {
13    PyObject_HEAD
14    PyObject *buf;
15    Py_ssize_t pos;
16    Py_ssize_t string_size;
17    PyObject *dict;
18    PyObject *weakreflist;
19    Py_ssize_t exports;
20} bytesio;
21
22typedef struct {
23    PyObject_HEAD
24    bytesio *source;
25} bytesiobuf;
26
27/* The bytesio object can be in three states:
28  * Py_REFCNT(buf) == 1, exports == 0.
29  * Py_REFCNT(buf) > 1.  exports == 0,
30    first modification or export causes the internal buffer copying.
31  * exports > 0.  Py_REFCNT(buf) == 1, any modifications are forbidden.
32*/
33
34static int
35check_closed(bytesio *self)
36{
37    if (self->buf == NULL) {
38        PyErr_SetString(PyExc_ValueError, "I/O operation on closed file.");
39        return 1;
40    }
41    return 0;
42}
43
44static int
45check_exports(bytesio *self)
46{
47    if (self->exports > 0) {
48        PyErr_SetString(PyExc_BufferError,
49                        "Existing exports of data: object cannot be re-sized");
50        return 1;
51    }
52    return 0;
53}
54
/* Return NULL from the calling function (with ValueError set) if the stream
   is closed.  Wrapped in do/while(0) so that the macro behaves as a single
   statement and cannot capture a following 'else'. */
#define CHECK_CLOSED(self)                                  \
    do {                                                    \
        if (check_closed(self)) {                           \
            return NULL;                                    \
        }                                                   \
    } while (0)

/* Return NULL from the calling function (with BufferError set) if buffer
   exports are outstanding.  Same single-statement wrapping as above. */
#define CHECK_EXPORTS(self)                                 \
    do {                                                    \
        if (check_exports(self)) {                          \
            return NULL;                                    \
        }                                                   \
    } while (0)

/* True when something other than this object also holds a reference to the
   internal bytes buffer. */
#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1)
66
67
68/* Internal routine to get a line from the buffer of a BytesIO
69   object. Returns the length between the current position to the
70   next newline character. */
71static Py_ssize_t
72scan_eol(bytesio *self, Py_ssize_t len)
73{
74    const char *start, *n;
75    Py_ssize_t maxlen;
76
77    assert(self->buf != NULL);
78    assert(self->pos >= 0);
79
80    if (self->pos >= self->string_size)
81        return 0;
82
83    /* Move to the end of the line, up to the end of the string, s. */
84    maxlen = self->string_size - self->pos;
85    if (len < 0 || len > maxlen)
86        len = maxlen;
87
88    if (len) {
89        start = PyBytes_AS_STRING(self->buf) + self->pos;
90        n = memchr(start, '\n', len);
91        if (n)
92            /* Get the length from the current position to the end of
93               the line. */
94            len = n - start + 1;
95    }
96    assert(len >= 0);
97    assert(self->pos < PY_SSIZE_T_MAX - len);
98
99    return len;
100}
101
102/* Internal routine for detaching the shared buffer of BytesIO objects.
103   The caller should ensure that the 'size' argument is non-negative and
104   not lesser than self->string_size.  Returns 0 on success, -1 otherwise. */
105static int
106unshare_buffer(bytesio *self, size_t size)
107{
108    PyObject *new_buf;
109    assert(SHARED_BUF(self));
110    assert(self->exports == 0);
111    assert(size >= (size_t)self->string_size);
112    new_buf = PyBytes_FromStringAndSize(NULL, size);
113    if (new_buf == NULL)
114        return -1;
115    memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf),
116           self->string_size);
117    Py_SETREF(self->buf, new_buf);
118    return 0;
119}
120
121/* Internal routine for changing the size of the buffer of BytesIO objects.
122   The caller should ensure that the 'size' argument is non-negative.  Returns
123   0 on success, -1 otherwise. */
124static int
125resize_buffer(bytesio *self, size_t size)
126{
127    /* Here, unsigned types are used to avoid dealing with signed integer
128       overflow, which is undefined in C. */
129    size_t alloc = PyBytes_GET_SIZE(self->buf);
130
131    assert(self->buf != NULL);
132
133    /* For simplicity, stay in the range of the signed type. Anyway, Python
134       doesn't allow strings to be longer than this. */
135    if (size > PY_SSIZE_T_MAX)
136        goto overflow;
137
138    if (size < alloc / 2) {
139        /* Major downsize; resize down to exact size. */
140        alloc = size + 1;
141    }
142    else if (size < alloc) {
143        /* Within allocated size; quick exit */
144        return 0;
145    }
146    else if (size <= alloc * 1.125) {
147        /* Moderate upsize; overallocate similar to list_resize() */
148        alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
149    }
150    else {
151        /* Major upsize; resize up to exact size */
152        alloc = size + 1;
153    }
154
155    if (alloc > ((size_t)-1) / sizeof(char))
156        goto overflow;
157
158    if (SHARED_BUF(self)) {
159        if (unshare_buffer(self, alloc) < 0)
160            return -1;
161    }
162    else {
163        if (_PyBytes_Resize(&self->buf, alloc) < 0)
164            return -1;
165    }
166
167    return 0;
168
169  overflow:
170    PyErr_SetString(PyExc_OverflowError,
171                    "new buffer size too large");
172    return -1;
173}
174
175/* Internal routine for writing a string of bytes to the buffer of a BytesIO
176   object. Returns the number of bytes written, or -1 on error.
177   Inlining is disabled because it's significantly decreases performance
178   of writelines() in PGO build. */
179Py_NO_INLINE static Py_ssize_t
180write_bytes(bytesio *self, PyObject *b)
181{
182    if (check_closed(self)) {
183        return -1;
184    }
185    if (check_exports(self)) {
186        return -1;
187    }
188
189    Py_buffer buf;
190    if (PyObject_GetBuffer(b, &buf, PyBUF_CONTIG_RO) < 0) {
191        return -1;
192    }
193    Py_ssize_t len = buf.len;
194    if (len == 0) {
195        goto done;
196    }
197
198    assert(self->pos >= 0);
199    size_t endpos = (size_t)self->pos + len;
200    if (endpos > (size_t)PyBytes_GET_SIZE(self->buf)) {
201        if (resize_buffer(self, endpos) < 0) {
202            len = -1;
203            goto done;
204        }
205    }
206    else if (SHARED_BUF(self)) {
207        if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0) {
208            len = -1;
209            goto done;
210        }
211    }
212
213    if (self->pos > self->string_size) {
214        /* In case of overseek, pad with null bytes the buffer region between
215           the end of stream and the current position.
216
217          0   lo      string_size                           hi
218          |   |<---used--->|<----------available----------->|
219          |   |            <--to pad-->|<---to write--->    |
220          0   buf                   position
221        */
222        memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0',
223               (self->pos - self->string_size) * sizeof(char));
224    }
225
226    /* Copy the data to the internal buffer, overwriting some of the existing
227       data if self->pos < self->string_size. */
228    memcpy(PyBytes_AS_STRING(self->buf) + self->pos, buf.buf, len);
229    self->pos = endpos;
230
231    /* Set the new length of the internal string if it has changed. */
232    if ((size_t)self->string_size < endpos) {
233        self->string_size = endpos;
234    }
235
236  done:
237    PyBuffer_Release(&buf);
238    return len;
239}
240
241static PyObject *
242bytesio_get_closed(bytesio *self, void *Py_UNUSED(ignored))
243{
244    if (self->buf == NULL) {
245        Py_RETURN_TRUE;
246    }
247    else {
248        Py_RETURN_FALSE;
249    }
250}
251
252/*[clinic input]
253_io.BytesIO.readable
254
255Returns True if the IO object can be read.
256[clinic start generated code]*/
257
258static PyObject *
259_io_BytesIO_readable_impl(bytesio *self)
260/*[clinic end generated code: output=4e93822ad5b62263 input=96c5d0cccfb29f5c]*/
261{
262    CHECK_CLOSED(self);
263    Py_RETURN_TRUE;
264}
265
266/*[clinic input]
267_io.BytesIO.writable
268
269Returns True if the IO object can be written.
270[clinic start generated code]*/
271
272static PyObject *
273_io_BytesIO_writable_impl(bytesio *self)
274/*[clinic end generated code: output=64ff6a254b1150b8 input=700eed808277560a]*/
275{
276    CHECK_CLOSED(self);
277    Py_RETURN_TRUE;
278}
279
280/*[clinic input]
281_io.BytesIO.seekable
282
283Returns True if the IO object can be seeked.
284[clinic start generated code]*/
285
286static PyObject *
287_io_BytesIO_seekable_impl(bytesio *self)
288/*[clinic end generated code: output=6b417f46dcc09b56 input=9421f65627a344dd]*/
289{
290    CHECK_CLOSED(self);
291    Py_RETURN_TRUE;
292}
293
294/*[clinic input]
295_io.BytesIO.flush
296
297Does nothing.
298[clinic start generated code]*/
299
300static PyObject *
301_io_BytesIO_flush_impl(bytesio *self)
302/*[clinic end generated code: output=187e3d781ca134a0 input=561ea490be4581a7]*/
303{
304    CHECK_CLOSED(self);
305    Py_RETURN_NONE;
306}
307
308/*[clinic input]
309_io.BytesIO.getbuffer
310
311Get a read-write view over the contents of the BytesIO object.
312[clinic start generated code]*/
313
314static PyObject *
315_io_BytesIO_getbuffer_impl(bytesio *self)
316/*[clinic end generated code: output=72cd7c6e13aa09ed input=8f738ef615865176]*/
317{
318    PyTypeObject *type = &_PyBytesIOBuffer_Type;
319    bytesiobuf *buf;
320    PyObject *view;
321
322    CHECK_CLOSED(self);
323
324    buf = (bytesiobuf *) type->tp_alloc(type, 0);
325    if (buf == NULL)
326        return NULL;
327    Py_INCREF(self);
328    buf->source = self;
329    view = PyMemoryView_FromObject((PyObject *) buf);
330    Py_DECREF(buf);
331    return view;
332}
333
334/*[clinic input]
335_io.BytesIO.getvalue
336
337Retrieve the entire contents of the BytesIO object.
338[clinic start generated code]*/
339
340static PyObject *
341_io_BytesIO_getvalue_impl(bytesio *self)
342/*[clinic end generated code: output=b3f6a3233c8fd628 input=4b403ac0af3973ed]*/
343{
344    CHECK_CLOSED(self);
345    if (self->string_size <= 1 || self->exports > 0)
346        return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf),
347                                         self->string_size);
348
349    if (self->string_size != PyBytes_GET_SIZE(self->buf)) {
350        if (SHARED_BUF(self)) {
351            if (unshare_buffer(self, self->string_size) < 0)
352                return NULL;
353        }
354        else {
355            if (_PyBytes_Resize(&self->buf, self->string_size) < 0)
356                return NULL;
357        }
358    }
359    Py_INCREF(self->buf);
360    return self->buf;
361}
362
363/*[clinic input]
364_io.BytesIO.isatty
365
366Always returns False.
367
368BytesIO objects are not connected to a TTY-like device.
369[clinic start generated code]*/
370
371static PyObject *
372_io_BytesIO_isatty_impl(bytesio *self)
373/*[clinic end generated code: output=df67712e669f6c8f input=6f97f0985d13f827]*/
374{
375    CHECK_CLOSED(self);
376    Py_RETURN_FALSE;
377}
378
379/*[clinic input]
380_io.BytesIO.tell
381
382Current file position, an integer.
383[clinic start generated code]*/
384
385static PyObject *
386_io_BytesIO_tell_impl(bytesio *self)
387/*[clinic end generated code: output=b54b0f93cd0e5e1d input=b106adf099cb3657]*/
388{
389    CHECK_CLOSED(self);
390    return PyLong_FromSsize_t(self->pos);
391}
392
393static PyObject *
394read_bytes(bytesio *self, Py_ssize_t size)
395{
396    const char *output;
397
398    assert(self->buf != NULL);
399    assert(size <= self->string_size);
400    if (size > 1 &&
401        self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) &&
402        self->exports == 0) {
403        self->pos += size;
404        Py_INCREF(self->buf);
405        return self->buf;
406    }
407
408    output = PyBytes_AS_STRING(self->buf) + self->pos;
409    self->pos += size;
410    return PyBytes_FromStringAndSize(output, size);
411}
412
413/*[clinic input]
414_io.BytesIO.read
415    size: Py_ssize_t(accept={int, NoneType}) = -1
416    /
417
418Read at most size bytes, returned as a bytes object.
419
420If the size argument is negative, read until EOF is reached.
421Return an empty bytes object at EOF.
422[clinic start generated code]*/
423
424static PyObject *
425_io_BytesIO_read_impl(bytesio *self, Py_ssize_t size)
426/*[clinic end generated code: output=9cc025f21c75bdd2 input=74344a39f431c3d7]*/
427{
428    Py_ssize_t n;
429
430    CHECK_CLOSED(self);
431
432    /* adjust invalid sizes */
433    n = self->string_size - self->pos;
434    if (size < 0 || size > n) {
435        size = n;
436        if (size < 0)
437            size = 0;
438    }
439
440    return read_bytes(self, size);
441}
442
443
444/*[clinic input]
445_io.BytesIO.read1
446    size: Py_ssize_t(accept={int, NoneType}) = -1
447    /
448
449Read at most size bytes, returned as a bytes object.
450
451If the size argument is negative or omitted, read until EOF is reached.
452Return an empty bytes object at EOF.
453[clinic start generated code]*/
454
455static PyObject *
456_io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
457/*[clinic end generated code: output=d0f843285aa95f1c input=440a395bf9129ef5]*/
458{
459    return _io_BytesIO_read_impl(self, size);
460}
461
462/*[clinic input]
463_io.BytesIO.readline
464    size: Py_ssize_t(accept={int, NoneType}) = -1
465    /
466
467Next line from the file, as a bytes object.
468
469Retain newline.  A non-negative size argument limits the maximum
470number of bytes to return (an incomplete line may be returned then).
471Return an empty bytes object at EOF.
472[clinic start generated code]*/
473
474static PyObject *
475_io_BytesIO_readline_impl(bytesio *self, Py_ssize_t size)
476/*[clinic end generated code: output=4bff3c251df8ffcd input=e7c3fbd1744e2783]*/
477{
478    Py_ssize_t n;
479
480    CHECK_CLOSED(self);
481
482    n = scan_eol(self, size);
483
484    return read_bytes(self, n);
485}
486
487/*[clinic input]
488_io.BytesIO.readlines
489    size as arg: object = None
490    /
491
492List of bytes objects, each a line from the file.
493
494Call readline() repeatedly and return a list of the lines so read.
495The optional size argument, if given, is an approximate bound on the
496total number of bytes in the lines returned.
497[clinic start generated code]*/
498
499static PyObject *
500_io_BytesIO_readlines_impl(bytesio *self, PyObject *arg)
501/*[clinic end generated code: output=09b8e34c880808ff input=691aa1314f2c2a87]*/
502{
503    Py_ssize_t maxsize, size, n;
504    PyObject *result, *line;
505    const char *output;
506
507    CHECK_CLOSED(self);
508
509    if (PyLong_Check(arg)) {
510        maxsize = PyLong_AsSsize_t(arg);
511        if (maxsize == -1 && PyErr_Occurred())
512            return NULL;
513    }
514    else if (arg == Py_None) {
515        /* No size limit, by default. */
516        maxsize = -1;
517    }
518    else {
519        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
520                     Py_TYPE(arg)->tp_name);
521        return NULL;
522    }
523
524    size = 0;
525    result = PyList_New(0);
526    if (!result)
527        return NULL;
528
529    output = PyBytes_AS_STRING(self->buf) + self->pos;
530    while ((n = scan_eol(self, -1)) != 0) {
531        self->pos += n;
532        line = PyBytes_FromStringAndSize(output, n);
533        if (!line)
534            goto on_error;
535        if (PyList_Append(result, line) == -1) {
536            Py_DECREF(line);
537            goto on_error;
538        }
539        Py_DECREF(line);
540        size += n;
541        if (maxsize > 0 && size >= maxsize)
542            break;
543        output += n;
544    }
545    return result;
546
547  on_error:
548    Py_DECREF(result);
549    return NULL;
550}
551
552/*[clinic input]
553_io.BytesIO.readinto
554    buffer: Py_buffer(accept={rwbuffer})
555    /
556
557Read bytes into buffer.
558
559Returns number of bytes read (0 for EOF), or None if the object
560is set not to block and has no data to read.
561[clinic start generated code]*/
562
563static PyObject *
564_io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer)
565/*[clinic end generated code: output=a5d407217dcf0639 input=1424d0fdce857919]*/
566{
567    Py_ssize_t len, n;
568
569    CHECK_CLOSED(self);
570
571    /* adjust invalid sizes */
572    len = buffer->len;
573    n = self->string_size - self->pos;
574    if (len > n) {
575        len = n;
576        if (len < 0)
577            len = 0;
578    }
579
580    memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len);
581    assert(self->pos + len < PY_SSIZE_T_MAX);
582    assert(len >= 0);
583    self->pos += len;
584
585    return PyLong_FromSsize_t(len);
586}
587
588/*[clinic input]
589_io.BytesIO.truncate
590    size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
591    /
592
593Truncate the file to at most size bytes.
594
595Size defaults to the current file position, as returned by tell().
596The current file position is unchanged.  Returns the new size.
597[clinic start generated code]*/
598
599static PyObject *
600_io_BytesIO_truncate_impl(bytesio *self, Py_ssize_t size)
601/*[clinic end generated code: output=9ad17650c15fa09b input=423759dd42d2f7c1]*/
602{
603    CHECK_CLOSED(self);
604    CHECK_EXPORTS(self);
605
606    if (size < 0) {
607        PyErr_Format(PyExc_ValueError,
608                     "negative size value %zd", size);
609        return NULL;
610    }
611
612    if (size < self->string_size) {
613        self->string_size = size;
614        if (resize_buffer(self, size) < 0)
615            return NULL;
616    }
617
618    return PyLong_FromSsize_t(size);
619}
620
621static PyObject *
622bytesio_iternext(bytesio *self)
623{
624    Py_ssize_t n;
625
626    CHECK_CLOSED(self);
627
628    n = scan_eol(self, -1);
629
630    if (n == 0)
631        return NULL;
632
633    return read_bytes(self, n);
634}
635
636/*[clinic input]
637_io.BytesIO.seek
638    pos: Py_ssize_t
639    whence: int = 0
640    /
641
642Change stream position.
643
644Seek to byte offset pos relative to position indicated by whence:
645     0  Start of stream (the default).  pos should be >= 0;
646     1  Current position - pos may be negative;
647     2  End of stream - pos usually negative.
648Returns the new absolute position.
649[clinic start generated code]*/
650
651static PyObject *
652_io_BytesIO_seek_impl(bytesio *self, Py_ssize_t pos, int whence)
653/*[clinic end generated code: output=c26204a68e9190e4 input=1e875e6ebc652948]*/
654{
655    CHECK_CLOSED(self);
656
657    if (pos < 0 && whence == 0) {
658        PyErr_Format(PyExc_ValueError,
659                     "negative seek value %zd", pos);
660        return NULL;
661    }
662
663    /* whence = 0: offset relative to beginning of the string.
664       whence = 1: offset relative to current position.
665       whence = 2: offset relative the end of the string. */
666    if (whence == 1) {
667        if (pos > PY_SSIZE_T_MAX - self->pos) {
668            PyErr_SetString(PyExc_OverflowError,
669                            "new position too large");
670            return NULL;
671        }
672        pos += self->pos;
673    }
674    else if (whence == 2) {
675        if (pos > PY_SSIZE_T_MAX - self->string_size) {
676            PyErr_SetString(PyExc_OverflowError,
677                            "new position too large");
678            return NULL;
679        }
680        pos += self->string_size;
681    }
682    else if (whence != 0) {
683        PyErr_Format(PyExc_ValueError,
684                     "invalid whence (%i, should be 0, 1 or 2)", whence);
685        return NULL;
686    }
687
688    if (pos < 0)
689        pos = 0;
690    self->pos = pos;
691
692    return PyLong_FromSsize_t(self->pos);
693}
694
695/*[clinic input]
696_io.BytesIO.write
697    b: object
698    /
699
700Write bytes to file.
701
702Return the number of bytes written.
703[clinic start generated code]*/
704
705static PyObject *
706_io_BytesIO_write(bytesio *self, PyObject *b)
707/*[clinic end generated code: output=53316d99800a0b95 input=f5ec7c8c64ed720a]*/
708{
709    Py_ssize_t n = write_bytes(self, b);
710    return n >= 0 ? PyLong_FromSsize_t(n) : NULL;
711}
712
713/*[clinic input]
714_io.BytesIO.writelines
715    lines: object
716    /
717
718Write lines to the file.
719
720Note that newlines are not added.  lines can be any iterable object
721producing bytes-like objects. This is equivalent to calling write() for
722each element.
723[clinic start generated code]*/
724
725static PyObject *
726_io_BytesIO_writelines(bytesio *self, PyObject *lines)
727/*[clinic end generated code: output=7f33aa3271c91752 input=e972539176fc8fc1]*/
728{
729    PyObject *it, *item;
730
731    CHECK_CLOSED(self);
732
733    it = PyObject_GetIter(lines);
734    if (it == NULL)
735        return NULL;
736
737    while ((item = PyIter_Next(it)) != NULL) {
738        Py_ssize_t ret = write_bytes(self, item);
739        Py_DECREF(item);
740        if (ret < 0) {
741            Py_DECREF(it);
742            return NULL;
743        }
744    }
745    Py_DECREF(it);
746
747    /* See if PyIter_Next failed */
748    if (PyErr_Occurred())
749        return NULL;
750
751    Py_RETURN_NONE;
752}
753
754/*[clinic input]
755_io.BytesIO.close
756
757Disable all I/O operations.
758[clinic start generated code]*/
759
760static PyObject *
761_io_BytesIO_close_impl(bytesio *self)
762/*[clinic end generated code: output=1471bb9411af84a0 input=37e1f55556e61f60]*/
763{
764    CHECK_EXPORTS(self);
765    Py_CLEAR(self->buf);
766    Py_RETURN_NONE;
767}
768
769/* Pickling support.
770
771   Note that only pickle protocol 2 and onward are supported since we use
772   extended __reduce__ API of PEP 307 to make BytesIO instances picklable.
773
774   Providing support for protocol < 2 would require the __reduce_ex__ method
775   which is notably long-winded when defined properly.
776
777   For BytesIO, the implementation would be similar to the one coded for
778   object.__reduce_ex__, but slightly less general. To be more specific, we
779   could call bytesio_getstate directly and avoid checking for the presence of
780   a fallback __reduce__ method. However, we would still need a __newobj__
781   function to use the efficient instance representation of PEP 307.
782 */
783
784static PyObject *
785bytesio_getstate(bytesio *self, PyObject *Py_UNUSED(ignored))
786{
787    PyObject *initvalue = _io_BytesIO_getvalue_impl(self);
788    PyObject *dict;
789    PyObject *state;
790
791    if (initvalue == NULL)
792        return NULL;
793    if (self->dict == NULL) {
794        Py_INCREF(Py_None);
795        dict = Py_None;
796    }
797    else {
798        dict = PyDict_Copy(self->dict);
799        if (dict == NULL) {
800            Py_DECREF(initvalue);
801            return NULL;
802        }
803    }
804
805    state = Py_BuildValue("(OnN)", initvalue, self->pos, dict);
806    Py_DECREF(initvalue);
807    return state;
808}
809
810static PyObject *
811bytesio_setstate(bytesio *self, PyObject *state)
812{
813    PyObject *result;
814    PyObject *position_obj;
815    PyObject *dict;
816    Py_ssize_t pos;
817
818    assert(state != NULL);
819
820    /* We allow the state tuple to be longer than 3, because we may need
821       someday to extend the object's state without breaking
822       backward-compatibility. */
823    if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 3) {
824        PyErr_Format(PyExc_TypeError,
825                     "%.200s.__setstate__ argument should be 3-tuple, got %.200s",
826                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
827        return NULL;
828    }
829    CHECK_EXPORTS(self);
830    /* Reset the object to its default state. This is only needed to handle
831       the case of repeated calls to __setstate__. */
832    self->string_size = 0;
833    self->pos = 0;
834
835    /* Set the value of the internal buffer. If state[0] does not support the
836       buffer protocol, bytesio_write will raise the appropriate TypeError. */
837    result = _io_BytesIO_write(self, PyTuple_GET_ITEM(state, 0));
838    if (result == NULL)
839        return NULL;
840    Py_DECREF(result);
841
842    /* Set carefully the position value. Alternatively, we could use the seek
843       method instead of modifying self->pos directly to better protect the
844       object internal state against erroneous (or malicious) inputs. */
845    position_obj = PyTuple_GET_ITEM(state, 1);
846    if (!PyLong_Check(position_obj)) {
847        PyErr_Format(PyExc_TypeError,
848                     "second item of state must be an integer, not %.200s",
849                     Py_TYPE(position_obj)->tp_name);
850        return NULL;
851    }
852    pos = PyLong_AsSsize_t(position_obj);
853    if (pos == -1 && PyErr_Occurred())
854        return NULL;
855    if (pos < 0) {
856        PyErr_SetString(PyExc_ValueError,
857                        "position value cannot be negative");
858        return NULL;
859    }
860    self->pos = pos;
861
862    /* Set the dictionary of the instance variables. */
863    dict = PyTuple_GET_ITEM(state, 2);
864    if (dict != Py_None) {
865        if (!PyDict_Check(dict)) {
866            PyErr_Format(PyExc_TypeError,
867                         "third item of state should be a dict, got a %.200s",
868                         Py_TYPE(dict)->tp_name);
869            return NULL;
870        }
871        if (self->dict) {
872            /* Alternatively, we could replace the internal dictionary
873               completely. However, it seems more practical to just update it. */
874            if (PyDict_Update(self->dict, dict) < 0)
875                return NULL;
876        }
877        else {
878            Py_INCREF(dict);
879            self->dict = dict;
880        }
881    }
882
883    Py_RETURN_NONE;
884}
885
886static void
887bytesio_dealloc(bytesio *self)
888{
889    _PyObject_GC_UNTRACK(self);
890    if (self->exports > 0) {
891        PyErr_SetString(PyExc_SystemError,
892                        "deallocated BytesIO object has exported buffers");
893        PyErr_Print();
894    }
895    Py_CLEAR(self->buf);
896    Py_CLEAR(self->dict);
897    if (self->weakreflist != NULL)
898        PyObject_ClearWeakRefs((PyObject *) self);
899    Py_TYPE(self)->tp_free(self);
900}
901
902static PyObject *
903bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
904{
905    bytesio *self;
906
907    assert(type != NULL && type->tp_alloc != NULL);
908    self = (bytesio *)type->tp_alloc(type, 0);
909    if (self == NULL)
910        return NULL;
911
912    /* tp_alloc initializes all the fields to zero. So we don't have to
913       initialize them here. */
914
915    self->buf = PyBytes_FromStringAndSize(NULL, 0);
916    if (self->buf == NULL) {
917        Py_DECREF(self);
918        return PyErr_NoMemory();
919    }
920
921    return (PyObject *)self;
922}
923
924/*[clinic input]
925_io.BytesIO.__init__
926    initial_bytes as initvalue: object(c_default="NULL") = b''
927
928Buffered I/O implementation using an in-memory bytes buffer.
929[clinic start generated code]*/
930
931static int
932_io_BytesIO___init___impl(bytesio *self, PyObject *initvalue)
933/*[clinic end generated code: output=65c0c51e24c5b621 input=aac7f31b67bf0fb6]*/
934{
935    /* In case, __init__ is called multiple times. */
936    self->string_size = 0;
937    self->pos = 0;
938
939    if (self->exports > 0) {
940        PyErr_SetString(PyExc_BufferError,
941                        "Existing exports of data: object cannot be re-sized");
942        return -1;
943    }
944    if (initvalue && initvalue != Py_None) {
945        if (PyBytes_CheckExact(initvalue)) {
946            Py_INCREF(initvalue);
947            Py_XSETREF(self->buf, initvalue);
948            self->string_size = PyBytes_GET_SIZE(initvalue);
949        }
950        else {
951            PyObject *res;
952            res = _io_BytesIO_write(self, initvalue);
953            if (res == NULL)
954                return -1;
955            Py_DECREF(res);
956            self->pos = 0;
957        }
958    }
959
960    return 0;
961}
962
963static PyObject *
964bytesio_sizeof(bytesio *self, void *unused)
965{
966    Py_ssize_t res;
967
968    res = _PyObject_SIZE(Py_TYPE(self));
969    if (self->buf && !SHARED_BUF(self)) {
970        Py_ssize_t s = _PySys_GetSizeOf(self->buf);
971        if (s == -1) {
972            return NULL;
973        }
974        res += s;
975    }
976    return PyLong_FromSsize_t(res);
977}
978
979static int
980bytesio_traverse(bytesio *self, visitproc visit, void *arg)
981{
982    Py_VISIT(self->dict);
983    return 0;
984}
985
986static int
987bytesio_clear(bytesio *self)
988{
989    Py_CLEAR(self->dict);
990    return 0;
991}
992
993
994#include "clinic/bytesio.c.h"
995
996static PyGetSetDef bytesio_getsetlist[] = {
997    {"closed",  (getter)bytesio_get_closed, NULL,
998     "True if the file is closed."},
999    {NULL},            /* sentinel */
1000};
1001
1002static struct PyMethodDef bytesio_methods[] = {
1003    _IO_BYTESIO_READABLE_METHODDEF
1004    _IO_BYTESIO_SEEKABLE_METHODDEF
1005    _IO_BYTESIO_WRITABLE_METHODDEF
1006    _IO_BYTESIO_CLOSE_METHODDEF
1007    _IO_BYTESIO_FLUSH_METHODDEF
1008    _IO_BYTESIO_ISATTY_METHODDEF
1009    _IO_BYTESIO_TELL_METHODDEF
1010    _IO_BYTESIO_WRITE_METHODDEF
1011    _IO_BYTESIO_WRITELINES_METHODDEF
1012    _IO_BYTESIO_READ1_METHODDEF
1013    _IO_BYTESIO_READINTO_METHODDEF
1014    _IO_BYTESIO_READLINE_METHODDEF
1015    _IO_BYTESIO_READLINES_METHODDEF
1016    _IO_BYTESIO_READ_METHODDEF
1017    _IO_BYTESIO_GETBUFFER_METHODDEF
1018    _IO_BYTESIO_GETVALUE_METHODDEF
1019    _IO_BYTESIO_SEEK_METHODDEF
1020    _IO_BYTESIO_TRUNCATE_METHODDEF
1021    {"__getstate__",  (PyCFunction)bytesio_getstate,  METH_NOARGS, NULL},
1022    {"__setstate__",  (PyCFunction)bytesio_setstate,  METH_O, NULL},
1023    {"__sizeof__", (PyCFunction)bytesio_sizeof,     METH_NOARGS, NULL},
1024    {NULL, NULL}        /* sentinel */
1025};
1026
1027PyTypeObject PyBytesIO_Type = {
1028    PyVarObject_HEAD_INIT(NULL, 0)
1029    "_io.BytesIO",                             /*tp_name*/
1030    sizeof(bytesio),                     /*tp_basicsize*/
1031    0,                                         /*tp_itemsize*/
1032    (destructor)bytesio_dealloc,               /*tp_dealloc*/
1033    0,                                         /*tp_vectorcall_offset*/
1034    0,                                         /*tp_getattr*/
1035    0,                                         /*tp_setattr*/
1036    0,                                         /*tp_as_async*/
1037    0,                                         /*tp_repr*/
1038    0,                                         /*tp_as_number*/
1039    0,                                         /*tp_as_sequence*/
1040    0,                                         /*tp_as_mapping*/
1041    0,                                         /*tp_hash*/
1042    0,                                         /*tp_call*/
1043    0,                                         /*tp_str*/
1044    0,                                         /*tp_getattro*/
1045    0,                                         /*tp_setattro*/
1046    0,                                         /*tp_as_buffer*/
1047    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1048    Py_TPFLAGS_HAVE_GC,                        /*tp_flags*/
1049    _io_BytesIO___init____doc__,               /*tp_doc*/
1050    (traverseproc)bytesio_traverse,            /*tp_traverse*/
1051    (inquiry)bytesio_clear,                    /*tp_clear*/
1052    0,                                         /*tp_richcompare*/
1053    offsetof(bytesio, weakreflist),      /*tp_weaklistoffset*/
1054    PyObject_SelfIter,                         /*tp_iter*/
1055    (iternextfunc)bytesio_iternext,            /*tp_iternext*/
1056    bytesio_methods,                           /*tp_methods*/
1057    0,                                         /*tp_members*/
1058    bytesio_getsetlist,                        /*tp_getset*/
1059    0,                                         /*tp_base*/
1060    0,                                         /*tp_dict*/
1061    0,                                         /*tp_descr_get*/
1062    0,                                         /*tp_descr_set*/
1063    offsetof(bytesio, dict),             /*tp_dictoffset*/
1064    _io_BytesIO___init__,                      /*tp_init*/
1065    0,                                         /*tp_alloc*/
1066    bytesio_new,                               /*tp_new*/
1067};
1068
1069
1070/*
1071 * Implementation of the small intermediate object used by getbuffer().
1072 * getbuffer() returns a memoryview over this object, which should make it
1073 * invisible from Python code.
1074 */
1075
1076static int
1077bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
1078{
1079    bytesio *b = (bytesio *) obj->source;
1080
1081    if (view == NULL) {
1082        PyErr_SetString(PyExc_BufferError,
1083            "bytesiobuf_getbuffer: view==NULL argument is obsolete");
1084        return -1;
1085    }
1086    if (SHARED_BUF(b)) {
1087        if (unshare_buffer(b, b->string_size) < 0)
1088            return -1;
1089    }
1090
1091    /* cannot fail if view != NULL and readonly == 0 */
1092    (void)PyBuffer_FillInfo(view, (PyObject*)obj,
1093                            PyBytes_AS_STRING(b->buf), b->string_size,
1094                            0, flags);
1095    b->exports++;
1096    return 0;
1097}
1098
1099static void
1100bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
1101{
1102    bytesio *b = (bytesio *) obj->source;
1103    b->exports--;
1104}
1105
1106static int
1107bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
1108{
1109    Py_VISIT(self->source);
1110    return 0;
1111}
1112
1113static void
1114bytesiobuf_dealloc(bytesiobuf *self)
1115{
1116    /* bpo-31095: UnTrack is needed before calling any callbacks */
1117    PyObject_GC_UnTrack(self);
1118    Py_CLEAR(self->source);
1119    Py_TYPE(self)->tp_free(self);
1120}
1121
1122static PyBufferProcs bytesiobuf_as_buffer = {
1123    (getbufferproc) bytesiobuf_getbuffer,
1124    (releasebufferproc) bytesiobuf_releasebuffer,
1125};
1126
1127Py_EXPORTED_SYMBOL PyTypeObject _PyBytesIOBuffer_Type = {
1128    PyVarObject_HEAD_INIT(NULL, 0)
1129    "_io._BytesIOBuffer",                      /*tp_name*/
1130    sizeof(bytesiobuf),                        /*tp_basicsize*/
1131    0,                                         /*tp_itemsize*/
1132    (destructor)bytesiobuf_dealloc,            /*tp_dealloc*/
1133    0,                                         /*tp_vectorcall_offset*/
1134    0,                                         /*tp_getattr*/
1135    0,                                         /*tp_setattr*/
1136    0,                                         /*tp_as_async*/
1137    0,                                         /*tp_repr*/
1138    0,                                         /*tp_as_number*/
1139    0,                                         /*tp_as_sequence*/
1140    0,                                         /*tp_as_mapping*/
1141    0,                                         /*tp_hash*/
1142    0,                                         /*tp_call*/
1143    0,                                         /*tp_str*/
1144    0,                                         /*tp_getattro*/
1145    0,                                         /*tp_setattro*/
1146    &bytesiobuf_as_buffer,                     /*tp_as_buffer*/
1147    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
1148    0,                                         /*tp_doc*/
1149    (traverseproc)bytesiobuf_traverse,         /*tp_traverse*/
1150    0,                                         /*tp_clear*/
1151    0,                                         /*tp_richcompare*/
1152    0,                                         /*tp_weaklistoffset*/
1153    0,                                         /*tp_iter*/
1154    0,                                         /*tp_iternext*/
1155    0,                                         /*tp_methods*/
1156    0,                                         /*tp_members*/
1157    0,                                         /*tp_getset*/
1158    0,                                         /*tp_base*/
1159    0,                                         /*tp_dict*/
1160    0,                                         /*tp_descr_get*/
1161    0,                                         /*tp_descr_set*/
1162    0,                                         /*tp_dictoffset*/
1163    0,                                         /*tp_init*/
1164    0,                                         /*tp_alloc*/
1165    0,                                         /*tp_new*/
1166};
1167