/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/

#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "pycore_interp.h"        // PyInterpreterState.fs_codec
#include "pycore_long.h"          // _PyLong_GetZero()
#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
#include "pycore_object.h"
#include "pycore_pystate.h"       // _PyInterpreterState_GET()
#include "structmember.h"         // PyMemberDef
#include "_iomodule.h"

/*[clinic input]
module _io
class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
class _io.TextIOWrapper "textio *" "&TextIOWrapper_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ed072384f8aada2c]*/

/* TextIOBase */

PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable.\n"
    );

/* Shared failure path for the TextIOBase stub methods below.
 *
 * Sets the exception stored in the module state's `unsupported_operation`
 * field with `message` and returns NULL.  If IO_STATE() yields NULL, no
 * exception is set here and NULL is still returned.
 */
static PyObject *
_unsupported(const char *message)
{
    _PyIO_State *state = IO_STATE();
    if (state != NULL)
        PyErr_SetString(state->unsupported_operation, message);
    return NULL;
}

PyDoc_STRVAR(textiobase_detach_doc,
    "Separate the underlying buffer from the TextIOBase and return it.\n"
    "\n"
    "After the underlying buffer has been detached, the TextIO is in an\n"
    "unusable state.\n"
    );

/* Base-class stub: always fails through _unsupported("detach"). */
static PyObject *
textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    return _unsupported("detach");
}

PyDoc_STRVAR(textiobase_read_doc,
    "Read at most n characters from stream.\n"
    "\n"
    "Read from underlying buffer until we have n characters or we hit EOF.\n"
    "If n is negative or omitted, read until EOF.\n"
    );

/* Base-class stub: always fails through _unsupported("read"). */
static PyObject *
textiobase_read(PyObject *self, PyObject *args)
{
    return _unsupported("read");
}

PyDoc_STRVAR(textiobase_readline_doc,
    "Read until newline or EOF.\n"
    "\n"
    "Returns an empty string if EOF is hit immediately.\n"
    );

/* Base-class stub: always fails through _unsupported("readline"). */
static PyObject *
textiobase_readline(PyObject *self, PyObject *args)
{
    return _unsupported("readline");
}

PyDoc_STRVAR(textiobase_write_doc,
    "Write string to stream.\n"
    "Returns the number of characters written (which is always equal to\n"
    "the length of the string).\n"
    );

/* Base-class stub: always fails through _unsupported("write"). */
static PyObject *
textiobase_write(PyObject *self, PyObject *args)
{
    return _unsupported("write");
}

PyDoc_STRVAR(textiobase_encoding_doc,
    "Encoding of the text stream.\n"
    "\n"
    "Subclasses should override.\n"
    );

/* Default `encoding` getter: returns None. */
static PyObject *
textiobase_encoding_get(PyObject *self, void *context)
{
    Py_RETURN_NONE;
}

PyDoc_STRVAR(textiobase_newlines_doc,
    "Line endings translated so far.\n"
    "\n"
    "Only line endings translated during reading are considered.\n"
    "\n"
    "Subclasses should override.\n"
    );

/* Default `newlines` getter: returns None. */
static PyObject *
textiobase_newlines_get(PyObject *self, void *context)
{
    Py_RETURN_NONE;
}

PyDoc_STRVAR(textiobase_errors_doc,
    "The error setting of the decoder or encoder.\n"
    "\n"
    "Subclasses should override.\n"
    );

/* Default `errors` getter: returns None. */
static PyObject *
textiobase_errors_get(PyObject *self, void *context)
{
    Py_RETURN_NONE;
}


/* Method table of _io._TextIOBase: every entry is one of the stubs above. */
static PyMethodDef textiobase_methods[] = {
    {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};

/* Read-only attributes of _io._TextIOBase (no setter is registered). */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};

/* Static type object for _io._TextIOBase.  It adds no instance storage of
   its own (tp_basicsize is 0) and names PyIOBase_Type as its base. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};


/* IncrementalNewlineDecoder */

/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; compared against Py_None
                                   to decide whether to call into it */
    PyObject *errors;           /* set by __init__; NULL means "__init__ not
                                   called" (see CHECK_INITIALIZED_DECODER) */
    unsigned int pendingcr: 1;  /* a trailing '\r' was withheld from the
                                   previous decode() result */
    unsigned int translate: 1;  /* rewrite "\r\n" and "\r" to "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;

/*[clinic input]
_io.IncrementalNewlineDecoder.__init__
    decoder: object
    translate: int
    errors: object(c_default="NULL") = "strict"

Codec used when reading a file in universal newlines mode.

It wraps another incremental decoder, translating \r\n and \r into \n.
It also records the types of newlines encountered. When used with
translate=False, it ensures that the newline sequence is returned in
one piece. When used with decoder=None, it expects unicode strings as
decode input and translates newlines without first invoking an external
decoder.
[clinic start generated code]*/

static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
                                            PyObject *decoder, int translate,
                                            PyObject *errors)
/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
{

    /* `errors` arrives as NULL when the argument was omitted (clinic
       c_default); fall back to the interned "strict" string. */
    if (errors == NULL) {
        errors = Py_NewRef(&_Py_ID(strict));
    }
    else {
        errors = Py_NewRef(errors);
    }

    /* Py_XSETREF: the fields may already hold references if __init__ is
       invoked more than once on the same object. */
    Py_XSETREF(self->errors, errors);
    Py_XSETREF(self->decoder, Py_NewRef(decoder));
    self->translate = translate ? 1 : 0;
    self->seennl = 0;
    self->pendingcr = 0;

    return 0;
}

/* tp_dealloc: drop the two owned references, then free the object. */
static void
incrementalnewlinedecoder_dealloc(nldecoder_object *self)
{
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->errors);
    Py_TYPE(self)->tp_free((PyObject *)self);
}

/* Validate the result of a decode() call.
 *
 * Returns 0 when `decoded` is a str that is ready for use.  Returns -1 when
 * `decoded` is NULL (nothing is raised here in that case), when it is not a
 * str (TypeError is set), or when PyUnicode_READY() fails.  In the last two
 * cases the reference to `decoded` is released, so on -1 the caller must
 * not touch `decoded` again.
 */
static int
check_decoded(PyObject *decoded)
{
    if (decoded == NULL)
        return -1;
    if (!PyUnicode_Check(decoded)) {
        PyErr_Format(PyExc_TypeError,
                     "decoder should return a string result, not '%.200s'",
                     Py_TYPE(decoded)->tp_name);
        Py_DECREF(decoded);
        return -1;
    }
    if (PyUnicode_READY(decoded) < 0) {
        Py_DECREF(decoded);
        return -1;
    }
    return 0;
}

/* Guard used at the top of every decoder method: `errors` is only set by
   __init__, so a NULL value means the object was never initialized.
   Expands to a `return NULL`, hence only usable in functions returning a
   pointer. */
#define CHECK_INITIALIZED_DECODER(self) \
    if (self->errors == NULL) { \
        PyErr_SetString(PyExc_ValueError, \
                        "IncrementalNewlineDecoder.__init__() not called"); \
        return NULL; \
    }

/* Bit values stored in nldecoder_object.seennl */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)

/* Decode `input` and post-process its newlines.
 *
 * When self->decoder is not None, `input` is handed to its decode() method
 * together with `final`; otherwise `input` itself is taken as the decoded
 * text.  A trailing '\r' is withheld (pendingcr) unless `final` is true and
 * is prepended to the next non-empty (or final) result.  The kinds of
 * newline encountered are accumulated in self->seennl, and when
 * self->translate is set "\r\n" and "\r" are rewritten to "\n".
 *
 * Returns a new reference, or NULL on failure.
 *
 * NOTE(review): the inner `while` loops of the scanning code carry no index
 * bound of their own; they stop at the first code unit that is not greater
 * than '\n' / '\r'.  This appears to rely on a terminating NUL following
 * the string data -- confirm before restructuring any of these loops.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    CHECK_INITIALIZED_DECODER(self);

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` doubles as the byte width of one code unit here. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; seennl is unchanged so there is nothing to
           store back either. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte data a '\n' byte is a '\n' character; for
                   wider kinds the byte may belong to another code unit, so
                   the characters are examined one by one. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never lengthens the text ("\r\n" -> "\n",
               "\r" -> "\n"), so `len` code units are sufficient. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character lay beyond the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}

/*[clinic input]
_io.IncrementalNewlineDecoder.decode
    input: object
    final: bool(accept={int}) = False
[clinic start generated code]*/

static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
                                          PyObject *input, int final)
/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
{
    /* Python-level entry point; the work is done by the C-callable helper. */
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
}

/*[clinic input]
_io.IncrementalNewlineDecoder.getstate
[clinic start generated code]*/

static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
{
    PyObject *buffer;
    unsigned long long flag;

    CHECK_INITIALIZED_DECODER(self);

    if (self->decoder != Py_None) {
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
           &_Py_ID(getstate));
        if (state == NULL)
            return NULL;
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return NULL;
        }
        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
                              &buffer, &flag))
        {
            Py_DECREF(state);
            return NULL;
        }
        /* `buffer` is borrowed from `state`; own it before releasing the
           tuple. */
        Py_INCREF(buffer);
        Py_DECREF(state);
    }
    else {
        buffer = PyBytes_FromString("");
        flag = 0;
    }
    /* The wrapped decoder's flags are shifted up by one bit and pendingcr
       is stored in bit 0; setstate() reverses this. */
    flag <<= 1;
    if (self->pendingcr)
        flag |= 1;
    /* "N" hands our reference to `buffer` over to the result tuple. */
    return Py_BuildValue("NK", buffer, flag);
}

/*[clinic input]
_io.IncrementalNewlineDecoder.setstate
    state: object
    /
[clinic start generated code]*/

static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
                                       PyObject *state)
/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
{
    PyObject *buffer;
    unsigned long long flag;

    CHECK_INITIALIZED_DECODER(self);

    if (!PyTuple_Check(state)) {
        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
        return NULL;
    }
    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
                          &buffer, &flag))
    {
        return NULL;
    }

    /* Bit 0 is our own pendingcr flag; the remaining bits belong to the
       wrapped decoder (mirror of getstate()). */
    self->pendingcr = (int) (flag & 1);
    flag >>= 1;

    if (self->decoder != Py_None) {
        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
                                    "((OK))", buffer, flag);
    }
    else {
        Py_RETURN_NONE;
    }
}

/*[clinic input]
_io.IncrementalNewlineDecoder.reset
[clinic start generated code]*/

static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
{
    CHECK_INITIALIZED_DECODER(self);

    /* Forget the newline history and any withheld '\r', then reset the
       wrapped decoder (if there is one) and return its result. */
    self->seennl = 0;
    self->pendingcr = 0;
    if (self->decoder != Py_None)
        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
    else
        Py_RETURN_NONE;
}

/* Getter for `newlines`: maps the seennl bitmask to None (nothing seen),
   a single str (one kind seen) or a tuple of str (several kinds seen). */
static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
{
    CHECK_INITIALIZED_DECODER(self);

    switch (self->seennl) {
    case SEEN_CR:
        return PyUnicode_FromString("\r");
    case SEEN_LF:
        return PyUnicode_FromString("\n");
    case SEEN_CRLF:
        return PyUnicode_FromString("\r\n");
    case SEEN_CR | SEEN_LF:
        return Py_BuildValue("ss", "\r", "\n");
    case SEEN_CR | SEEN_CRLF:
        return Py_BuildValue("ss", "\r", "\r\n");
    case SEEN_LF | SEEN_CRLF:
        return Py_BuildValue("ss", "\n", "\r\n");
    case SEEN_CR | SEEN_LF | SEEN_CRLF:
        return Py_BuildValue("sss", "\r", "\n", "\r\n");
    default:
        Py_RETURN_NONE;
   }

}

/* TextIOWrapper */

/* Signature shared by the specialized encoders: (self, text) -> encoded
   object, or NULL on failure. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point. We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;

static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);

/* A couple of specialized cases in order to bypass the slow incremental
   encoding methods for the most popular encodings.
*/ 695 696static PyObject * 697ascii_encode(textio *self, PyObject *text) 698{ 699 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors)); 700} 701 702static PyObject * 703utf16be_encode(textio *self, PyObject *text) 704{ 705 return _PyUnicode_EncodeUTF16(text, 706 PyUnicode_AsUTF8(self->errors), 1); 707} 708 709static PyObject * 710utf16le_encode(textio *self, PyObject *text) 711{ 712 return _PyUnicode_EncodeUTF16(text, 713 PyUnicode_AsUTF8(self->errors), -1); 714} 715 716static PyObject * 717utf16_encode(textio *self, PyObject *text) 718{ 719 if (!self->encoding_start_of_stream) { 720 /* Skip the BOM and use native byte ordering */ 721#if PY_BIG_ENDIAN 722 return utf16be_encode(self, text); 723#else 724 return utf16le_encode(self, text); 725#endif 726 } 727 return _PyUnicode_EncodeUTF16(text, 728 PyUnicode_AsUTF8(self->errors), 0); 729} 730 731static PyObject * 732utf32be_encode(textio *self, PyObject *text) 733{ 734 return _PyUnicode_EncodeUTF32(text, 735 PyUnicode_AsUTF8(self->errors), 1); 736} 737 738static PyObject * 739utf32le_encode(textio *self, PyObject *text) 740{ 741 return _PyUnicode_EncodeUTF32(text, 742 PyUnicode_AsUTF8(self->errors), -1); 743} 744 745static PyObject * 746utf32_encode(textio *self, PyObject *text) 747{ 748 if (!self->encoding_start_of_stream) { 749 /* Skip the BOM and use native byte ordering */ 750#if PY_BIG_ENDIAN 751 return utf32be_encode(self, text); 752#else 753 return utf32le_encode(self, text); 754#endif 755 } 756 return _PyUnicode_EncodeUTF32(text, 757 PyUnicode_AsUTF8(self->errors), 0); 758} 759 760static PyObject * 761utf8_encode(textio *self, PyObject *text) 762{ 763 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors)); 764} 765 766static PyObject * 767latin1_encode(textio *self, PyObject *text) 768{ 769 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors)); 770} 771 772// Return true when encoding can be skipped when text is ascii. 
773static inline int 774is_asciicompat_encoding(encodefunc_t f) 775{ 776 return f == (encodefunc_t) ascii_encode 777 || f == (encodefunc_t) latin1_encode 778 || f == (encodefunc_t) utf8_encode; 779} 780 781/* Map normalized encoding names onto the specialized encoding funcs */ 782 783typedef struct { 784 const char *name; 785 encodefunc_t encodefunc; 786} encodefuncentry; 787 788static const encodefuncentry encodefuncs[] = { 789 {"ascii", (encodefunc_t) ascii_encode}, 790 {"iso8859-1", (encodefunc_t) latin1_encode}, 791 {"utf-8", (encodefunc_t) utf8_encode}, 792 {"utf-16-be", (encodefunc_t) utf16be_encode}, 793 {"utf-16-le", (encodefunc_t) utf16le_encode}, 794 {"utf-16", (encodefunc_t) utf16_encode}, 795 {"utf-32-be", (encodefunc_t) utf32be_encode}, 796 {"utf-32-le", (encodefunc_t) utf32le_encode}, 797 {"utf-32", (encodefunc_t) utf32_encode}, 798 {NULL, NULL} 799}; 800 801static int 802validate_newline(const char *newline) 803{ 804 if (newline && newline[0] != '\0' 805 && !(newline[0] == '\n' && newline[1] == '\0') 806 && !(newline[0] == '\r' && newline[1] == '\0') 807 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { 808 PyErr_Format(PyExc_ValueError, 809 "illegal newline value: %s", newline); 810 return -1; 811 } 812 return 0; 813} 814 815static int 816set_newline(textio *self, const char *newline) 817{ 818 PyObject *old = self->readnl; 819 if (newline == NULL) { 820 self->readnl = NULL; 821 } 822 else { 823 self->readnl = PyUnicode_FromString(newline); 824 if (self->readnl == NULL) { 825 self->readnl = old; 826 return -1; 827 } 828 } 829 self->readuniversal = (newline == NULL || newline[0] == '\0'); 830 self->readtranslate = (newline == NULL); 831 self->writetranslate = (newline == NULL || newline[0] != '\0'); 832 if (!self->readuniversal && self->readnl != NULL) { 833 // validate_newline() accepts only ASCII newlines. 
834 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND); 835 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl); 836 if (strcmp(self->writenl, "\n") == 0) { 837 self->writenl = NULL; 838 } 839 } 840 else { 841#ifdef MS_WINDOWS 842 self->writenl = "\r\n"; 843#else 844 self->writenl = NULL; 845#endif 846 } 847 Py_XDECREF(old); 848 return 0; 849} 850 851static int 852_textiowrapper_set_decoder(textio *self, PyObject *codec_info, 853 const char *errors) 854{ 855 PyObject *res; 856 int r; 857 858 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); 859 if (res == NULL) 860 return -1; 861 862 r = PyObject_IsTrue(res); 863 Py_DECREF(res); 864 if (r == -1) 865 return -1; 866 867 if (r != 1) 868 return 0; 869 870 Py_CLEAR(self->decoder); 871 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors); 872 if (self->decoder == NULL) 873 return -1; 874 875 if (self->readuniversal) { 876 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs( 877 (PyObject *)&PyIncrementalNewlineDecoder_Type, 878 self->decoder, self->readtranslate ? Py_True : Py_False, NULL); 879 if (incrementalDecoder == NULL) 880 return -1; 881 Py_CLEAR(self->decoder); 882 self->decoder = incrementalDecoder; 883 } 884 885 return 0; 886} 887 888static PyObject* 889_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof) 890{ 891 PyObject *chars; 892 893 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type)) 894 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof); 895 else 896 chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes, 897 eof ? 
Py_True : Py_False, NULL); 898 899 if (check_decoded(chars) < 0) 900 // check_decoded already decreases refcount 901 return NULL; 902 903 return chars; 904} 905 906static int 907_textiowrapper_set_encoder(textio *self, PyObject *codec_info, 908 const char *errors) 909{ 910 PyObject *res; 911 int r; 912 913 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); 914 if (res == NULL) 915 return -1; 916 917 r = PyObject_IsTrue(res); 918 Py_DECREF(res); 919 if (r == -1) 920 return -1; 921 922 if (r != 1) 923 return 0; 924 925 Py_CLEAR(self->encoder); 926 self->encodefunc = NULL; 927 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors); 928 if (self->encoder == NULL) 929 return -1; 930 931 /* Get the normalized named of the codec */ 932 if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) { 933 return -1; 934 } 935 if (res != NULL && PyUnicode_Check(res)) { 936 const encodefuncentry *e = encodefuncs; 937 while (e->name != NULL) { 938 if (_PyUnicode_EqualToASCIIString(res, e->name)) { 939 self->encodefunc = e->encodefunc; 940 break; 941 } 942 e++; 943 } 944 } 945 Py_XDECREF(res); 946 947 return 0; 948} 949 950static int 951_textiowrapper_fix_encoder_state(textio *self) 952{ 953 if (!self->seekable || !self->encoder) { 954 return 0; 955 } 956 957 self->encoding_start_of_stream = 1; 958 959 PyObject *cookieObj = PyObject_CallMethodNoArgs( 960 self->buffer, &_Py_ID(tell)); 961 if (cookieObj == NULL) { 962 return -1; 963 } 964 965 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ); 966 Py_DECREF(cookieObj); 967 if (cmp < 0) { 968 return -1; 969 } 970 971 if (cmp == 0) { 972 self->encoding_start_of_stream = 0; 973 PyObject *res = PyObject_CallMethodOneArg( 974 self->encoder, &_Py_ID(setstate), _PyLong_GetZero()); 975 if (res == NULL) { 976 return -1; 977 } 978 Py_DECREF(res); 979 } 980 981 return 0; 982} 983 984static int 985io_check_errors(PyObject *errors) 986{ 987 assert(errors != NULL && errors != Py_None); 988 
989 PyInterpreterState *interp = _PyInterpreterState_GET(); 990#ifndef Py_DEBUG 991 /* In release mode, only check in development mode (-X dev) */ 992 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { 993 return 0; 994 } 995#else 996 /* Always check in debug mode */ 997#endif 998 999 /* Avoid calling PyCodec_LookupError() before the codec registry is ready: 1000 before_PyUnicode_InitEncodings() is called. */ 1001 if (!interp->unicode.fs_codec.encoding) { 1002 return 0; 1003 } 1004 1005 Py_ssize_t name_length; 1006 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length); 1007 if (name == NULL) { 1008 return -1; 1009 } 1010 if (strlen(name) != (size_t)name_length) { 1011 PyErr_SetString(PyExc_ValueError, "embedded null character in errors"); 1012 return -1; 1013 } 1014 PyObject *handler = PyCodec_LookupError(name); 1015 if (handler != NULL) { 1016 Py_DECREF(handler); 1017 return 0; 1018 } 1019 return -1; 1020} 1021 1022 1023 1024/*[clinic input] 1025_io.TextIOWrapper.__init__ 1026 buffer: object 1027 encoding: str(accept={str, NoneType}) = None 1028 errors: object = None 1029 newline: str(accept={str, NoneType}) = None 1030 line_buffering: bool(accept={int}) = False 1031 write_through: bool(accept={int}) = False 1032 1033Character and line based layer over a BufferedIOBase object, buffer. 1034 1035encoding gives the name of the encoding that the stream will be 1036decoded or encoded with. It defaults to locale.getencoding(). 1037 1038errors determines the strictness of encoding and decoding (see 1039help(codecs.Codec) or the documentation for codecs.register) and 1040defaults to "strict". 1041 1042newline controls how line endings are handled. It can be None, '', 1043'\n', '\r', and '\r\n'. It works as follows: 1044 1045* On input, if newline is None, universal newlines mode is 1046 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and 1047 these are translated into '\n' before being returned to the 1048 caller. 
If it is '', universal newline mode is enabled, but line
  endings are returned to the caller untranslated. If it has any of
  the other legal values, input lines are only terminated by the given
  string, and the line ending is returned to the caller untranslated.

* On output, if newline is None, any '\n' characters written are
  translated to the system default line separator, os.linesep. If
  newline is '' or '\n', no translation takes place. If newline is any
  of the other legal values, any '\n' characters written are translated
  to the given string.

If line_buffering is True, a call to flush is implied when a call to
write contains a newline character.
[clinic start generated code]*/

static int
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
                                const char *encoding, PyObject *errors,
                                const char *newline, int line_buffering,
                                int write_through)
/*[clinic end generated code: output=72267c0c01032ed2 input=72590963698f289b]*/
{
    PyObject *raw, *codec_info = NULL;
    PyObject *res;
    int r;

    /* `ok` stays 0 until every step below has succeeded, so a failed
       __init__ leaves the object flagged as uninitialized. */
    self->ok = 0;
    self->detached = 0;

    /* Optionally warn when the caller relied on the default encoding. */
    if (encoding == NULL) {
        PyInterpreterState *interp = _PyInterpreterState_GET();
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
            if (PyErr_WarnEx(PyExc_EncodingWarning,
                             "'encoding' argument not specified", 1)) {
                return -1;
            }
        }
    }

    /* Normalize `errors`: None means "strict"; anything else must be a str,
       which is then validated by io_check_errors(). */
    if (errors == Py_None) {
        errors = &_Py_ID(strict);
    }
    else if (!PyUnicode_Check(errors)) {
        // Check 'errors' argument here because Argument Clinic doesn't support
        // 'str(accept={str, NoneType})' converter.
        PyErr_Format(
            PyExc_TypeError,
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
            Py_TYPE(errors)->tp_name);
        return -1;
    }
    else if (io_check_errors(errors)) {
        return -1;
    }

    if (validate_newline(newline) < 0) {
        return -1;
    }

    /* Release whatever these fields currently reference and reset the
       plain-data state before (re)building the object. */
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);
    self->decoded_chars_used = 0;
    self->pending_bytes_count = 0;
    self->encodefunc = NULL;
    self->b2cratio = 0.0;

    /* Pick the default encoding object: "utf-8" in UTF-8 mode, otherwise
       the locale encoding (also used for an explicit "locale"). */
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
        _Py_DECLARE_STR(utf_8, "utf-8");
        self->encoding = Py_NewRef(&_Py_STR(utf_8));
    }
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
        self->encoding = _Py_GetLocaleEncodingObject();
        if (self->encoding == NULL) {
            goto error;
        }
        assert(PyUnicode_Check(self->encoding));
    }

    /* From here on `encoding` (C string) and self->encoding (str object)
       are kept in agreement: whichever one exists produces the other. */
    if (self->encoding != NULL) {
        encoding = PyUnicode_AsUTF8(self->encoding);
        if (encoding == NULL)
            goto error;
    }
    else if (encoding != NULL) {
        self->encoding = PyUnicode_FromString(encoding);
        if (self->encoding == NULL)
            goto error;
    }
    else {
        /* NOTE(review): this branch looks unreachable -- whenever
           `encoding` is NULL one of the two branches above has either set
           self->encoding or jumped to error. */
        PyErr_SetString(PyExc_OSError,
                        "could not determine default encoding");
        goto error;
    }

    /* Check we have been asked for a real text encoding */
    codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
    if (codec_info == NULL) {
        Py_CLEAR(self->encoding);
        goto error;
    }

    /* XXX: Failures beyond this point have the potential to leak elements
     * of the partially constructed object (like self->encoding)
     */

    Py_INCREF(errors);
    self->errors = errors;
    self->chunk_size = 8192;
    self->line_buffering = line_buffering;
    self->write_through = write_through;
    if (set_newline(self, newline) < 0) {
        goto error;
    }

    /* The decoder/encoder builders below query self->buffer, so it must be
       stored before they are called. */
    self->buffer = buffer;
    Py_INCREF(buffer);

    /* Build the decoder object */
    if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
        goto error;

    /* Build the encoder object */
    if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
        goto error;

    /* Finished sorting out the codec details */
    Py_CLEAR(codec_info);

    /* Only the exact C buffered types are inspected for a `raw` attribute. */
    if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
        Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
        Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
    {
        if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
            goto error;
        /* Cache the raw FileIO object to speed up 'closed' checks */
        if (raw != NULL) {
            if (Py_IS_TYPE(raw, &PyFileIO_Type))
                self->raw = raw;
            else
                Py_DECREF(raw);
        }
    }

    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
    if (res == NULL)
        goto error;
    r = PyObject_IsTrue(res);
    Py_DECREF(res);
    if (r < 0)
        goto error;
    self->seekable = self->telling = r;

    /* Only the presence of `read1` is recorded; the attribute value itself
       is dropped again. */
    r = _PyObject_LookupAttr(buffer, &_Py_ID(read1), &res);
    if (r < 0) {
        goto error;
    }
    Py_XDECREF(res);
    self->has_read1 = r;

    self->encoding_start_of_stream = 0;
    if (_textiowrapper_fix_encoder_state(self) < 0) {
        goto error;
    }

    self->ok = 1;
    return 0;

  error:
    Py_XDECREF(codec_info);
    return -1;
}

/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
 * -1 on error.
1230 */ 1231static int 1232convert_optional_bool(PyObject *obj, int default_value) 1233{ 1234 long v; 1235 if (obj == Py_None) { 1236 v = default_value; 1237 } 1238 else { 1239 v = PyLong_AsLong(obj); 1240 if (v == -1 && PyErr_Occurred()) 1241 return -1; 1242 } 1243 return v != 0; 1244} 1245 1246static int 1247textiowrapper_change_encoding(textio *self, PyObject *encoding, 1248 PyObject *errors, int newline_changed) 1249{ 1250 /* Use existing settings where new settings are not specified */ 1251 if (encoding == Py_None && errors == Py_None && !newline_changed) { 1252 return 0; // no change 1253 } 1254 1255 if (encoding == Py_None) { 1256 encoding = self->encoding; 1257 if (errors == Py_None) { 1258 errors = self->errors; 1259 } 1260 Py_INCREF(encoding); 1261 } 1262 else { 1263 if (_PyUnicode_EqualToASCIIString(encoding, "locale")) { 1264 encoding = _Py_GetLocaleEncodingObject(); 1265 if (encoding == NULL) { 1266 return -1; 1267 } 1268 } else { 1269 Py_INCREF(encoding); 1270 } 1271 if (errors == Py_None) { 1272 errors = &_Py_ID(strict); 1273 } 1274 } 1275 1276 const char *c_errors = PyUnicode_AsUTF8(errors); 1277 if (c_errors == NULL) { 1278 Py_DECREF(encoding); 1279 return -1; 1280 } 1281 1282 // Create new encoder & decoder 1283 PyObject *codec_info = _PyCodec_LookupTextEncoding( 1284 PyUnicode_AsUTF8(encoding), "codecs.open()"); 1285 if (codec_info == NULL) { 1286 Py_DECREF(encoding); 1287 return -1; 1288 } 1289 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 || 1290 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) { 1291 Py_DECREF(codec_info); 1292 Py_DECREF(encoding); 1293 return -1; 1294 } 1295 Py_DECREF(codec_info); 1296 1297 Py_INCREF(errors); 1298 Py_SETREF(self->encoding, encoding); 1299 Py_SETREF(self->errors, errors); 1300 1301 return _textiowrapper_fix_encoder_state(self); 1302} 1303 1304/*[clinic input] 1305_io.TextIOWrapper.reconfigure 1306 * 1307 encoding: object = None 1308 errors: object = None 1309 newline as 
newline_obj: object(c_default="NULL") = None 1310 line_buffering as line_buffering_obj: object = None 1311 write_through as write_through_obj: object = None 1312 1313Reconfigure the text stream with new parameters. 1314 1315This also does an implicit stream flush. 1316 1317[clinic start generated code]*/ 1318 1319static PyObject * 1320_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, 1321 PyObject *errors, PyObject *newline_obj, 1322 PyObject *line_buffering_obj, 1323 PyObject *write_through_obj) 1324/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/ 1325{ 1326 int line_buffering; 1327 int write_through; 1328 const char *newline = NULL; 1329 1330 /* Check if something is in the read buffer */ 1331 if (self->decoded_chars != NULL) { 1332 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) { 1333 _unsupported("It is not possible to set the encoding or newline " 1334 "of stream after the first read"); 1335 return NULL; 1336 } 1337 } 1338 1339 if (newline_obj != NULL && newline_obj != Py_None) { 1340 newline = PyUnicode_AsUTF8(newline_obj); 1341 if (newline == NULL || validate_newline(newline) < 0) { 1342 return NULL; 1343 } 1344 } 1345 1346 line_buffering = convert_optional_bool(line_buffering_obj, 1347 self->line_buffering); 1348 write_through = convert_optional_bool(write_through_obj, 1349 self->write_through); 1350 if (line_buffering < 0 || write_through < 0) { 1351 return NULL; 1352 } 1353 1354 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush)); 1355 if (res == NULL) { 1356 return NULL; 1357 } 1358 Py_DECREF(res); 1359 self->b2cratio = 0; 1360 1361 if (newline_obj != NULL && set_newline(self, newline) < 0) { 1362 return NULL; 1363 } 1364 1365 if (textiowrapper_change_encoding( 1366 self, encoding, errors, newline_obj != NULL) < 0) { 1367 return NULL; 1368 } 1369 1370 self->line_buffering = line_buffering; 1371 self->write_through = write_through; 1372 Py_RETURN_NONE; 1373} 
1374 1375static int 1376textiowrapper_clear(textio *self) 1377{ 1378 self->ok = 0; 1379 Py_CLEAR(self->buffer); 1380 Py_CLEAR(self->encoding); 1381 Py_CLEAR(self->encoder); 1382 Py_CLEAR(self->decoder); 1383 Py_CLEAR(self->readnl); 1384 Py_CLEAR(self->decoded_chars); 1385 Py_CLEAR(self->pending_bytes); 1386 Py_CLEAR(self->snapshot); 1387 Py_CLEAR(self->errors); 1388 Py_CLEAR(self->raw); 1389 1390 Py_CLEAR(self->dict); 1391 return 0; 1392} 1393 1394static void 1395textiowrapper_dealloc(textio *self) 1396{ 1397 self->finalizing = 1; 1398 if (_PyIOBase_finalize((PyObject *) self) < 0) 1399 return; 1400 self->ok = 0; 1401 _PyObject_GC_UNTRACK(self); 1402 if (self->weakreflist != NULL) 1403 PyObject_ClearWeakRefs((PyObject *)self); 1404 textiowrapper_clear(self); 1405 Py_TYPE(self)->tp_free((PyObject *)self); 1406} 1407 1408static int 1409textiowrapper_traverse(textio *self, visitproc visit, void *arg) 1410{ 1411 Py_VISIT(self->buffer); 1412 Py_VISIT(self->encoding); 1413 Py_VISIT(self->encoder); 1414 Py_VISIT(self->decoder); 1415 Py_VISIT(self->readnl); 1416 Py_VISIT(self->decoded_chars); 1417 Py_VISIT(self->pending_bytes); 1418 Py_VISIT(self->snapshot); 1419 Py_VISIT(self->errors); 1420 Py_VISIT(self->raw); 1421 1422 Py_VISIT(self->dict); 1423 return 0; 1424} 1425 1426static PyObject * 1427textiowrapper_closed_get(textio *self, void *context); 1428 1429/* This macro takes some shortcuts to make the common case faster. 
*/ 1430#define CHECK_CLOSED(self) \ 1431 do { \ 1432 int r; \ 1433 PyObject *_res; \ 1434 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \ 1435 if (self->raw != NULL) \ 1436 r = _PyFileIO_closed(self->raw); \ 1437 else { \ 1438 _res = textiowrapper_closed_get(self, NULL); \ 1439 if (_res == NULL) \ 1440 return NULL; \ 1441 r = PyObject_IsTrue(_res); \ 1442 Py_DECREF(_res); \ 1443 if (r < 0) \ 1444 return NULL; \ 1445 } \ 1446 if (r > 0) { \ 1447 PyErr_SetString(PyExc_ValueError, \ 1448 "I/O operation on closed file."); \ 1449 return NULL; \ 1450 } \ 1451 } \ 1452 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \ 1453 return NULL; \ 1454 } while (0) 1455 1456#define CHECK_INITIALIZED(self) \ 1457 if (self->ok <= 0) { \ 1458 PyErr_SetString(PyExc_ValueError, \ 1459 "I/O operation on uninitialized object"); \ 1460 return NULL; \ 1461 } 1462 1463#define CHECK_ATTACHED(self) \ 1464 CHECK_INITIALIZED(self); \ 1465 if (self->detached) { \ 1466 PyErr_SetString(PyExc_ValueError, \ 1467 "underlying buffer has been detached"); \ 1468 return NULL; \ 1469 } 1470 1471#define CHECK_ATTACHED_INT(self) \ 1472 if (self->ok <= 0) { \ 1473 PyErr_SetString(PyExc_ValueError, \ 1474 "I/O operation on uninitialized object"); \ 1475 return -1; \ 1476 } else if (self->detached) { \ 1477 PyErr_SetString(PyExc_ValueError, \ 1478 "underlying buffer has been detached"); \ 1479 return -1; \ 1480 } 1481 1482 1483/*[clinic input] 1484_io.TextIOWrapper.detach 1485[clinic start generated code]*/ 1486 1487static PyObject * 1488_io_TextIOWrapper_detach_impl(textio *self) 1489/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/ 1490{ 1491 PyObject *buffer, *res; 1492 CHECK_ATTACHED(self); 1493 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush)); 1494 if (res == NULL) 1495 return NULL; 1496 Py_DECREF(res); 1497 buffer = self->buffer; 1498 self->buffer = NULL; 1499 self->detached = 1; 1500 return buffer; 1501} 1502 1503/* Flush the internal 
write buffer. This doesn't explicitly flush the 1504 underlying buffered object, though. */ 1505static int 1506_textiowrapper_writeflush(textio *self) 1507{ 1508 if (self->pending_bytes == NULL) 1509 return 0; 1510 1511 PyObject *pending = self->pending_bytes; 1512 PyObject *b; 1513 1514 if (PyBytes_Check(pending)) { 1515 b = pending; 1516 Py_INCREF(b); 1517 } 1518 else if (PyUnicode_Check(pending)) { 1519 assert(PyUnicode_IS_ASCII(pending)); 1520 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count); 1521 b = PyBytes_FromStringAndSize( 1522 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending)); 1523 if (b == NULL) { 1524 return -1; 1525 } 1526 } 1527 else { 1528 assert(PyList_Check(pending)); 1529 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count); 1530 if (b == NULL) { 1531 return -1; 1532 } 1533 1534 char *buf = PyBytes_AsString(b); 1535 Py_ssize_t pos = 0; 1536 1537 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) { 1538 PyObject *obj = PyList_GET_ITEM(pending, i); 1539 char *src; 1540 Py_ssize_t len; 1541 if (PyUnicode_Check(obj)) { 1542 assert(PyUnicode_IS_ASCII(obj)); 1543 src = PyUnicode_DATA(obj); 1544 len = PyUnicode_GET_LENGTH(obj); 1545 } 1546 else { 1547 assert(PyBytes_Check(obj)); 1548 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) { 1549 Py_DECREF(b); 1550 return -1; 1551 } 1552 } 1553 memcpy(buf + pos, src, len); 1554 pos += len; 1555 } 1556 assert(pos == self->pending_bytes_count); 1557 } 1558 1559 self->pending_bytes_count = 0; 1560 self->pending_bytes = NULL; 1561 Py_DECREF(pending); 1562 1563 PyObject *ret; 1564 do { 1565 ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b); 1566 } while (ret == NULL && _PyIO_trap_eintr()); 1567 Py_DECREF(b); 1568 // NOTE: We cleared buffer but we don't know how many bytes are actually written 1569 // when an error occurred. 
1570 if (ret == NULL) 1571 return -1; 1572 Py_DECREF(ret); 1573 return 0; 1574} 1575 1576/*[clinic input] 1577_io.TextIOWrapper.write 1578 text: unicode 1579 / 1580[clinic start generated code]*/ 1581 1582static PyObject * 1583_io_TextIOWrapper_write_impl(textio *self, PyObject *text) 1584/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/ 1585{ 1586 PyObject *ret; 1587 PyObject *b; 1588 Py_ssize_t textlen; 1589 int haslf = 0; 1590 int needflush = 0, text_needflush = 0; 1591 1592 if (PyUnicode_READY(text) == -1) 1593 return NULL; 1594 1595 CHECK_ATTACHED(self); 1596 CHECK_CLOSED(self); 1597 1598 if (self->encoder == NULL) 1599 return _unsupported("not writable"); 1600 1601 Py_INCREF(text); 1602 1603 textlen = PyUnicode_GET_LENGTH(text); 1604 1605 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering) 1606 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1) 1607 haslf = 1; 1608 1609 if (haslf && self->writetranslate && self->writenl != NULL) { 1610 PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace), 1611 "ss", "\n", self->writenl); 1612 Py_DECREF(text); 1613 if (newtext == NULL) 1614 return NULL; 1615 text = newtext; 1616 } 1617 1618 if (self->write_through) 1619 text_needflush = 1; 1620 if (self->line_buffering && 1621 (haslf || 1622 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1)) 1623 needflush = 1; 1624 1625 /* XXX What if we were just reading? 
*/ 1626 if (self->encodefunc != NULL) { 1627 if (PyUnicode_IS_ASCII(text) && 1628 // See bpo-43260 1629 PyUnicode_GET_LENGTH(text) <= self->chunk_size && 1630 is_asciicompat_encoding(self->encodefunc)) { 1631 b = text; 1632 Py_INCREF(b); 1633 } 1634 else { 1635 b = (*self->encodefunc)((PyObject *) self, text); 1636 } 1637 self->encoding_start_of_stream = 0; 1638 } 1639 else { 1640 b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text); 1641 } 1642 1643 Py_DECREF(text); 1644 if (b == NULL) 1645 return NULL; 1646 if (b != text && !PyBytes_Check(b)) { 1647 PyErr_Format(PyExc_TypeError, 1648 "encoder should return a bytes object, not '%.200s'", 1649 Py_TYPE(b)->tp_name); 1650 Py_DECREF(b); 1651 return NULL; 1652 } 1653 1654 Py_ssize_t bytes_len; 1655 if (b == text) { 1656 bytes_len = PyUnicode_GET_LENGTH(b); 1657 } 1658 else { 1659 bytes_len = PyBytes_GET_SIZE(b); 1660 } 1661 1662 if (self->pending_bytes == NULL) { 1663 self->pending_bytes_count = 0; 1664 self->pending_bytes = b; 1665 } 1666 else if (self->pending_bytes_count + bytes_len > self->chunk_size) { 1667 // Prevent to concatenate more than chunk_size data. 
1668 if (_textiowrapper_writeflush(self) < 0) { 1669 Py_DECREF(b); 1670 return NULL; 1671 } 1672 self->pending_bytes = b; 1673 } 1674 else if (!PyList_CheckExact(self->pending_bytes)) { 1675 PyObject *list = PyList_New(2); 1676 if (list == NULL) { 1677 Py_DECREF(b); 1678 return NULL; 1679 } 1680 PyList_SET_ITEM(list, 0, self->pending_bytes); 1681 PyList_SET_ITEM(list, 1, b); 1682 self->pending_bytes = list; 1683 } 1684 else { 1685 if (PyList_Append(self->pending_bytes, b) < 0) { 1686 Py_DECREF(b); 1687 return NULL; 1688 } 1689 Py_DECREF(b); 1690 } 1691 1692 self->pending_bytes_count += bytes_len; 1693 if (self->pending_bytes_count >= self->chunk_size || needflush || 1694 text_needflush) { 1695 if (_textiowrapper_writeflush(self) < 0) 1696 return NULL; 1697 } 1698 1699 if (needflush) { 1700 ret = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush)); 1701 if (ret == NULL) 1702 return NULL; 1703 Py_DECREF(ret); 1704 } 1705 1706 textiowrapper_set_decoded_chars(self, NULL); 1707 Py_CLEAR(self->snapshot); 1708 1709 if (self->decoder) { 1710 ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset)); 1711 if (ret == NULL) 1712 return NULL; 1713 Py_DECREF(ret); 1714 } 1715 1716 return PyLong_FromSsize_t(textlen); 1717} 1718 1719/* Steal a reference to chars and store it in the decoded_char buffer; 1720 */ 1721static void 1722textiowrapper_set_decoded_chars(textio *self, PyObject *chars) 1723{ 1724 Py_XSETREF(self->decoded_chars, chars); 1725 self->decoded_chars_used = 0; 1726} 1727 1728static PyObject * 1729textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) 1730{ 1731 PyObject *chars; 1732 Py_ssize_t avail; 1733 1734 if (self->decoded_chars == NULL) 1735 return PyUnicode_FromStringAndSize(NULL, 0); 1736 1737 /* decoded_chars is guaranteed to be "ready". 
*/ 1738 avail = (PyUnicode_GET_LENGTH(self->decoded_chars) 1739 - self->decoded_chars_used); 1740 1741 assert(avail >= 0); 1742 1743 if (n < 0 || n > avail) 1744 n = avail; 1745 1746 if (self->decoded_chars_used > 0 || n < avail) { 1747 chars = PyUnicode_Substring(self->decoded_chars, 1748 self->decoded_chars_used, 1749 self->decoded_chars_used + n); 1750 if (chars == NULL) 1751 return NULL; 1752 } 1753 else { 1754 chars = self->decoded_chars; 1755 Py_INCREF(chars); 1756 } 1757 1758 self->decoded_chars_used += n; 1759 return chars; 1760} 1761 1762/* Read and decode the next chunk of data from the BufferedReader. 1763 */ 1764static int 1765textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) 1766{ 1767 PyObject *dec_buffer = NULL; 1768 PyObject *dec_flags = NULL; 1769 PyObject *input_chunk = NULL; 1770 Py_buffer input_chunk_buf; 1771 PyObject *decoded_chars, *chunk_size; 1772 Py_ssize_t nbytes, nchars; 1773 int eof; 1774 1775 /* The return value is True unless EOF was reached. The decoded string is 1776 * placed in self._decoded_chars (replacing its previous value). The 1777 * entire input chunk is sent to the decoder, though some of it may remain 1778 * buffered in the decoder, yet to be converted. 1779 */ 1780 1781 if (self->decoder == NULL) { 1782 _unsupported("not readable"); 1783 return -1; 1784 } 1785 1786 if (self->telling) { 1787 /* To prepare for tell(), we need to snapshot a point in the file 1788 * where the decoder's input buffer is empty. 1789 */ 1790 PyObject *state = PyObject_CallMethodNoArgs(self->decoder, 1791 &_Py_ID(getstate)); 1792 if (state == NULL) 1793 return -1; 1794 /* Given this, we know there was a valid snapshot point 1795 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 
1796 */ 1797 if (!PyTuple_Check(state)) { 1798 PyErr_SetString(PyExc_TypeError, 1799 "illegal decoder state"); 1800 Py_DECREF(state); 1801 return -1; 1802 } 1803 if (!PyArg_ParseTuple(state, 1804 "OO;illegal decoder state", &dec_buffer, &dec_flags)) 1805 { 1806 Py_DECREF(state); 1807 return -1; 1808 } 1809 1810 if (!PyBytes_Check(dec_buffer)) { 1811 PyErr_Format(PyExc_TypeError, 1812 "illegal decoder state: the first item should be a " 1813 "bytes object, not '%.200s'", 1814 Py_TYPE(dec_buffer)->tp_name); 1815 Py_DECREF(state); 1816 return -1; 1817 } 1818 Py_INCREF(dec_buffer); 1819 Py_INCREF(dec_flags); 1820 Py_DECREF(state); 1821 } 1822 1823 /* Read a chunk, decode it, and put the result in self._decoded_chars. */ 1824 if (size_hint > 0) { 1825 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint); 1826 } 1827 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint)); 1828 if (chunk_size == NULL) 1829 goto fail; 1830 1831 input_chunk = PyObject_CallMethodOneArg(self->buffer, 1832 (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)), 1833 chunk_size); 1834 Py_DECREF(chunk_size); 1835 if (input_chunk == NULL) 1836 goto fail; 1837 1838 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) { 1839 PyErr_Format(PyExc_TypeError, 1840 "underlying %s() should have returned a bytes-like object, " 1841 "not '%.200s'", (self->has_read1 ? 
"read1": "read"), 1842 Py_TYPE(input_chunk)->tp_name); 1843 goto fail; 1844 } 1845 1846 nbytes = input_chunk_buf.len; 1847 eof = (nbytes == 0); 1848 1849 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof); 1850 PyBuffer_Release(&input_chunk_buf); 1851 if (decoded_chars == NULL) 1852 goto fail; 1853 1854 textiowrapper_set_decoded_chars(self, decoded_chars); 1855 nchars = PyUnicode_GET_LENGTH(decoded_chars); 1856 if (nchars > 0) 1857 self->b2cratio = (double) nbytes / nchars; 1858 else 1859 self->b2cratio = 0.0; 1860 if (nchars > 0) 1861 eof = 0; 1862 1863 if (self->telling) { 1864 /* At the snapshot point, len(dec_buffer) bytes before the read, the 1865 * next input to be decoded is dec_buffer + input_chunk. 1866 */ 1867 PyObject *next_input = dec_buffer; 1868 PyBytes_Concat(&next_input, input_chunk); 1869 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */ 1870 if (next_input == NULL) { 1871 goto fail; 1872 } 1873 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input); 1874 if (snapshot == NULL) { 1875 dec_flags = NULL; 1876 goto fail; 1877 } 1878 Py_XSETREF(self->snapshot, snapshot); 1879 } 1880 Py_DECREF(input_chunk); 1881 1882 return (eof == 0); 1883 1884 fail: 1885 Py_XDECREF(dec_buffer); 1886 Py_XDECREF(dec_flags); 1887 Py_XDECREF(input_chunk); 1888 return -1; 1889} 1890 1891/*[clinic input] 1892_io.TextIOWrapper.read 1893 size as n: Py_ssize_t(accept={int, NoneType}) = -1 1894 / 1895[clinic start generated code]*/ 1896 1897static PyObject * 1898_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) 1899/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/ 1900{ 1901 PyObject *result = NULL, *chunks = NULL; 1902 1903 CHECK_ATTACHED(self); 1904 CHECK_CLOSED(self); 1905 1906 if (self->decoder == NULL) 1907 return _unsupported("not readable"); 1908 1909 if (_textiowrapper_writeflush(self) < 0) 1910 return NULL; 1911 1912 if (n < 0) { 1913 /* Read everything */ 1914 PyObject *bytes = 
PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read)); 1915 PyObject *decoded; 1916 if (bytes == NULL) 1917 goto fail; 1918 1919 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type)) 1920 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder, 1921 bytes, 1); 1922 else 1923 decoded = PyObject_CallMethodObjArgs( 1924 self->decoder, &_Py_ID(decode), bytes, Py_True, NULL); 1925 Py_DECREF(bytes); 1926 if (check_decoded(decoded) < 0) 1927 goto fail; 1928 1929 result = textiowrapper_get_decoded_chars(self, -1); 1930 1931 if (result == NULL) { 1932 Py_DECREF(decoded); 1933 return NULL; 1934 } 1935 1936 PyUnicode_AppendAndDel(&result, decoded); 1937 if (result == NULL) 1938 goto fail; 1939 1940 textiowrapper_set_decoded_chars(self, NULL); 1941 Py_CLEAR(self->snapshot); 1942 return result; 1943 } 1944 else { 1945 int res = 1; 1946 Py_ssize_t remaining = n; 1947 1948 result = textiowrapper_get_decoded_chars(self, n); 1949 if (result == NULL) 1950 goto fail; 1951 if (PyUnicode_READY(result) == -1) 1952 goto fail; 1953 remaining -= PyUnicode_GET_LENGTH(result); 1954 1955 /* Keep reading chunks until we have n characters to return */ 1956 while (remaining > 0) { 1957 res = textiowrapper_read_chunk(self, remaining); 1958 if (res < 0) { 1959 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() 1960 when EINTR occurs so we needn't do it ourselves. 
*/ 1961 if (_PyIO_trap_eintr()) { 1962 continue; 1963 } 1964 goto fail; 1965 } 1966 if (res == 0) /* EOF */ 1967 break; 1968 if (chunks == NULL) { 1969 chunks = PyList_New(0); 1970 if (chunks == NULL) 1971 goto fail; 1972 } 1973 if (PyUnicode_GET_LENGTH(result) > 0 && 1974 PyList_Append(chunks, result) < 0) 1975 goto fail; 1976 Py_DECREF(result); 1977 result = textiowrapper_get_decoded_chars(self, remaining); 1978 if (result == NULL) 1979 goto fail; 1980 remaining -= PyUnicode_GET_LENGTH(result); 1981 } 1982 if (chunks != NULL) { 1983 if (result != NULL && PyList_Append(chunks, result) < 0) 1984 goto fail; 1985 _Py_DECLARE_STR(empty, ""); 1986 Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks)); 1987 if (result == NULL) 1988 goto fail; 1989 Py_CLEAR(chunks); 1990 } 1991 return result; 1992 } 1993 fail: 1994 Py_XDECREF(result); 1995 Py_XDECREF(chunks); 1996 return NULL; 1997} 1998 1999 2000/* NOTE: `end` must point to the real end of the Py_UCS4 storage, 2001 that is to the NUL character. Otherwise the function will produce 2002 incorrect results. 
*/ 2003static const char * 2004find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch) 2005{ 2006 if (kind == PyUnicode_1BYTE_KIND) { 2007 assert(ch < 256); 2008 return (char *) memchr((const void *) s, (char) ch, end - s); 2009 } 2010 for (;;) { 2011 while (PyUnicode_READ(kind, s, 0) > ch) 2012 s += kind; 2013 if (PyUnicode_READ(kind, s, 0) == ch) 2014 return s; 2015 if (s == end) 2016 return NULL; 2017 s += kind; 2018 } 2019} 2020 2021Py_ssize_t 2022_PyIO_find_line_ending( 2023 int translated, int universal, PyObject *readnl, 2024 int kind, const char *start, const char *end, Py_ssize_t *consumed) 2025{ 2026 Py_ssize_t len = (end - start)/kind; 2027 2028 if (translated) { 2029 /* Newlines are already translated, only search for \n */ 2030 const char *pos = find_control_char(kind, start, end, '\n'); 2031 if (pos != NULL) 2032 return (pos - start)/kind + 1; 2033 else { 2034 *consumed = len; 2035 return -1; 2036 } 2037 } 2038 else if (universal) { 2039 /* Universal newline search. Find any of \r, \r\n, \n 2040 * The decoder ensures that \r\n are not split in two pieces 2041 */ 2042 const char *s = start; 2043 for (;;) { 2044 Py_UCS4 ch; 2045 /* Fast path for non-control chars. The loop always ends 2046 since the Unicode string is NUL-terminated. */ 2047 while (PyUnicode_READ(kind, s, 0) > '\r') 2048 s += kind; 2049 if (s >= end) { 2050 *consumed = len; 2051 return -1; 2052 } 2053 ch = PyUnicode_READ(kind, s, 0); 2054 s += kind; 2055 if (ch == '\n') 2056 return (s - start)/kind; 2057 if (ch == '\r') { 2058 if (PyUnicode_READ(kind, s, 0) == '\n') 2059 return (s - start)/kind + 1; 2060 else 2061 return (s - start)/kind; 2062 } 2063 } 2064 } 2065 else { 2066 /* Non-universal mode. */ 2067 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); 2068 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); 2069 /* Assume that readnl is an ASCII character. 
*/ 2070 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); 2071 if (readnl_len == 1) { 2072 const char *pos = find_control_char(kind, start, end, nl[0]); 2073 if (pos != NULL) 2074 return (pos - start)/kind + 1; 2075 *consumed = len; 2076 return -1; 2077 } 2078 else { 2079 const char *s = start; 2080 const char *e = end - (readnl_len - 1)*kind; 2081 const char *pos; 2082 if (e < s) 2083 e = s; 2084 while (s < e) { 2085 Py_ssize_t i; 2086 const char *pos = find_control_char(kind, s, end, nl[0]); 2087 if (pos == NULL || pos >= e) 2088 break; 2089 for (i = 1; i < readnl_len; i++) { 2090 if (PyUnicode_READ(kind, pos, i) != nl[i]) 2091 break; 2092 } 2093 if (i == readnl_len) 2094 return (pos - start)/kind + readnl_len; 2095 s = pos + kind; 2096 } 2097 pos = find_control_char(kind, e, end, nl[0]); 2098 if (pos == NULL) 2099 *consumed = len; 2100 else 2101 *consumed = (pos - start)/kind; 2102 return -1; 2103 } 2104 } 2105} 2106 2107static PyObject * 2108_textiowrapper_readline(textio *self, Py_ssize_t limit) 2109{ 2110 PyObject *line = NULL, *chunks = NULL, *remaining = NULL; 2111 Py_ssize_t start, endpos, chunked, offset_to_buffer; 2112 int res; 2113 2114 CHECK_CLOSED(self); 2115 2116 if (_textiowrapper_writeflush(self) < 0) 2117 return NULL; 2118 2119 chunked = 0; 2120 2121 while (1) { 2122 const char *ptr; 2123 Py_ssize_t line_len; 2124 int kind; 2125 Py_ssize_t consumed = 0; 2126 2127 /* First, get some data if necessary */ 2128 res = 1; 2129 while (!self->decoded_chars || 2130 !PyUnicode_GET_LENGTH(self->decoded_chars)) { 2131 res = textiowrapper_read_chunk(self, 0); 2132 if (res < 0) { 2133 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() 2134 when EINTR occurs so we needn't do it ourselves. 
*/ 2135 if (_PyIO_trap_eintr()) { 2136 continue; 2137 } 2138 goto error; 2139 } 2140 if (res == 0) 2141 break; 2142 } 2143 if (res == 0) { 2144 /* end of file */ 2145 textiowrapper_set_decoded_chars(self, NULL); 2146 Py_CLEAR(self->snapshot); 2147 start = endpos = offset_to_buffer = 0; 2148 break; 2149 } 2150 2151 if (remaining == NULL) { 2152 line = self->decoded_chars; 2153 start = self->decoded_chars_used; 2154 offset_to_buffer = 0; 2155 Py_INCREF(line); 2156 } 2157 else { 2158 assert(self->decoded_chars_used == 0); 2159 line = PyUnicode_Concat(remaining, self->decoded_chars); 2160 start = 0; 2161 offset_to_buffer = PyUnicode_GET_LENGTH(remaining); 2162 Py_CLEAR(remaining); 2163 if (line == NULL) 2164 goto error; 2165 if (PyUnicode_READY(line) == -1) 2166 goto error; 2167 } 2168 2169 ptr = PyUnicode_DATA(line); 2170 line_len = PyUnicode_GET_LENGTH(line); 2171 kind = PyUnicode_KIND(line); 2172 2173 endpos = _PyIO_find_line_ending( 2174 self->readtranslate, self->readuniversal, self->readnl, 2175 kind, 2176 ptr + kind * start, 2177 ptr + kind * line_len, 2178 &consumed); 2179 if (endpos >= 0) { 2180 endpos += start; 2181 if (limit >= 0 && (endpos - start) + chunked >= limit) 2182 endpos = start + limit - chunked; 2183 break; 2184 } 2185 2186 /* We can put aside up to `endpos` */ 2187 endpos = consumed + start; 2188 if (limit >= 0 && (endpos - start) + chunked >= limit) { 2189 /* Didn't find line ending, but reached length limit */ 2190 endpos = start + limit - chunked; 2191 break; 2192 } 2193 2194 if (endpos > start) { 2195 /* No line ending seen yet - put aside current data */ 2196 PyObject *s; 2197 if (chunks == NULL) { 2198 chunks = PyList_New(0); 2199 if (chunks == NULL) 2200 goto error; 2201 } 2202 s = PyUnicode_Substring(line, start, endpos); 2203 if (s == NULL) 2204 goto error; 2205 if (PyList_Append(chunks, s) < 0) { 2206 Py_DECREF(s); 2207 goto error; 2208 } 2209 chunked += PyUnicode_GET_LENGTH(s); 2210 Py_DECREF(s); 2211 } 2212 /* There may be some 
remaining bytes we'll have to prepend to the 2213 next chunk of data */ 2214 if (endpos < line_len) { 2215 remaining = PyUnicode_Substring(line, endpos, line_len); 2216 if (remaining == NULL) 2217 goto error; 2218 } 2219 Py_CLEAR(line); 2220 /* We have consumed the buffer */ 2221 textiowrapper_set_decoded_chars(self, NULL); 2222 } 2223 2224 if (line != NULL) { 2225 /* Our line ends in the current buffer */ 2226 self->decoded_chars_used = endpos - offset_to_buffer; 2227 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) { 2228 PyObject *s = PyUnicode_Substring(line, start, endpos); 2229 Py_CLEAR(line); 2230 if (s == NULL) 2231 goto error; 2232 line = s; 2233 } 2234 } 2235 if (remaining != NULL) { 2236 if (chunks == NULL) { 2237 chunks = PyList_New(0); 2238 if (chunks == NULL) 2239 goto error; 2240 } 2241 if (PyList_Append(chunks, remaining) < 0) 2242 goto error; 2243 Py_CLEAR(remaining); 2244 } 2245 if (chunks != NULL) { 2246 if (line != NULL) { 2247 if (PyList_Append(chunks, line) < 0) 2248 goto error; 2249 Py_DECREF(line); 2250 } 2251 line = PyUnicode_Join(&_Py_STR(empty), chunks); 2252 if (line == NULL) 2253 goto error; 2254 Py_CLEAR(chunks); 2255 } 2256 if (line == NULL) { 2257 line = Py_NewRef(&_Py_STR(empty)); 2258 } 2259 2260 return line; 2261 2262 error: 2263 Py_XDECREF(chunks); 2264 Py_XDECREF(remaining); 2265 Py_XDECREF(line); 2266 return NULL; 2267} 2268 2269/*[clinic input] 2270_io.TextIOWrapper.readline 2271 size: Py_ssize_t = -1 2272 / 2273[clinic start generated code]*/ 2274 2275static PyObject * 2276_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size) 2277/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/ 2278{ 2279 CHECK_ATTACHED(self); 2280 return _textiowrapper_readline(self, size); 2281} 2282 2283/* Seek and Tell */ 2284 2285typedef struct { 2286 Py_off_t start_pos; 2287 int dec_flags; 2288 int bytes_to_feed; 2289 int chars_to_skip; 2290 char need_eof; 2291} cookie_type; 2292 2293/* 2294 To speed up 
cookie packing/unpacking, we store the fields in a temporary 2295 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.). 2296 The following macros define at which offsets in the intermediary byte 2297 string the various CookieStruct fields will be stored. 2298 */ 2299 2300#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char)) 2301 2302#if PY_BIG_ENDIAN 2303/* We want the least significant byte of start_pos to also be the least 2304 significant byte of the cookie, which means that in big-endian mode we 2305 must copy the fields in reverse order. */ 2306 2307# define OFF_START_POS (sizeof(char) + 3 * sizeof(int)) 2308# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int)) 2309# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int)) 2310# define OFF_CHARS_TO_SKIP (sizeof(char)) 2311# define OFF_NEED_EOF 0 2312 2313#else 2314/* Little-endian mode: the least significant byte of start_pos will 2315 naturally end up the least significant byte of the cookie. */ 2316 2317# define OFF_START_POS 0 2318# define OFF_DEC_FLAGS (sizeof(Py_off_t)) 2319# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int)) 2320# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int)) 2321# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int)) 2322 2323#endif 2324 2325static int 2326textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) 2327{ 2328 unsigned char buffer[COOKIE_BUF_LEN]; 2329 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj); 2330 if (cookieLong == NULL) 2331 return -1; 2332 2333 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), 2334 PY_LITTLE_ENDIAN, 0) < 0) { 2335 Py_DECREF(cookieLong); 2336 return -1; 2337 } 2338 Py_DECREF(cookieLong); 2339 2340 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos)); 2341 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags)); 2342 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, 
sizeof(cookie->bytes_to_feed)); 2343 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip)); 2344 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof)); 2345 2346 return 0; 2347} 2348 2349static PyObject * 2350textiowrapper_build_cookie(cookie_type *cookie) 2351{ 2352 unsigned char buffer[COOKIE_BUF_LEN]; 2353 2354 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos)); 2355 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags)); 2356 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed)); 2357 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip)); 2358 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof)); 2359 2360 return _PyLong_FromByteArray(buffer, sizeof(buffer), 2361 PY_LITTLE_ENDIAN, 0); 2362} 2363 2364static int 2365_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) 2366{ 2367 PyObject *res; 2368 /* When seeking to the start of the stream, we call decoder.reset() 2369 rather than decoder.getstate(). 2370 This is for a few decoders such as utf-16 for which the state value 2371 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of 2372 utf-16, that we are expecting a BOM). 
2373 */ 2374 if (cookie->start_pos == 0 && cookie->dec_flags == 0) { 2375 res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset)); 2376 } 2377 else { 2378 res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate), 2379 "((yi))", "", cookie->dec_flags); 2380 } 2381 if (res == NULL) { 2382 return -1; 2383 } 2384 Py_DECREF(res); 2385 return 0; 2386} 2387 2388static int 2389_textiowrapper_encoder_reset(textio *self, int start_of_stream) 2390{ 2391 PyObject *res; 2392 if (start_of_stream) { 2393 res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset)); 2394 self->encoding_start_of_stream = 1; 2395 } 2396 else { 2397 res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate), 2398 _PyLong_GetZero()); 2399 self->encoding_start_of_stream = 0; 2400 } 2401 if (res == NULL) 2402 return -1; 2403 Py_DECREF(res); 2404 return 0; 2405} 2406 2407static int 2408_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) 2409{ 2410 /* Same as _textiowrapper_decoder_setstate() above. 
*/ 2411 return _textiowrapper_encoder_reset( 2412 self, cookie->start_pos == 0 && cookie->dec_flags == 0); 2413} 2414 2415/*[clinic input] 2416_io.TextIOWrapper.seek 2417 cookie as cookieObj: object 2418 whence: int = 0 2419 / 2420[clinic start generated code]*/ 2421 2422static PyObject * 2423_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) 2424/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/ 2425{ 2426 PyObject *posobj; 2427 cookie_type cookie; 2428 PyObject *res; 2429 int cmp; 2430 PyObject *snapshot; 2431 2432 CHECK_ATTACHED(self); 2433 CHECK_CLOSED(self); 2434 2435 Py_INCREF(cookieObj); 2436 2437 if (!self->seekable) { 2438 _unsupported("underlying stream is not seekable"); 2439 goto fail; 2440 } 2441 2442 PyObject *zero = _PyLong_GetZero(); // borrowed reference 2443 2444 switch (whence) { 2445 case SEEK_CUR: 2446 /* seek relative to current position */ 2447 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ); 2448 if (cmp < 0) 2449 goto fail; 2450 2451 if (cmp == 0) { 2452 _unsupported("can't do nonzero cur-relative seeks"); 2453 goto fail; 2454 } 2455 2456 /* Seeking to the current position should attempt to 2457 * sync the underlying buffer with the current position. 
2458 */ 2459 Py_DECREF(cookieObj); 2460 cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell)); 2461 if (cookieObj == NULL) 2462 goto fail; 2463 break; 2464 2465 case SEEK_END: 2466 /* seek relative to end of file */ 2467 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ); 2468 if (cmp < 0) 2469 goto fail; 2470 2471 if (cmp == 0) { 2472 _unsupported("can't do nonzero end-relative seeks"); 2473 goto fail; 2474 } 2475 2476 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush)); 2477 if (res == NULL) 2478 goto fail; 2479 Py_DECREF(res); 2480 2481 textiowrapper_set_decoded_chars(self, NULL); 2482 Py_CLEAR(self->snapshot); 2483 if (self->decoder) { 2484 res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset)); 2485 if (res == NULL) 2486 goto fail; 2487 Py_DECREF(res); 2488 } 2489 2490 res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2); 2491 Py_CLEAR(cookieObj); 2492 if (res == NULL) 2493 goto fail; 2494 if (self->encoder) { 2495 /* If seek() == 0, we are at the start of stream, otherwise not */ 2496 cmp = PyObject_RichCompareBool(res, zero, Py_EQ); 2497 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) { 2498 Py_DECREF(res); 2499 goto fail; 2500 } 2501 } 2502 return res; 2503 2504 case SEEK_SET: 2505 break; 2506 2507 default: 2508 PyErr_Format(PyExc_ValueError, 2509 "invalid whence (%d, should be %d, %d or %d)", whence, 2510 SEEK_SET, SEEK_CUR, SEEK_END); 2511 goto fail; 2512 } 2513 2514 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT); 2515 if (cmp < 0) 2516 goto fail; 2517 2518 if (cmp == 1) { 2519 PyErr_Format(PyExc_ValueError, 2520 "negative seek position %R", cookieObj); 2521 goto fail; 2522 } 2523 2524 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush)); 2525 if (res == NULL) 2526 goto fail; 2527 Py_DECREF(res); 2528 2529 /* The strategy of seek() is to go back to the safe start point 2530 * and replay the effect of read(chars_to_skip) from there. 
2531 */ 2532 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0) 2533 goto fail; 2534 2535 /* Seek back to the safe start point. */ 2536 posobj = PyLong_FromOff_t(cookie.start_pos); 2537 if (posobj == NULL) 2538 goto fail; 2539 res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj); 2540 Py_DECREF(posobj); 2541 if (res == NULL) 2542 goto fail; 2543 Py_DECREF(res); 2544 2545 textiowrapper_set_decoded_chars(self, NULL); 2546 Py_CLEAR(self->snapshot); 2547 2548 /* Restore the decoder to its state from the safe start point. */ 2549 if (self->decoder) { 2550 if (_textiowrapper_decoder_setstate(self, &cookie) < 0) 2551 goto fail; 2552 } 2553 2554 if (cookie.chars_to_skip) { 2555 /* Just like _read_chunk, feed the decoder and save a snapshot. */ 2556 PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read), 2557 "i", cookie.bytes_to_feed); 2558 PyObject *decoded; 2559 2560 if (input_chunk == NULL) 2561 goto fail; 2562 2563 if (!PyBytes_Check(input_chunk)) { 2564 PyErr_Format(PyExc_TypeError, 2565 "underlying read() should have returned a bytes " 2566 "object, not '%.200s'", 2567 Py_TYPE(input_chunk)->tp_name); 2568 Py_DECREF(input_chunk); 2569 goto fail; 2570 } 2571 2572 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk); 2573 if (snapshot == NULL) { 2574 goto fail; 2575 } 2576 Py_XSETREF(self->snapshot, snapshot); 2577 2578 decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode), 2579 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL); 2580 2581 if (check_decoded(decoded) < 0) 2582 goto fail; 2583 2584 textiowrapper_set_decoded_chars(self, decoded); 2585 2586 /* Skip chars_to_skip of the decoded characters. 
*/ 2587 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) { 2588 PyErr_SetString(PyExc_OSError, "can't restore logical file position"); 2589 goto fail; 2590 } 2591 self->decoded_chars_used = cookie.chars_to_skip; 2592 } 2593 else { 2594 snapshot = Py_BuildValue("iy", cookie.dec_flags, ""); 2595 if (snapshot == NULL) 2596 goto fail; 2597 Py_XSETREF(self->snapshot, snapshot); 2598 } 2599 2600 /* Finally, reset the encoder (merely useful for proper BOM handling) */ 2601 if (self->encoder) { 2602 if (_textiowrapper_encoder_setstate(self, &cookie) < 0) 2603 goto fail; 2604 } 2605 return cookieObj; 2606 fail: 2607 Py_XDECREF(cookieObj); 2608 return NULL; 2609 2610} 2611 2612/*[clinic input] 2613_io.TextIOWrapper.tell 2614[clinic start generated code]*/ 2615 2616static PyObject * 2617_io_TextIOWrapper_tell_impl(textio *self) 2618/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/ 2619{ 2620 PyObject *res; 2621 PyObject *posobj = NULL; 2622 cookie_type cookie = {0,0,0,0,0}; 2623 PyObject *next_input; 2624 Py_ssize_t chars_to_skip, chars_decoded; 2625 Py_ssize_t skip_bytes, skip_back; 2626 PyObject *saved_state = NULL; 2627 const char *input, *input_end; 2628 Py_ssize_t dec_buffer_len; 2629 int dec_flags; 2630 2631 CHECK_ATTACHED(self); 2632 CHECK_CLOSED(self); 2633 2634 if (!self->seekable) { 2635 _unsupported("underlying stream is not seekable"); 2636 goto fail; 2637 } 2638 if (!self->telling) { 2639 PyErr_SetString(PyExc_OSError, 2640 "telling position disabled by next() call"); 2641 goto fail; 2642 } 2643 2644 if (_textiowrapper_writeflush(self) < 0) 2645 return NULL; 2646 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush)); 2647 if (res == NULL) 2648 goto fail; 2649 Py_DECREF(res); 2650 2651 posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell)); 2652 if (posobj == NULL) 2653 goto fail; 2654 2655 if (self->decoder == NULL || self->snapshot == NULL) { 2656 assert (self->decoded_chars == NULL || 
PyUnicode_GetLength(self->decoded_chars) == 0); 2657 return posobj; 2658 } 2659 2660#if defined(HAVE_LARGEFILE_SUPPORT) 2661 cookie.start_pos = PyLong_AsLongLong(posobj); 2662#else 2663 cookie.start_pos = PyLong_AsLong(posobj); 2664#endif 2665 Py_DECREF(posobj); 2666 if (PyErr_Occurred()) 2667 goto fail; 2668 2669 /* Skip backward to the snapshot point (see _read_chunk). */ 2670 assert(PyTuple_Check(self->snapshot)); 2671 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input)) 2672 goto fail; 2673 2674 assert (PyBytes_Check(next_input)); 2675 2676 cookie.start_pos -= PyBytes_GET_SIZE(next_input); 2677 2678 /* How many decoded characters have been used up since the snapshot? */ 2679 if (self->decoded_chars_used == 0) { 2680 /* We haven't moved from the snapshot point. */ 2681 return textiowrapper_build_cookie(&cookie); 2682 } 2683 2684 chars_to_skip = self->decoded_chars_used; 2685 2686 /* Decoder state will be restored at the end */ 2687 saved_state = PyObject_CallMethodNoArgs(self->decoder, 2688 &_Py_ID(getstate)); 2689 if (saved_state == NULL) 2690 goto fail; 2691 2692#define DECODER_GETSTATE() do { \ 2693 PyObject *dec_buffer; \ 2694 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \ 2695 &_Py_ID(getstate)); \ 2696 if (_state == NULL) \ 2697 goto fail; \ 2698 if (!PyTuple_Check(_state)) { \ 2699 PyErr_SetString(PyExc_TypeError, \ 2700 "illegal decoder state"); \ 2701 Py_DECREF(_state); \ 2702 goto fail; \ 2703 } \ 2704 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \ 2705 &dec_buffer, &dec_flags)) \ 2706 { \ 2707 Py_DECREF(_state); \ 2708 goto fail; \ 2709 } \ 2710 if (!PyBytes_Check(dec_buffer)) { \ 2711 PyErr_Format(PyExc_TypeError, \ 2712 "illegal decoder state: the first item should be a " \ 2713 "bytes object, not '%.200s'", \ 2714 Py_TYPE(dec_buffer)->tp_name); \ 2715 Py_DECREF(_state); \ 2716 goto fail; \ 2717 } \ 2718 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \ 2719 Py_DECREF(_state); \ 2720 } while (0) 
2721 2722#define DECODER_DECODE(start, len, res) do { \ 2723 PyObject *_decoded = _PyObject_CallMethod( \ 2724 self->decoder, &_Py_ID(decode), "y#", start, len); \ 2725 if (check_decoded(_decoded) < 0) \ 2726 goto fail; \ 2727 res = PyUnicode_GET_LENGTH(_decoded); \ 2728 Py_DECREF(_decoded); \ 2729 } while (0) 2730 2731 /* Fast search for an acceptable start point, close to our 2732 current pos */ 2733 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); 2734 skip_back = 1; 2735 assert(skip_back <= PyBytes_GET_SIZE(next_input)); 2736 input = PyBytes_AS_STRING(next_input); 2737 while (skip_bytes > 0) { 2738 /* Decode up to temptative start point */ 2739 if (_textiowrapper_decoder_setstate(self, &cookie) < 0) 2740 goto fail; 2741 DECODER_DECODE(input, skip_bytes, chars_decoded); 2742 if (chars_decoded <= chars_to_skip) { 2743 DECODER_GETSTATE(); 2744 if (dec_buffer_len == 0) { 2745 /* Before pos and no bytes buffered in decoder => OK */ 2746 cookie.dec_flags = dec_flags; 2747 chars_to_skip -= chars_decoded; 2748 break; 2749 } 2750 /* Skip back by buffered amount and reset heuristic */ 2751 skip_bytes -= dec_buffer_len; 2752 skip_back = 1; 2753 } 2754 else { 2755 /* We're too far ahead, skip back a bit */ 2756 skip_bytes -= skip_back; 2757 skip_back *= 2; 2758 } 2759 } 2760 if (skip_bytes <= 0) { 2761 skip_bytes = 0; 2762 if (_textiowrapper_decoder_setstate(self, &cookie) < 0) 2763 goto fail; 2764 } 2765 2766 /* Note our initial start point. */ 2767 cookie.start_pos += skip_bytes; 2768 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); 2769 if (chars_to_skip == 0) 2770 goto finally; 2771 2772 /* We should be close to the desired position. Now feed the decoder one 2773 * byte at a time until we reach the `chars_to_skip` target. 2774 * As we go, note the nearest "safe start point" before the current 2775 * location (a point where the decoder has nothing buffered, so seek() 2776 * can safely start from there and advance to this location). 
2777 */ 2778 chars_decoded = 0; 2779 input = PyBytes_AS_STRING(next_input); 2780 input_end = input + PyBytes_GET_SIZE(next_input); 2781 input += skip_bytes; 2782 while (input < input_end) { 2783 Py_ssize_t n; 2784 2785 DECODER_DECODE(input, (Py_ssize_t)1, n); 2786 /* We got n chars for 1 byte */ 2787 chars_decoded += n; 2788 cookie.bytes_to_feed += 1; 2789 DECODER_GETSTATE(); 2790 2791 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { 2792 /* Decoder buffer is empty, so this is a safe start point. */ 2793 cookie.start_pos += cookie.bytes_to_feed; 2794 chars_to_skip -= chars_decoded; 2795 cookie.dec_flags = dec_flags; 2796 cookie.bytes_to_feed = 0; 2797 chars_decoded = 0; 2798 } 2799 if (chars_decoded >= chars_to_skip) 2800 break; 2801 input++; 2802 } 2803 if (input == input_end) { 2804 /* We didn't get enough decoded data; signal EOF to get more. */ 2805 PyObject *decoded = _PyObject_CallMethod( 2806 self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True); 2807 if (check_decoded(decoded) < 0) 2808 goto fail; 2809 chars_decoded += PyUnicode_GET_LENGTH(decoded); 2810 Py_DECREF(decoded); 2811 cookie.need_eof = 1; 2812 2813 if (chars_decoded < chars_to_skip) { 2814 PyErr_SetString(PyExc_OSError, 2815 "can't reconstruct logical file position"); 2816 goto fail; 2817 } 2818 } 2819 2820finally: 2821 res = PyObject_CallMethodOneArg( 2822 self->decoder, &_Py_ID(setstate), saved_state); 2823 Py_DECREF(saved_state); 2824 if (res == NULL) 2825 return NULL; 2826 Py_DECREF(res); 2827 2828 /* The returned cookie corresponds to the last safe start point. 
*/ 2829 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); 2830 return textiowrapper_build_cookie(&cookie); 2831 2832fail: 2833 if (saved_state) { 2834 PyObject *type, *value, *traceback; 2835 PyErr_Fetch(&type, &value, &traceback); 2836 res = PyObject_CallMethodOneArg( 2837 self->decoder, &_Py_ID(setstate), saved_state); 2838 _PyErr_ChainExceptions(type, value, traceback); 2839 Py_DECREF(saved_state); 2840 Py_XDECREF(res); 2841 } 2842 return NULL; 2843} 2844 2845/*[clinic input] 2846_io.TextIOWrapper.truncate 2847 pos: object = None 2848 / 2849[clinic start generated code]*/ 2850 2851static PyObject * 2852_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) 2853/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/ 2854{ 2855 PyObject *res; 2856 2857 CHECK_ATTACHED(self) 2858 2859 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush)); 2860 if (res == NULL) 2861 return NULL; 2862 Py_DECREF(res); 2863 2864 return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos); 2865} 2866 2867static PyObject * 2868textiowrapper_repr(textio *self) 2869{ 2870 PyObject *nameobj, *modeobj, *res, *s; 2871 int status; 2872 2873 CHECK_INITIALIZED(self); 2874 2875 res = PyUnicode_FromString("<_io.TextIOWrapper"); 2876 if (res == NULL) 2877 return NULL; 2878 2879 status = Py_ReprEnter((PyObject *)self); 2880 if (status != 0) { 2881 if (status > 0) { 2882 PyErr_Format(PyExc_RuntimeError, 2883 "reentrant call inside %s.__repr__", 2884 Py_TYPE(self)->tp_name); 2885 } 2886 goto error; 2887 } 2888 if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) { 2889 if (!PyErr_ExceptionMatches(PyExc_ValueError)) { 2890 goto error; 2891 } 2892 /* Ignore ValueError raised if the underlying stream was detached */ 2893 PyErr_Clear(); 2894 } 2895 if (nameobj != NULL) { 2896 s = PyUnicode_FromFormat(" name=%R", nameobj); 2897 Py_DECREF(nameobj); 2898 if (s == NULL) 2899 goto error; 2900 
PyUnicode_AppendAndDel(&res, s); 2901 if (res == NULL) 2902 goto error; 2903 } 2904 if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) { 2905 goto error; 2906 } 2907 if (modeobj != NULL) { 2908 s = PyUnicode_FromFormat(" mode=%R", modeobj); 2909 Py_DECREF(modeobj); 2910 if (s == NULL) 2911 goto error; 2912 PyUnicode_AppendAndDel(&res, s); 2913 if (res == NULL) 2914 goto error; 2915 } 2916 s = PyUnicode_FromFormat("%U encoding=%R>", 2917 res, self->encoding); 2918 Py_DECREF(res); 2919 if (status == 0) { 2920 Py_ReprLeave((PyObject *)self); 2921 } 2922 return s; 2923 2924 error: 2925 Py_XDECREF(res); 2926 if (status == 0) { 2927 Py_ReprLeave((PyObject *)self); 2928 } 2929 return NULL; 2930} 2931 2932 2933/* Inquiries */ 2934 2935/*[clinic input] 2936_io.TextIOWrapper.fileno 2937[clinic start generated code]*/ 2938 2939static PyObject * 2940_io_TextIOWrapper_fileno_impl(textio *self) 2941/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/ 2942{ 2943 CHECK_ATTACHED(self); 2944 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno)); 2945} 2946 2947/*[clinic input] 2948_io.TextIOWrapper.seekable 2949[clinic start generated code]*/ 2950 2951static PyObject * 2952_io_TextIOWrapper_seekable_impl(textio *self) 2953/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/ 2954{ 2955 CHECK_ATTACHED(self); 2956 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable)); 2957} 2958 2959/*[clinic input] 2960_io.TextIOWrapper.readable 2961[clinic start generated code]*/ 2962 2963static PyObject * 2964_io_TextIOWrapper_readable_impl(textio *self) 2965/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/ 2966{ 2967 CHECK_ATTACHED(self); 2968 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); 2969} 2970 2971/*[clinic input] 2972_io.TextIOWrapper.writable 2973[clinic start generated code]*/ 2974 2975static PyObject * 
2976_io_TextIOWrapper_writable_impl(textio *self) 2977/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/ 2978{ 2979 CHECK_ATTACHED(self); 2980 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); 2981} 2982 2983/*[clinic input] 2984_io.TextIOWrapper.isatty 2985[clinic start generated code]*/ 2986 2987static PyObject * 2988_io_TextIOWrapper_isatty_impl(textio *self) 2989/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/ 2990{ 2991 CHECK_ATTACHED(self); 2992 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty)); 2993} 2994 2995/*[clinic input] 2996_io.TextIOWrapper.flush 2997[clinic start generated code]*/ 2998 2999static PyObject * 3000_io_TextIOWrapper_flush_impl(textio *self) 3001/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/ 3002{ 3003 CHECK_ATTACHED(self); 3004 CHECK_CLOSED(self); 3005 self->telling = self->seekable; 3006 if (_textiowrapper_writeflush(self) < 0) 3007 return NULL; 3008 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush)); 3009} 3010 3011/*[clinic input] 3012_io.TextIOWrapper.close 3013[clinic start generated code]*/ 3014 3015static PyObject * 3016_io_TextIOWrapper_close_impl(textio *self) 3017/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/ 3018{ 3019 PyObject *res; 3020 int r; 3021 CHECK_ATTACHED(self); 3022 3023 res = textiowrapper_closed_get(self, NULL); 3024 if (res == NULL) 3025 return NULL; 3026 r = PyObject_IsTrue(res); 3027 Py_DECREF(res); 3028 if (r < 0) 3029 return NULL; 3030 3031 if (r > 0) { 3032 Py_RETURN_NONE; /* stream already closed */ 3033 } 3034 else { 3035 PyObject *exc = NULL, *val, *tb; 3036 if (self->finalizing) { 3037 res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn), 3038 (PyObject *)self); 3039 if (res) 3040 Py_DECREF(res); 3041 else 3042 PyErr_Clear(); 3043 } 3044 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush)); 3045 if (res == 
NULL) 3046 PyErr_Fetch(&exc, &val, &tb); 3047 else 3048 Py_DECREF(res); 3049 3050 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close)); 3051 if (exc != NULL) { 3052 _PyErr_ChainExceptions(exc, val, tb); 3053 Py_CLEAR(res); 3054 } 3055 return res; 3056 } 3057} 3058 3059static PyObject * 3060textiowrapper_iternext(textio *self) 3061{ 3062 PyObject *line; 3063 3064 CHECK_ATTACHED(self); 3065 3066 self->telling = 0; 3067 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { 3068 /* Skip method call overhead for speed */ 3069 line = _textiowrapper_readline(self, -1); 3070 } 3071 else { 3072 line = PyObject_CallMethodNoArgs((PyObject *)self, 3073 &_Py_ID(readline)); 3074 if (line && !PyUnicode_Check(line)) { 3075 PyErr_Format(PyExc_OSError, 3076 "readline() should have returned a str object, " 3077 "not '%.200s'", Py_TYPE(line)->tp_name); 3078 Py_DECREF(line); 3079 return NULL; 3080 } 3081 } 3082 3083 if (line == NULL || PyUnicode_READY(line) == -1) 3084 return NULL; 3085 3086 if (PyUnicode_GET_LENGTH(line) == 0) { 3087 /* Reached EOF or would have blocked */ 3088 Py_DECREF(line); 3089 Py_CLEAR(self->snapshot); 3090 self->telling = self->seekable; 3091 return NULL; 3092 } 3093 3094 return line; 3095} 3096 3097static PyObject * 3098textiowrapper_name_get(textio *self, void *context) 3099{ 3100 CHECK_ATTACHED(self); 3101 return PyObject_GetAttr(self->buffer, &_Py_ID(name)); 3102} 3103 3104static PyObject * 3105textiowrapper_closed_get(textio *self, void *context) 3106{ 3107 CHECK_ATTACHED(self); 3108 return PyObject_GetAttr(self->buffer, &_Py_ID(closed)); 3109} 3110 3111static PyObject * 3112textiowrapper_newlines_get(textio *self, void *context) 3113{ 3114 PyObject *res; 3115 CHECK_ATTACHED(self); 3116 if (self->decoder == NULL || 3117 _PyObject_LookupAttr(self->decoder, &_Py_ID(newlines), &res) == 0) 3118 { 3119 Py_RETURN_NONE; 3120 } 3121 return res; 3122} 3123 3124static PyObject * 3125textiowrapper_errors_get(textio *self, void *context) 3126{ 3127 
CHECK_INITIALIZED(self); 3128 Py_INCREF(self->errors); 3129 return self->errors; 3130} 3131 3132static PyObject * 3133textiowrapper_chunk_size_get(textio *self, void *context) 3134{ 3135 CHECK_ATTACHED(self); 3136 return PyLong_FromSsize_t(self->chunk_size); 3137} 3138 3139static int 3140textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context) 3141{ 3142 Py_ssize_t n; 3143 CHECK_ATTACHED_INT(self); 3144 if (arg == NULL) { 3145 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute"); 3146 return -1; 3147 } 3148 n = PyNumber_AsSsize_t(arg, PyExc_ValueError); 3149 if (n == -1 && PyErr_Occurred()) 3150 return -1; 3151 if (n <= 0) { 3152 PyErr_SetString(PyExc_ValueError, 3153 "a strictly positive integer is required"); 3154 return -1; 3155 } 3156 self->chunk_size = n; 3157 return 0; 3158} 3159 3160#include "clinic/textio.c.h" 3161 3162static PyMethodDef incrementalnewlinedecoder_methods[] = { 3163 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF 3164 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF 3165 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF 3166 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF 3167 {NULL} 3168}; 3169 3170static PyGetSetDef incrementalnewlinedecoder_getset[] = { 3171 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL}, 3172 {NULL} 3173}; 3174 3175PyTypeObject PyIncrementalNewlineDecoder_Type = { 3176 PyVarObject_HEAD_INIT(NULL, 0) 3177 "_io.IncrementalNewlineDecoder", /*tp_name*/ 3178 sizeof(nldecoder_object), /*tp_basicsize*/ 3179 0, /*tp_itemsize*/ 3180 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/ 3181 0, /*tp_vectorcall_offset*/ 3182 0, /*tp_getattr*/ 3183 0, /*tp_setattr*/ 3184 0, /*tp_as_async*/ 3185 0, /*tp_repr*/ 3186 0, /*tp_as_number*/ 3187 0, /*tp_as_sequence*/ 3188 0, /*tp_as_mapping*/ 3189 0, /*tp_hash */ 3190 0, /*tp_call*/ 3191 0, /*tp_str*/ 3192 0, /*tp_getattro*/ 3193 0, /*tp_setattro*/ 3194 0, /*tp_as_buffer*/ 3195 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ 
3196 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */ 3197 0, /* tp_traverse */ 3198 0, /* tp_clear */ 3199 0, /* tp_richcompare */ 3200 0, /*tp_weaklistoffset*/ 3201 0, /* tp_iter */ 3202 0, /* tp_iternext */ 3203 incrementalnewlinedecoder_methods, /* tp_methods */ 3204 0, /* tp_members */ 3205 incrementalnewlinedecoder_getset, /* tp_getset */ 3206 0, /* tp_base */ 3207 0, /* tp_dict */ 3208 0, /* tp_descr_get */ 3209 0, /* tp_descr_set */ 3210 0, /* tp_dictoffset */ 3211 _io_IncrementalNewlineDecoder___init__, /* tp_init */ 3212 0, /* tp_alloc */ 3213 PyType_GenericNew, /* tp_new */ 3214}; 3215 3216 3217static PyMethodDef textiowrapper_methods[] = { 3218 _IO_TEXTIOWRAPPER_DETACH_METHODDEF 3219 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF 3220 _IO_TEXTIOWRAPPER_WRITE_METHODDEF 3221 _IO_TEXTIOWRAPPER_READ_METHODDEF 3222 _IO_TEXTIOWRAPPER_READLINE_METHODDEF 3223 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF 3224 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF 3225 3226 _IO_TEXTIOWRAPPER_FILENO_METHODDEF 3227 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF 3228 _IO_TEXTIOWRAPPER_READABLE_METHODDEF 3229 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF 3230 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF 3231 3232 _IO_TEXTIOWRAPPER_SEEK_METHODDEF 3233 _IO_TEXTIOWRAPPER_TELL_METHODDEF 3234 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF 3235 {NULL, NULL} 3236}; 3237 3238static PyMemberDef textiowrapper_members[] = { 3239 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY}, 3240 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY}, 3241 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY}, 3242 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY}, 3243 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0}, 3244 {NULL} 3245}; 3246 3247static PyGetSetDef textiowrapper_getset[] = { 3248 {"name", (getter)textiowrapper_name_get, NULL, NULL}, 3249 {"closed", (getter)textiowrapper_closed_get, NULL, NULL}, 3250/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL}, 3251*/ 3252 {"newlines", 
(getter)textiowrapper_newlines_get, NULL, NULL}, 3253 {"errors", (getter)textiowrapper_errors_get, NULL, NULL}, 3254 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get, 3255 (setter)textiowrapper_chunk_size_set, NULL}, 3256 {NULL} 3257}; 3258 3259PyTypeObject PyTextIOWrapper_Type = { 3260 PyVarObject_HEAD_INIT(NULL, 0) 3261 "_io.TextIOWrapper", /*tp_name*/ 3262 sizeof(textio), /*tp_basicsize*/ 3263 0, /*tp_itemsize*/ 3264 (destructor)textiowrapper_dealloc, /*tp_dealloc*/ 3265 0, /*tp_vectorcall_offset*/ 3266 0, /*tp_getattr*/ 3267 0, /*tps_etattr*/ 3268 0, /*tp_as_async*/ 3269 (reprfunc)textiowrapper_repr,/*tp_repr*/ 3270 0, /*tp_as_number*/ 3271 0, /*tp_as_sequence*/ 3272 0, /*tp_as_mapping*/ 3273 0, /*tp_hash */ 3274 0, /*tp_call*/ 3275 0, /*tp_str*/ 3276 0, /*tp_getattro*/ 3277 0, /*tp_setattro*/ 3278 0, /*tp_as_buffer*/ 3279 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE 3280 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 3281 _io_TextIOWrapper___init____doc__, /* tp_doc */ 3282 (traverseproc)textiowrapper_traverse, /* tp_traverse */ 3283 (inquiry)textiowrapper_clear, /* tp_clear */ 3284 0, /* tp_richcompare */ 3285 offsetof(textio, weakreflist), /*tp_weaklistoffset*/ 3286 0, /* tp_iter */ 3287 (iternextfunc)textiowrapper_iternext, /* tp_iternext */ 3288 textiowrapper_methods, /* tp_methods */ 3289 textiowrapper_members, /* tp_members */ 3290 textiowrapper_getset, /* tp_getset */ 3291 0, /* tp_base */ 3292 0, /* tp_dict */ 3293 0, /* tp_descr_get */ 3294 0, /* tp_descr_set */ 3295 offsetof(textio, dict), /*tp_dictoffset*/ 3296 _io_TextIOWrapper___init__, /* tp_init */ 3297 0, /* tp_alloc */ 3298 PyType_GenericNew, /* tp_new */ 3299 0, /* tp_free */ 3300 0, /* tp_is_gc */ 3301 0, /* tp_bases */ 3302 0, /* tp_mro */ 3303 0, /* tp_cache */ 3304 0, /* tp_subclasses */ 3305 0, /* tp_weaklist */ 3306 0, /* tp_del */ 3307 0, /* tp_version_tag */ 3308 0, /* tp_finalize */ 3309}; 3310