1/* 2 * multibytecodec.c: Common Multibyte Codec Implementation 3 * 4 * Written by Hye-Shik Chang <perky@FreeBSD.org> 5 */ 6 7#define PY_SSIZE_T_CLEAN 8#include "Python.h" 9#include "structmember.h" // PyMemberDef 10#include "multibytecodec.h" 11#include "clinic/multibytecodec.c.h" 12 13#define MODULE_NAME "_multibytecodec" 14 15typedef struct { 16 PyTypeObject *encoder_type; 17 PyTypeObject *decoder_type; 18 PyTypeObject *reader_type; 19 PyTypeObject *writer_type; 20 PyTypeObject *multibytecodec_type; 21 PyObject *str_write; 22} _multibytecodec_state; 23 24static _multibytecodec_state * 25_multibytecodec_get_state(PyObject *module) 26{ 27 _multibytecodec_state *state = PyModule_GetState(module); 28 assert(state != NULL); 29 return state; 30} 31 32static struct PyModuleDef _multibytecodecmodule; 33static _multibytecodec_state * 34_multibyte_codec_find_state_by_type(PyTypeObject *type) 35{ 36 PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule); 37 assert(module != NULL); 38 return _multibytecodec_get_state(module); 39} 40 41#define clinic_get_state() _multibyte_codec_find_state_by_type(type) 42/*[clinic input] 43module _multibytecodec 44class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type" 45class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type" 46class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type" 47class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type" 48class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type" 49[clinic start generated code]*/ 50/*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/ 51#undef clinic_get_state 52 53typedef struct { 54 PyObject *inobj; 55 Py_ssize_t inpos, inlen; 56 unsigned char *outbuf, *outbuf_end; 57 PyObject *excobj, *outobj; 58} MultibyteEncodeBuffer; 59 60typedef struct { 61 const unsigned char *inbuf, *inbuf_top, *inbuf_end; 62 PyObject *excobj; 63 _PyUnicodeWriter writer; 64} MultibyteDecodeBuffer; 65 66static char *incnewkwarglist[] = {"errors", NULL}; 67static char *streamkwarglist[] = {"stream", "errors", NULL}; 68 69static PyObject *multibytecodec_encode(MultibyteCodec *, 70 MultibyteCodec_State *, PyObject *, Py_ssize_t *, 71 PyObject *, int); 72 73#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ 74 75static PyObject * 76make_tuple(PyObject *object, Py_ssize_t len) 77{ 78 PyObject *v, *w; 79 80 if (object == NULL) 81 return NULL; 82 83 v = PyTuple_New(2); 84 if (v == NULL) { 85 Py_DECREF(object); 86 return NULL; 87 } 88 PyTuple_SET_ITEM(v, 0, object); 89 90 w = PyLong_FromSsize_t(len); 91 if (w == NULL) { 92 Py_DECREF(v); 93 return NULL; 94 } 95 PyTuple_SET_ITEM(v, 1, w); 96 97 return v; 98} 99 100static PyObject * 101internal_error_callback(const char *errors) 102{ 103 if (errors == NULL || strcmp(errors, "strict") == 0) 104 return ERROR_STRICT; 105 else if (strcmp(errors, "ignore") == 0) 106 return ERROR_IGNORE; 107 else if (strcmp(errors, "replace") == 0) 108 return ERROR_REPLACE; 109 else 110 return PyUnicode_FromString(errors); 111} 112 113static PyObject * 114call_error_callback(PyObject *errors, PyObject *exc) 115{ 116 PyObject *cb, *r; 117 const char *str; 118 119 assert(PyUnicode_Check(errors)); 120 str = PyUnicode_AsUTF8(errors); 121 if (str == NULL) 122 return NULL; 123 cb = PyCodec_LookupError(str); 124 if (cb == NULL) 125 return NULL; 126 127 r = PyObject_CallOneArg(cb, exc); 128 Py_DECREF(cb); 129 return r; 130} 131 132static PyObject * 133codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored)) 134{ 135 const char *errors; 136 137 if (self->errors == ERROR_STRICT) 138 errors = "strict"; 139 else if (self->errors == ERROR_IGNORE) 140 errors = "ignore"; 141 else if (self->errors == ERROR_REPLACE) 142 errors = "replace"; 143 else { 144 Py_INCREF(self->errors); 145 return self->errors; 146 } 147 148 return PyUnicode_FromString(errors); 149} 150 151static int 152codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, 153 void *closure) 154{ 155 PyObject *cb; 156 const char *str; 157 158 if (value == NULL) { 159 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute"); 160 return -1; 161 } 162 if (!PyUnicode_Check(value)) { 163 PyErr_SetString(PyExc_TypeError, "errors must be a string"); 164 return -1; 165 } 166 167 str = PyUnicode_AsUTF8(value); 168 if (str == NULL) 169 return -1; 170 171 cb = internal_error_callback(str); 172 if (cb == NULL) 173 return -1; 174 175 ERROR_DECREF(self->errors); 176 self->errors = cb; 177 return 0; 178} 179 180/* This getset handlers list is used by all the stateful codec objects */ 181static PyGetSetDef codecctx_getsets[] = { 182 {"errors", (getter)codecctx_errors_get, 183 (setter)codecctx_errors_set, 184 PyDoc_STR("how to treat errors")}, 185 {NULL,} 186}; 187 188static int 189expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) 190{ 191 Py_ssize_t orgpos, orgsize, incsize; 192 193 orgpos = (Py_ssize_t)((char *)buf->outbuf - 194 PyBytes_AS_STRING(buf->outobj)); 195 orgsize = PyBytes_GET_SIZE(buf->outobj); 196 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); 197 198 if (orgsize > PY_SSIZE_T_MAX - incsize) { 199 PyErr_NoMemory(); 200 return -1; 201 } 202 203 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1) 204 return -1; 205 206 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos; 207 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj) 208 + PyBytes_GET_SIZE(buf->outobj); 209 210 return 0; 211} 212#define REQUIRE_ENCODEBUFFER(buf, s) do { \ 213 if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) \ 214 if (expand_encodebuffer(buf, s) == -1) \ 215 goto errorexit; \ 216} while(0) 217 218 219/** 220 * MultibyteCodec object 221 */ 222 223static int 224multibytecodec_encerror(MultibyteCodec *codec, 225 MultibyteCodec_State *state, 226 MultibyteEncodeBuffer *buf, 227 PyObject *errors, Py_ssize_t e) 228{ 229 PyObject *retobj = NULL, *retstr = NULL, *tobj; 230 Py_ssize_t retstrsize, newpos; 231 Py_ssize_t esize, start, end; 232 const char *reason; 233 234 if (e > 0) { 235 reason = "illegal multibyte sequence"; 236 esize = e; 237 } 238 else { 239 switch (e) { 240 case MBERR_TOOSMALL: 241 REQUIRE_ENCODEBUFFER(buf, -1); 242 return 0; /* retry it */ 243 case MBERR_TOOFEW: 244 reason = "incomplete multibyte sequence"; 245 esize = (Py_ssize_t)buf->inpos; 246 break; 247 case MBERR_INTERNAL: 248 PyErr_SetString(PyExc_RuntimeError, 249 "internal codec error"); 250 return -1; 251 default: 252 PyErr_SetString(PyExc_RuntimeError, 253 "unknown runtime error"); 254 return -1; 255 } 256 } 257 258 if (errors == ERROR_REPLACE) { 259 PyObject *replchar; 260 Py_ssize_t r; 261 Py_ssize_t inpos; 262 int kind; 263 const void *data; 264 265 replchar = PyUnicode_FromOrdinal('?'); 266 if (replchar == NULL) 267 goto errorexit; 268 kind = PyUnicode_KIND(replchar); 269 data = PyUnicode_DATA(replchar); 270 271 inpos = 0; 272 for (;;) { 273 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); 274 275 r = codec->encode(state, codec->config, 276 kind, data, &inpos, 1, 277 &buf->outbuf, outleft, 0); 278 if (r == MBERR_TOOSMALL) { 279 REQUIRE_ENCODEBUFFER(buf, -1); 280 continue; 281 } 282 else 283 break; 284 } 285 286 Py_DECREF(replchar); 287 288 if (r != 0) { 289 REQUIRE_ENCODEBUFFER(buf, 1); 290 *buf->outbuf++ = '?'; 291 } 292 } 293 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { 294 buf->inpos += esize; 295 return 0; 296 } 297 298 start = (Py_ssize_t)buf->inpos; 299 end = start + esize; 300 301 /* use cached exception object if available */ 302 if (buf->excobj == NULL) { 303 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError, 304 "sOnns", 305 codec->encoding, buf->inobj, 306 start, end, reason); 307 if (buf->excobj == NULL) 308 goto errorexit; 309 } 310 else 311 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || 312 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || 313 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) 314 goto errorexit; 315 316 if (errors == ERROR_STRICT) { 317 PyCodec_StrictErrors(buf->excobj); 318 goto errorexit; 319 } 320 321 retobj = call_error_callback(errors, buf->excobj); 322 if (retobj == NULL) 323 goto errorexit; 324 325 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || 326 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) || 327 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { 328 PyErr_SetString(PyExc_TypeError, 329 "encoding error handler must return " 330 "(str, int) tuple"); 331 goto errorexit; 332 } 333 334 if (PyUnicode_Check(tobj)) { 335 Py_ssize_t inpos; 336 337 retstr = multibytecodec_encode(codec, state, tobj, 338 &inpos, ERROR_STRICT, 339 MBENC_FLUSH); 340 if (retstr == NULL) 341 goto errorexit; 342 } 343 else { 344 Py_INCREF(tobj); 345 retstr = tobj; 346 } 347 348 assert(PyBytes_Check(retstr)); 349 retstrsize = PyBytes_GET_SIZE(retstr); 350 if (retstrsize > 0) { 351 REQUIRE_ENCODEBUFFER(buf, retstrsize); 352 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize); 353 buf->outbuf += retstrsize; 354 } 355 356 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); 357 if (newpos < 0 && !PyErr_Occurred()) 358 newpos += (Py_ssize_t)buf->inlen; 359 if (newpos < 0 || newpos > buf->inlen) { 360 PyErr_Clear(); 361 PyErr_Format(PyExc_IndexError, 362 "position %zd from error handler out of bounds", 363 newpos); 364 goto errorexit; 365 } 366 buf->inpos = newpos; 367 368 Py_DECREF(retobj); 369 Py_DECREF(retstr); 370 return 0; 371 372errorexit: 373 Py_XDECREF(retobj); 374 Py_XDECREF(retstr); 375 return -1; 376} 377 378static int 379multibytecodec_decerror(MultibyteCodec *codec, 380 MultibyteCodec_State *state, 381 MultibyteDecodeBuffer *buf, 382 PyObject *errors, Py_ssize_t e) 383{ 384 PyObject *retobj = NULL, *retuni = NULL; 385 Py_ssize_t newpos; 386 const char *reason; 387 Py_ssize_t esize, start, end; 388 389 if (e > 0) { 390 reason = "illegal multibyte sequence"; 391 esize = e; 392 } 393 else { 394 switch (e) { 395 case MBERR_TOOSMALL: 396 return 0; /* retry it */ 397 case MBERR_TOOFEW: 398 reason = "incomplete multibyte sequence"; 399 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 400 break; 401 case MBERR_INTERNAL: 402 PyErr_SetString(PyExc_RuntimeError, 403 "internal codec error"); 404 return -1; 405 case MBERR_EXCEPTION: 406 return -1; 407 default: 408 PyErr_SetString(PyExc_RuntimeError, 409 "unknown runtime error"); 410 return -1; 411 } 412 } 413 414 if (errors == ERROR_REPLACE) { 415 if (_PyUnicodeWriter_WriteChar(&buf->writer, 416 Py_UNICODE_REPLACEMENT_CHARACTER) < 0) 417 goto errorexit; 418 } 419 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { 420 buf->inbuf += esize; 421 return 0; 422 } 423 424 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); 425 end = start + esize; 426 427 /* use cached exception object if available */ 428 if (buf->excobj == NULL) { 429 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, 430 (const char *)buf->inbuf_top, 431 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), 432 start, end, reason); 433 if (buf->excobj == NULL) 434 goto errorexit; 435 } 436 else 437 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || 438 PyUnicodeDecodeError_SetEnd(buf->excobj, end) || 439 PyUnicodeDecodeError_SetReason(buf->excobj, reason)) 440 goto errorexit; 441 442 if (errors == ERROR_STRICT) { 443 PyCodec_StrictErrors(buf->excobj); 444 goto errorexit; 445 } 446 447 retobj = call_error_callback(errors, buf->excobj); 448 if (retobj == NULL) 449 goto errorexit; 450 451 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || 452 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || 453 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { 454 PyErr_SetString(PyExc_TypeError, 455 "decoding error handler must return " 456 "(str, int) tuple"); 457 goto errorexit; 458 } 459 460 if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0) 461 goto errorexit; 462 463 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); 464 if (newpos < 0 && !PyErr_Occurred()) 465 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); 466 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { 467 PyErr_Clear(); 468 PyErr_Format(PyExc_IndexError, 469 "position %zd from error handler out of bounds", 470 newpos); 471 goto errorexit; 472 } 473 buf->inbuf = buf->inbuf_top + newpos; 474 Py_DECREF(retobj); 475 return 0; 476 477errorexit: 478 Py_XDECREF(retobj); 479 return -1; 480} 481 482static PyObject * 483multibytecodec_encode(MultibyteCodec *codec, 484 MultibyteCodec_State *state, 485 PyObject *text, Py_ssize_t *inpos_t, 486 PyObject *errors, int flags) 487{ 488 MultibyteEncodeBuffer buf; 489 Py_ssize_t finalsize, r = 0; 490 Py_ssize_t datalen; 491 int kind; 492 const void *data; 493 494 if (PyUnicode_READY(text) < 0) 495 return NULL; 496 datalen = PyUnicode_GET_LENGTH(text); 497 498 if (datalen == 0 && !(flags & MBENC_RESET)) 499 return PyBytes_FromStringAndSize(NULL, 0); 500 501 buf.excobj = NULL; 502 buf.outobj = NULL; 503 buf.inobj = text; /* borrowed reference */ 504 buf.inpos = 0; 505 buf.inlen = datalen; 506 kind = PyUnicode_KIND(buf.inobj); 507 data = PyUnicode_DATA(buf.inobj); 508 509 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { 510 PyErr_NoMemory(); 511 goto errorexit; 512 } 513 514 buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16); 515 if (buf.outobj == NULL) 516 goto errorexit; 517 buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj); 518 buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj); 519 520 while (buf.inpos < buf.inlen) { 521 /* we don't reuse inleft and outleft here. 522 * error callbacks can relocate the cursor anywhere on buffer*/ 523 Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 524 525 r = codec->encode(state, codec->config, 526 kind, data, 527 &buf.inpos, buf.inlen, 528 &buf.outbuf, outleft, flags); 529 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) 530 break; 531 else if (multibytecodec_encerror(codec, state, &buf, errors,r)) 532 goto errorexit; 533 else if (r == MBERR_TOOFEW) 534 break; 535 } 536 537 if (codec->encreset != NULL && (flags & MBENC_RESET)) 538 for (;;) { 539 Py_ssize_t outleft; 540 541 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 542 r = codec->encreset(state, codec->config, &buf.outbuf, 543 outleft); 544 if (r == 0) 545 break; 546 else if (multibytecodec_encerror(codec, state, 547 &buf, errors, r)) 548 goto errorexit; 549 } 550 551 finalsize = (Py_ssize_t)((char *)buf.outbuf - 552 PyBytes_AS_STRING(buf.outobj)); 553 554 if (finalsize != PyBytes_GET_SIZE(buf.outobj)) 555 if (_PyBytes_Resize(&buf.outobj, finalsize) == -1) 556 goto errorexit; 557 558 if (inpos_t) 559 *inpos_t = buf.inpos; 560 Py_XDECREF(buf.excobj); 561 return buf.outobj; 562 563errorexit: 564 Py_XDECREF(buf.excobj); 565 Py_XDECREF(buf.outobj); 566 return NULL; 567} 568 569/*[clinic input] 570_multibytecodec.MultibyteCodec.encode 571 572 input: object 573 errors: str(accept={str, NoneType}) = None 574 575Return an encoded string version of `input'. 576 577'errors' may be given to set a different error handling scheme. Default is 578'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible 579values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name 580registered with codecs.register_error that can handle UnicodeEncodeErrors. 581[clinic start generated code]*/ 582 583static PyObject * 584_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self, 585 PyObject *input, 586 const char *errors) 587/*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/ 588{ 589 MultibyteCodec_State state; 590 PyObject *errorcb, *r, *ucvt; 591 Py_ssize_t datalen; 592 593 if (PyUnicode_Check(input)) 594 ucvt = NULL; 595 else { 596 input = ucvt = PyObject_Str(input); 597 if (input == NULL) 598 return NULL; 599 else if (!PyUnicode_Check(input)) { 600 PyErr_SetString(PyExc_TypeError, 601 "couldn't convert the object to unicode."); 602 Py_DECREF(ucvt); 603 return NULL; 604 } 605 } 606 607 if (PyUnicode_READY(input) < 0) { 608 Py_XDECREF(ucvt); 609 return NULL; 610 } 611 datalen = PyUnicode_GET_LENGTH(input); 612 613 errorcb = internal_error_callback(errors); 614 if (errorcb == NULL) { 615 Py_XDECREF(ucvt); 616 return NULL; 617 } 618 619 if (self->codec->encinit != NULL && 620 self->codec->encinit(&state, self->codec->config) != 0) 621 goto errorexit; 622 r = multibytecodec_encode(self->codec, &state, 623 input, NULL, errorcb, 624 MBENC_FLUSH | MBENC_RESET); 625 if (r == NULL) 626 goto errorexit; 627 628 ERROR_DECREF(errorcb); 629 Py_XDECREF(ucvt); 630 return make_tuple(r, datalen); 631 632errorexit: 633 ERROR_DECREF(errorcb); 634 Py_XDECREF(ucvt); 635 return NULL; 636} 637 638/*[clinic input] 639_multibytecodec.MultibyteCodec.decode 640 641 input: Py_buffer 642 errors: str(accept={str, NoneType}) = None 643 644Decodes 'input'. 645 646'errors' may be given to set a different error handling scheme. Default is 647'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible 648values are 'ignore' and 'replace' as well as any other name registered with 649codecs.register_error that is able to handle UnicodeDecodeErrors." 650[clinic start generated code]*/ 651 652static PyObject * 653_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self, 654 Py_buffer *input, 655 const char *errors) 656/*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/ 657{ 658 MultibyteCodec_State state; 659 MultibyteDecodeBuffer buf; 660 PyObject *errorcb, *res; 661 const char *data; 662 Py_ssize_t datalen; 663 664 data = input->buf; 665 datalen = input->len; 666 667 errorcb = internal_error_callback(errors); 668 if (errorcb == NULL) { 669 return NULL; 670 } 671 672 if (datalen == 0) { 673 ERROR_DECREF(errorcb); 674 return make_tuple(PyUnicode_New(0, 0), 0); 675 } 676 677 _PyUnicodeWriter_Init(&buf.writer); 678 buf.writer.min_length = datalen; 679 buf.excobj = NULL; 680 buf.inbuf = buf.inbuf_top = (unsigned char *)data; 681 buf.inbuf_end = buf.inbuf_top + datalen; 682 683 if (self->codec->decinit != NULL && 684 self->codec->decinit(&state, self->codec->config) != 0) 685 goto errorexit; 686 687 while (buf.inbuf < buf.inbuf_end) { 688 Py_ssize_t inleft, r; 689 690 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); 691 692 r = self->codec->decode(&state, self->codec->config, 693 &buf.inbuf, inleft, &buf.writer); 694 if (r == 0) 695 break; 696 else if (multibytecodec_decerror(self->codec, &state, 697 &buf, errorcb, r)) 698 goto errorexit; 699 } 700 701 res = _PyUnicodeWriter_Finish(&buf.writer); 702 if (res == NULL) 703 goto errorexit; 704 705 Py_XDECREF(buf.excobj); 706 ERROR_DECREF(errorcb); 707 return make_tuple(res, datalen); 708 709errorexit: 710 ERROR_DECREF(errorcb); 711 Py_XDECREF(buf.excobj); 712 _PyUnicodeWriter_Dealloc(&buf.writer); 713 714 return NULL; 715} 716 717static struct PyMethodDef multibytecodec_methods[] = { 718 _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF 719 _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF 720 {NULL, NULL}, 721}; 722 723static int 724multibytecodec_traverse(PyObject *self, visitproc visit, void *arg) 725{ 726 Py_VISIT(Py_TYPE(self)); 727 return 0; 728} 729 730static void 731multibytecodec_dealloc(MultibyteCodecObject *self) 732{ 733 PyObject_GC_UnTrack(self); 734 PyTypeObject *tp = Py_TYPE(self); 735 tp->tp_free(self); 736 Py_DECREF(tp); 737} 738 739static PyType_Slot multibytecodec_slots[] = { 740 {Py_tp_dealloc, multibytecodec_dealloc}, 741 {Py_tp_getattro, PyObject_GenericGetAttr}, 742 {Py_tp_methods, multibytecodec_methods}, 743 {Py_tp_traverse, multibytecodec_traverse}, 744 {0, NULL}, 745}; 746 747static PyType_Spec multibytecodec_spec = { 748 .name = MODULE_NAME ".MultibyteCodec", 749 .basicsize = sizeof(MultibyteCodecObject), 750 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | 751 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE), 752 .slots = multibytecodec_slots, 753}; 754 755 756/** 757 * Utility functions for stateful codec mechanism 758 */ 759 760#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o)) 761#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o)) 762 763static PyObject * 764encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, 765 PyObject *unistr, int final) 766{ 767 PyObject *ucvt, *r = NULL; 768 PyObject *inbuf = NULL; 769 Py_ssize_t inpos, datalen; 770 PyObject *origpending = NULL; 771 772 if (PyUnicode_Check(unistr)) 773 ucvt = NULL; 774 else { 775 unistr = ucvt = PyObject_Str(unistr); 776 if (unistr == NULL) 777 return NULL; 778 else if (!PyUnicode_Check(unistr)) { 779 PyErr_SetString(PyExc_TypeError, 780 "couldn't convert the object to str."); 781 Py_DECREF(ucvt); 782 return NULL; 783 } 784 } 785 786 if (ctx->pending) { 787 PyObject *inbuf_tmp; 788 789 Py_INCREF(ctx->pending); 790 origpending = ctx->pending; 791 792 Py_INCREF(ctx->pending); 793 inbuf_tmp = ctx->pending; 794 PyUnicode_Append(&inbuf_tmp, unistr); 795 if (inbuf_tmp == NULL) 796 goto errorexit; 797 Py_CLEAR(ctx->pending); 798 inbuf = inbuf_tmp; 799 } 800 else { 801 origpending = NULL; 802 803 Py_INCREF(unistr); 804 inbuf = unistr; 805 } 806 if (PyUnicode_READY(inbuf) < 0) 807 goto errorexit; 808 inpos = 0; 809 datalen = PyUnicode_GET_LENGTH(inbuf); 810 811 r = multibytecodec_encode(ctx->codec, &ctx->state, 812 inbuf, &inpos, 813 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); 814 if (r == NULL) { 815 /* recover the original pending buffer */ 816 Py_XSETREF(ctx->pending, origpending); 817 origpending = NULL; 818 goto errorexit; 819 } 820 Py_XDECREF(origpending); 821 822 if (inpos < datalen) { 823 if (datalen - inpos > MAXENCPENDING) { 824 /* normal codecs can't reach here */ 825 PyErr_SetString(PyExc_UnicodeError, 826 "pending buffer overflow"); 827 goto errorexit; 828 } 829 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen); 830 if (ctx->pending == NULL) { 831 /* normal codecs can't reach here */ 832 goto errorexit; 833 } 834 } 835 836 Py_DECREF(inbuf); 837 Py_XDECREF(ucvt); 838 return r; 839 840errorexit: 841 Py_XDECREF(r); 842 Py_XDECREF(ucvt); 843 Py_XDECREF(origpending); 844 Py_XDECREF(inbuf); 845 return NULL; 846} 847 848static int 849decoder_append_pending(MultibyteStatefulDecoderContext *ctx, 850 MultibyteDecodeBuffer *buf) 851{ 852 Py_ssize_t npendings; 853 854 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 855 if (npendings + ctx->pendingsize > MAXDECPENDING || 856 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { 857 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); 858 return -1; 859 } 860 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); 861 ctx->pendingsize += npendings; 862 return 0; 863} 864 865static int 866decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, 867 Py_ssize_t size) 868{ 869 buf->inbuf = buf->inbuf_top = (const unsigned char *)data; 870 buf->inbuf_end = buf->inbuf_top + size; 871 buf->writer.min_length += size; 872 return 0; 873} 874 875static int 876decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, 877 MultibyteDecodeBuffer *buf) 878{ 879 while (buf->inbuf < buf->inbuf_end) { 880 Py_ssize_t inleft; 881 Py_ssize_t r; 882 883 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 884 885 r = ctx->codec->decode(&ctx->state, ctx->codec->config, 886 &buf->inbuf, inleft, &buf->writer); 887 if (r == 0 || r == MBERR_TOOFEW) 888 break; 889 else if (multibytecodec_decerror(ctx->codec, &ctx->state, 890 buf, ctx->errors, r)) 891 return -1; 892 } 893 return 0; 894} 895 896 897/*[clinic input] 898_multibytecodec.MultibyteIncrementalEncoder.encode 899 900 input: object 901 final: bool(accept={int}) = False 902[clinic start generated code]*/ 903 904static PyObject * 905_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self, 906 PyObject *input, 907 int final) 908/*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/ 909{ 910 return encoder_encode_stateful(STATEFUL_ECTX(self), input, final); 911} 912 913/*[clinic input] 914_multibytecodec.MultibyteIncrementalEncoder.getstate 915[clinic start generated code]*/ 916 917static PyObject * 918_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self) 919/*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/ 920{ 921 /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes 922 for UTF-8 encoded buffer (each character can use up to 4 923 bytes), and required bytes for MultibyteCodec_State.c. A byte 924 array is used to avoid different compilers generating different 925 values for the same state, e.g. as a result of struct padding. 926 */ 927 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)]; 928 Py_ssize_t statesize; 929 const char *pendingbuffer = NULL; 930 Py_ssize_t pendingsize; 931 932 if (self->pending != NULL) { 933 pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize); 934 if (pendingbuffer == NULL) { 935 return NULL; 936 } 937 if (pendingsize > MAXENCPENDING*4) { 938 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large"); 939 return NULL; 940 } 941 statebytes[0] = (unsigned char)pendingsize; 942 memcpy(statebytes + 1, pendingbuffer, pendingsize); 943 statesize = 1 + pendingsize; 944 } else { 945 statebytes[0] = 0; 946 statesize = 1; 947 } 948 memcpy(statebytes+statesize, self->state.c, 949 sizeof(self->state.c)); 950 statesize += sizeof(self->state.c); 951 952 return (PyObject *)_PyLong_FromByteArray(statebytes, statesize, 953 1 /* little-endian */ , 954 0 /* unsigned */ ); 955} 956 957/*[clinic input] 958_multibytecodec.MultibyteIncrementalEncoder.setstate 959 state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type') 960 / 961[clinic start generated code]*/ 962 963static PyObject * 964_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self, 965 PyLongObject *statelong) 966/*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/ 967{ 968 PyObject *pending = NULL; 969 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)]; 970 971 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 972 1 /* little-endian */ , 973 0 /* unsigned */ ) < 0) { 974 goto errorexit; 975 } 976 977 if (statebytes[0] > MAXENCPENDING*4) { 978 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large"); 979 return NULL; 980 } 981 982 pending = PyUnicode_DecodeUTF8((const char *)statebytes+1, 983 statebytes[0], "strict"); 984 if (pending == NULL) { 985 goto errorexit; 986 } 987 988 Py_CLEAR(self->pending); 989 self->pending = pending; 990 memcpy(self->state.c, statebytes+1+statebytes[0], 991 sizeof(self->state.c)); 992 993 Py_RETURN_NONE; 994 995errorexit: 996 Py_XDECREF(pending); 997 return NULL; 998} 999 1000/*[clinic input] 1001_multibytecodec.MultibyteIncrementalEncoder.reset 1002[clinic start generated code]*/ 1003 1004static PyObject * 1005_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self) 1006/*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/ 1007{ 1008 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */ 1009 unsigned char buffer[4], *outbuf; 1010 Py_ssize_t r; 1011 if (self->codec->encreset != NULL) { 1012 outbuf = buffer; 1013 r = self->codec->encreset(&self->state, self->codec->config, 1014 &outbuf, sizeof(buffer)); 1015 if (r != 0) 1016 return NULL; 1017 } 1018 Py_CLEAR(self->pending); 1019 Py_RETURN_NONE; 1020} 1021 1022static struct PyMethodDef mbiencoder_methods[] = { 1023 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF 1024 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF 1025 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF 1026 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF 1027 {NULL, NULL}, 1028}; 1029 1030static PyObject * 1031mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1032{ 1033 MultibyteIncrementalEncoderObject *self; 1034 PyObject *codec = NULL; 1035 char *errors = NULL; 1036 1037 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", 1038 incnewkwarglist, &errors)) 1039 return NULL; 1040 1041 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); 1042 if (self == NULL) 1043 return NULL; 1044 1045 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1046 if (codec == NULL) 1047 goto errorexit; 1048 1049 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); 1050 if (!MultibyteCodec_Check(state, codec)) { 1051 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1052 goto errorexit; 1053 } 1054 1055 self->codec = ((MultibyteCodecObject *)codec)->codec; 1056 self->pending = NULL; 1057 self->errors = internal_error_callback(errors); 1058 if (self->errors == NULL) 1059 goto errorexit; 1060 if (self->codec->encinit != NULL && 1061 self->codec->encinit(&self->state, self->codec->config) != 0) 1062 goto errorexit; 1063 1064 Py_DECREF(codec); 1065 return (PyObject *)self; 1066 1067errorexit: 1068 Py_XDECREF(self); 1069 Py_XDECREF(codec); 1070 return NULL; 1071} 1072 1073static int 1074mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds) 1075{ 1076 return 0; 1077} 1078 1079static int 1080mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, 1081 visitproc visit, void *arg) 1082{ 1083 if (ERROR_ISCUSTOM(self->errors)) 1084 Py_VISIT(self->errors); 1085 return 0; 1086} 1087 1088static void 1089mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) 1090{ 1091 PyTypeObject *tp = Py_TYPE(self); 1092 PyObject_GC_UnTrack(self); 1093 ERROR_DECREF(self->errors); 1094 Py_CLEAR(self->pending); 1095 tp->tp_free(self); 1096 Py_DECREF(tp); 1097} 1098 1099static PyType_Slot encoder_slots[] = { 1100 {Py_tp_dealloc, mbiencoder_dealloc}, 1101 {Py_tp_getattro, PyObject_GenericGetAttr}, 1102 {Py_tp_traverse, mbiencoder_traverse}, 1103 {Py_tp_methods, mbiencoder_methods}, 1104 {Py_tp_getset, codecctx_getsets}, 1105 {Py_tp_init, mbiencoder_init}, 1106 {Py_tp_new, mbiencoder_new}, 1107 {0, NULL}, 1108}; 1109 1110static PyType_Spec encoder_spec = { 1111 .name = MODULE_NAME ".MultibyteIncrementalEncoder", 1112 .basicsize = sizeof(MultibyteIncrementalEncoderObject), 1113 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE | 1114 Py_TPFLAGS_IMMUTABLETYPE), 1115 .slots = encoder_slots, 1116}; 1117 1118 1119/*[clinic input] 1120_multibytecodec.MultibyteIncrementalDecoder.decode 1121 1122 input: Py_buffer 1123 final: bool(accept={int}) = False 1124[clinic start generated code]*/ 1125 1126static PyObject * 1127_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self, 1128 Py_buffer *input, 1129 int final) 1130/*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/ 1131{ 1132 MultibyteDecodeBuffer buf; 1133 char *data, *wdata = NULL; 1134 Py_ssize_t wsize, size, origpending; 1135 PyObject *res; 1136 1137 data = input->buf; 1138 size = input->len; 1139 1140 _PyUnicodeWriter_Init(&buf.writer); 1141 buf.excobj = NULL; 1142 origpending = self->pendingsize; 1143 1144 if (self->pendingsize == 0) { 1145 wsize = size; 1146 wdata = data; 1147 } 1148 else { 1149 if (size > PY_SSIZE_T_MAX - self->pendingsize) { 1150 PyErr_NoMemory(); 1151 goto errorexit; 1152 } 1153 wsize = size + self->pendingsize; 1154 wdata = PyMem_Malloc(wsize); 1155 if (wdata == NULL) { 1156 PyErr_NoMemory(); 1157 goto errorexit; 1158 } 1159 memcpy(wdata, self->pending, self->pendingsize); 1160 memcpy(wdata + self->pendingsize, data, size); 1161 self->pendingsize = 0; 1162 } 1163 1164 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) 1165 goto errorexit; 1166 1167 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) 1168 goto errorexit; 1169 1170 if (final && buf.inbuf < buf.inbuf_end) { 1171 if (multibytecodec_decerror(self->codec, &self->state, 1172 &buf, self->errors, MBERR_TOOFEW)) { 1173 /* recover the original pending buffer */ 1174 memcpy(self->pending, wdata, origpending); 1175 self->pendingsize = origpending; 1176 goto errorexit; 1177 } 1178 } 1179 1180 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ 1181 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) 1182 goto errorexit; 1183 } 1184 1185 res = _PyUnicodeWriter_Finish(&buf.writer); 1186 if (res == NULL) 1187 goto errorexit; 1188 1189 if (wdata != data) 1190 PyMem_Free(wdata); 1191 Py_XDECREF(buf.excobj); 1192 return res; 1193 1194errorexit: 1195 if (wdata != NULL && wdata != data) 1196 PyMem_Free(wdata); 1197 Py_XDECREF(buf.excobj); 1198 _PyUnicodeWriter_Dealloc(&buf.writer); 1199 return NULL; 1200} 1201 1202/*[clinic input] 1203_multibytecodec.MultibyteIncrementalDecoder.getstate 1204[clinic start generated code]*/ 1205 1206static PyObject * 1207_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self) 1208/*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/ 1209{ 1210 PyObject *buffer; 1211 PyObject *statelong; 1212 1213 buffer = PyBytes_FromStringAndSize((const char *)self->pending, 1214 self->pendingsize); 1215 if (buffer == NULL) { 1216 return NULL; 1217 } 1218 1219 statelong = (PyObject *)_PyLong_FromByteArray(self->state.c, 1220 sizeof(self->state.c), 1221 1 /* little-endian */ , 1222 0 /* unsigned */ ); 1223 if (statelong == NULL) { 1224 Py_DECREF(buffer); 1225 return NULL; 1226 } 1227 1228 return Py_BuildValue("NN", buffer, statelong); 1229} 1230 1231/*[clinic input] 1232_multibytecodec.MultibyteIncrementalDecoder.setstate 1233 state: object(subclass_of='&PyTuple_Type') 1234 / 1235[clinic start generated code]*/ 1236 1237static PyObject * 1238_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self, 1239 PyObject *state) 1240/*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/ 1241{ 1242 PyObject *buffer; 1243 PyLongObject *statelong; 1244 Py_ssize_t buffersize; 1245 const char *bufferstr; 1246 unsigned char statebytes[8]; 1247 1248 if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument", 1249 &buffer, &PyLong_Type, &statelong)) 1250 { 1251 return NULL; 1252 } 1253 1254 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 1255 1 /* little-endian */ , 1256 0 /* unsigned */ ) < 0) { 1257 return NULL; 1258 } 1259 1260 buffersize = PyBytes_Size(buffer); 1261 if (buffersize == -1) { 1262 return NULL; 1263 } 1264 1265 if (buffersize > MAXDECPENDING) { 1266 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large"); 1267 return NULL; 1268 } 1269 1270 bufferstr = PyBytes_AsString(buffer); 1271 if (bufferstr == NULL) { 1272 return NULL; 1273 } 1274 self->pendingsize = buffersize; 1275 memcpy(self->pending, bufferstr, self->pendingsize); 1276 memcpy(self->state.c, statebytes, sizeof(statebytes)); 1277 1278 Py_RETURN_NONE; 1279} 1280 1281/*[clinic input] 1282_multibytecodec.MultibyteIncrementalDecoder.reset 1283[clinic start generated code]*/ 1284 1285static PyObject * 1286_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self) 1287/*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/ 1288{ 1289 if (self->codec->decreset != NULL && 1290 self->codec->decreset(&self->state, self->codec->config) != 0) 1291 return NULL; 1292 self->pendingsize = 0; 1293 1294 Py_RETURN_NONE; 1295} 1296 1297static struct PyMethodDef mbidecoder_methods[] = { 1298 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF 1299 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF 1300 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF 1301 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF 1302 {NULL, NULL}, 1303}; 1304 1305static PyObject * 1306mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1307{ 1308 MultibyteIncrementalDecoderObject *self; 1309 PyObject *codec = NULL; 1310 char *errors = NULL; 1311 1312 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", 1313 incnewkwarglist, &errors)) 1314 return NULL; 1315 1316 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); 1317 if (self == NULL) 1318 return NULL; 1319 1320 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1321 if (codec == NULL) 1322 goto errorexit; 1323 1324 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); 1325 if (!MultibyteCodec_Check(state, codec)) { 1326 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1327 goto errorexit; 1328 } 1329 1330 self->codec = ((MultibyteCodecObject *)codec)->codec; 1331 self->pendingsize = 0; 1332 self->errors = internal_error_callback(errors); 1333 if (self->errors == NULL) 1334 goto errorexit; 1335 if (self->codec->decinit != NULL && 1336 self->codec->decinit(&self->state, self->codec->config) != 0) 1337 goto errorexit; 1338 1339 Py_DECREF(codec); 1340 return (PyObject *)self; 1341 1342errorexit: 1343 Py_XDECREF(self); 1344 Py_XDECREF(codec); 1345 return NULL; 1346} 1347 1348static int 1349mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds) 1350{ 1351 return 0; 1352} 1353 1354static int 1355mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, 1356 visitproc visit, void *arg) 1357{ 1358 if (ERROR_ISCUSTOM(self->errors)) 1359 Py_VISIT(self->errors); 1360 return 0; 1361} 1362 1363static void 1364mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) 1365{ 1366 PyTypeObject *tp = Py_TYPE(self); 1367 PyObject_GC_UnTrack(self); 1368 ERROR_DECREF(self->errors); 1369 tp->tp_free(self); 1370 Py_DECREF(tp); 1371} 1372 1373static PyType_Slot decoder_slots[] = { 1374 {Py_tp_dealloc, mbidecoder_dealloc}, 1375 {Py_tp_getattro, PyObject_GenericGetAttr}, 1376 {Py_tp_traverse, mbidecoder_traverse}, 1377 {Py_tp_methods, mbidecoder_methods}, 1378 {Py_tp_getset, codecctx_getsets}, 1379 {Py_tp_init, mbidecoder_init}, 1380 {Py_tp_new, mbidecoder_new}, 1381 {0, NULL}, 1382}; 1383 1384static PyType_Spec decoder_spec = { 1385 .name = MODULE_NAME ".MultibyteIncrementalDecoder", 1386 .basicsize = sizeof(MultibyteIncrementalDecoderObject), 1387 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE | 1388 Py_TPFLAGS_IMMUTABLETYPE), 1389 .slots = decoder_slots, 1390}; 1391 1392static PyObject * 1393mbstreamreader_iread(MultibyteStreamReaderObject *self, 1394 const char *method, Py_ssize_t sizehint) 1395{ 1396 MultibyteDecodeBuffer buf; 1397 PyObject *cres, *res; 1398 Py_ssize_t rsize; 1399 1400 if (sizehint == 0) 1401 return PyUnicode_New(0, 0); 1402 1403 _PyUnicodeWriter_Init(&buf.writer); 1404 buf.excobj = NULL; 1405 cres = NULL; 1406 1407 for (;;) { 1408 int endoffile; 1409 1410 if (sizehint < 0) 1411 cres = PyObject_CallMethod(self->stream, 1412 method, NULL); 1413 else 1414 cres = PyObject_CallMethod(self->stream, 1415 method, "i", sizehint); 1416 if (cres == NULL) 1417 goto errorexit; 1418 1419 if (!PyBytes_Check(cres)) { 1420 PyErr_Format(PyExc_TypeError, 1421 "stream function returned a " 1422 "non-bytes object (%.100s)", 1423 Py_TYPE(cres)->tp_name); 1424 goto errorexit; 1425 } 1426 1427 endoffile = (PyBytes_GET_SIZE(cres) == 0); 1428 1429 if (self->pendingsize > 0) { 1430 PyObject *ctr; 1431 char *ctrdata; 1432 1433 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { 1434 PyErr_NoMemory(); 1435 goto errorexit; 1436 } 1437 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize; 1438 ctr = PyBytes_FromStringAndSize(NULL, rsize); 1439 if (ctr == NULL) 1440 goto errorexit; 1441 ctrdata = PyBytes_AS_STRING(ctr); 1442 memcpy(ctrdata, self->pending, self->pendingsize); 1443 memcpy(ctrdata + self->pendingsize, 1444 PyBytes_AS_STRING(cres), 1445 PyBytes_GET_SIZE(cres)); 1446 Py_DECREF(cres); 1447 cres = ctr; 1448 self->pendingsize = 0; 1449 } 1450 1451 rsize = PyBytes_GET_SIZE(cres); 1452 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres), 1453 rsize) != 0) 1454 goto errorexit; 1455 1456 if (rsize > 0 && decoder_feed_buffer( 1457 (MultibyteStatefulDecoderContext *)self, &buf)) 1458 goto errorexit; 1459 1460 if (endoffile || sizehint < 0) { 1461 if (buf.inbuf < buf.inbuf_end && 1462 multibytecodec_decerror(self->codec, &self->state, 1463 &buf, self->errors, MBERR_TOOFEW)) 1464 goto errorexit; 1465 } 1466 1467 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ 1468 if (decoder_append_pending(STATEFUL_DCTX(self), 1469 &buf) != 0) 1470 goto errorexit; 1471 } 1472 1473 Py_DECREF(cres); 1474 cres = NULL; 1475 1476 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0) 1477 break; 1478 1479 sizehint = 1; /* read 1 more byte and retry */ 1480 } 1481 1482 res = _PyUnicodeWriter_Finish(&buf.writer); 1483 if (res == NULL) 1484 goto errorexit; 1485 1486 Py_XDECREF(cres); 1487 Py_XDECREF(buf.excobj); 1488 return res; 1489 1490errorexit: 1491 Py_XDECREF(cres); 1492 Py_XDECREF(buf.excobj); 1493 _PyUnicodeWriter_Dealloc(&buf.writer); 1494 return NULL; 1495} 1496 1497/*[clinic input] 1498 _multibytecodec.MultibyteStreamReader.read 1499 1500 sizeobj: object = None 1501 / 1502[clinic start generated code]*/ 1503 1504static PyObject * 1505_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self, 1506 PyObject *sizeobj) 1507/*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/ 1508{ 1509 Py_ssize_t size; 1510 1511 if (sizeobj == Py_None) 1512 size = -1; 1513 else if (PyLong_Check(sizeobj)) 1514 size = PyLong_AsSsize_t(sizeobj); 1515 else { 1516 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1517 return NULL; 1518 } 1519 1520 if (size == -1 && PyErr_Occurred()) 1521 return NULL; 1522 1523 return mbstreamreader_iread(self, "read", size); 1524} 1525 1526/*[clinic input] 1527 _multibytecodec.MultibyteStreamReader.readline 1528 1529 sizeobj: object = None 1530 / 1531[clinic start generated code]*/ 1532 1533static PyObject * 1534_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self, 1535 PyObject *sizeobj) 1536/*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/ 1537{ 1538 Py_ssize_t size; 1539 1540 if (sizeobj == Py_None) 1541 size = -1; 1542 else if (PyLong_Check(sizeobj)) 1543 size = PyLong_AsSsize_t(sizeobj); 1544 else { 1545 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1546 return NULL; 1547 } 1548 1549 if (size == -1 && PyErr_Occurred()) 1550 return NULL; 1551 1552 return mbstreamreader_iread(self, "readline", size); 1553} 1554 1555/*[clinic input] 1556 _multibytecodec.MultibyteStreamReader.readlines 1557 1558 sizehintobj: object = None 1559 / 1560[clinic start generated code]*/ 1561 1562static PyObject * 1563_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self, 1564 PyObject *sizehintobj) 1565/*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/ 1566{ 1567 PyObject *r, *sr; 1568 Py_ssize_t sizehint; 1569 1570 if (sizehintobj == Py_None) 1571 sizehint = -1; 1572 else if (PyLong_Check(sizehintobj)) 1573 sizehint = PyLong_AsSsize_t(sizehintobj); 1574 else { 1575 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1576 return NULL; 1577 } 1578 1579 if (sizehint == -1 && PyErr_Occurred()) 1580 return NULL; 1581 1582 r = mbstreamreader_iread(self, "read", sizehint); 1583 if (r == NULL) 1584 return NULL; 1585 1586 sr = PyUnicode_Splitlines(r, 1); 1587 Py_DECREF(r); 1588 return sr; 1589} 1590 1591/*[clinic input] 1592 _multibytecodec.MultibyteStreamReader.reset 1593[clinic start generated code]*/ 1594 1595static PyObject * 1596_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self) 1597/*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/ 1598{ 1599 if (self->codec->decreset != NULL && 1600 self->codec->decreset(&self->state, self->codec->config) != 0) 1601 return NULL; 1602 self->pendingsize = 0; 1603 1604 Py_RETURN_NONE; 1605} 1606 1607static struct PyMethodDef mbstreamreader_methods[] = { 1608 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF 1609 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF 1610 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF 1611 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF 1612 {NULL, NULL}, 1613}; 1614 1615static PyMemberDef mbstreamreader_members[] = { 1616 {"stream", T_OBJECT, 1617 offsetof(MultibyteStreamReaderObject, stream), 1618 READONLY, NULL}, 1619 {NULL,} 1620}; 1621 1622static PyObject * 1623mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1624{ 1625 MultibyteStreamReaderObject *self; 1626 PyObject *stream, *codec = NULL; 1627 char *errors = NULL; 1628 1629 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", 1630 streamkwarglist, &stream, &errors)) 1631 return NULL; 1632 1633 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); 1634 if (self == NULL) 1635 return NULL; 1636 1637 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1638 if (codec == NULL) 1639 goto errorexit; 1640 1641 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); 1642 if (!MultibyteCodec_Check(state, codec)) { 1643 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1644 goto errorexit; 1645 } 1646 1647 self->codec = ((MultibyteCodecObject *)codec)->codec; 1648 self->stream = stream; 1649 Py_INCREF(stream); 1650 self->pendingsize = 0; 1651 self->errors = internal_error_callback(errors); 1652 if (self->errors == NULL) 1653 goto errorexit; 1654 if (self->codec->decinit != NULL && 1655 self->codec->decinit(&self->state, self->codec->config) != 0) 1656 goto errorexit; 1657 1658 Py_DECREF(codec); 1659 return (PyObject *)self; 1660 1661errorexit: 1662 Py_XDECREF(self); 1663 Py_XDECREF(codec); 1664 return NULL; 1665} 1666 1667static int 1668mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds) 1669{ 1670 return 0; 1671} 1672 1673static int 1674mbstreamreader_traverse(MultibyteStreamReaderObject *self, 1675 visitproc visit, void *arg) 1676{ 1677 if (ERROR_ISCUSTOM(self->errors)) 1678 Py_VISIT(self->errors); 1679 Py_VISIT(self->stream); 1680 return 0; 1681} 1682 1683static void 1684mbstreamreader_dealloc(MultibyteStreamReaderObject *self) 1685{ 1686 PyTypeObject *tp = Py_TYPE(self); 1687 PyObject_GC_UnTrack(self); 1688 ERROR_DECREF(self->errors); 1689 Py_XDECREF(self->stream); 1690 tp->tp_free(self); 1691 Py_DECREF(tp); 1692} 1693 1694static PyType_Slot reader_slots[] = { 1695 {Py_tp_dealloc, mbstreamreader_dealloc}, 1696 {Py_tp_getattro, PyObject_GenericGetAttr}, 1697 {Py_tp_traverse, mbstreamreader_traverse}, 1698 {Py_tp_methods, mbstreamreader_methods}, 1699 {Py_tp_members, mbstreamreader_members}, 1700 {Py_tp_getset, codecctx_getsets}, 1701 {Py_tp_init, mbstreamreader_init}, 1702 {Py_tp_new, mbstreamreader_new}, 1703 {0, NULL}, 1704}; 1705 1706static PyType_Spec reader_spec = { 1707 .name = MODULE_NAME ".MultibyteStreamReader", 1708 .basicsize = sizeof(MultibyteStreamReaderObject), 1709 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE | 1710 Py_TPFLAGS_IMMUTABLETYPE), 1711 .slots = reader_slots, 1712}; 1713 1714static int 1715mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, 1716 PyObject *unistr, PyObject *str_write) 1717{ 1718 PyObject *str, *wr; 1719 1720 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); 1721 if (str == NULL) 1722 return -1; 1723 1724 wr = _PyObject_CallMethodOneArg(self->stream, str_write, str); 1725 Py_DECREF(str); 1726 if (wr == NULL) 1727 return -1; 1728 1729 Py_DECREF(wr); 1730 return 0; 1731} 1732 1733/*[clinic input] 1734 _multibytecodec.MultibyteStreamWriter.write 1735 1736 cls: defining_class 1737 strobj: object 1738 / 1739[clinic start generated code]*/ 1740 1741static PyObject * 1742_multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self, 1743 PyTypeObject *cls, 1744 PyObject *strobj) 1745/*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/ 1746{ 1747 _multibytecodec_state *state = PyType_GetModuleState(cls); 1748 assert(state != NULL); 1749 if (mbstreamwriter_iwrite(self, strobj, state->str_write)) { 1750 return NULL; 1751 } 1752 Py_RETURN_NONE; 1753} 1754 1755/*[clinic input] 1756 _multibytecodec.MultibyteStreamWriter.writelines 1757 1758 cls: defining_class 1759 lines: object 1760 / 1761[clinic start generated code]*/ 1762 1763static PyObject * 1764_multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self, 1765 PyTypeObject *cls, 1766 PyObject *lines) 1767/*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/ 1768{ 1769 PyObject *strobj; 1770 int i, r; 1771 1772 if (!PySequence_Check(lines)) { 1773 PyErr_SetString(PyExc_TypeError, 1774 "arg must be a sequence object"); 1775 return NULL; 1776 } 1777 1778 _multibytecodec_state *state = PyType_GetModuleState(cls); 1779 assert(state != NULL); 1780 for (i = 0; i < PySequence_Length(lines); i++) { 1781 /* length can be changed even within this loop */ 1782 strobj = PySequence_GetItem(lines, i); 1783 if (strobj == NULL) 1784 return NULL; 1785 1786 r = mbstreamwriter_iwrite(self, strobj, state->str_write); 1787 Py_DECREF(strobj); 1788 if (r == -1) 1789 return NULL; 1790 } 1791 /* PySequence_Length() can fail */ 1792 if (PyErr_Occurred()) 1793 return NULL; 1794 1795 Py_RETURN_NONE; 1796} 1797 1798/*[clinic input] 1799 _multibytecodec.MultibyteStreamWriter.reset 1800 1801 cls: defining_class 1802 / 1803 1804[clinic start generated code]*/ 1805 1806static PyObject * 1807_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self, 1808 PyTypeObject *cls) 1809/*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/ 1810{ 1811 PyObject *pwrt; 1812 1813 if (!self->pending) 1814 Py_RETURN_NONE; 1815 1816 pwrt = multibytecodec_encode(self->codec, &self->state, 1817 self->pending, NULL, self->errors, 1818 MBENC_FLUSH | MBENC_RESET); 1819 /* some pending buffer can be truncated when UnicodeEncodeError is 1820 * raised on 'strict' mode. but, 'reset' method is designed to 1821 * reset the pending buffer or states so failed string sequence 1822 * ought to be missed */ 1823 Py_CLEAR(self->pending); 1824 if (pwrt == NULL) 1825 return NULL; 1826 1827 assert(PyBytes_Check(pwrt)); 1828 1829 _multibytecodec_state *state = PyType_GetModuleState(cls); 1830 assert(state != NULL); 1831 1832 if (PyBytes_Size(pwrt) > 0) { 1833 PyObject *wr; 1834 1835 wr = _PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt); 1836 if (wr == NULL) { 1837 Py_DECREF(pwrt); 1838 return NULL; 1839 } 1840 } 1841 Py_DECREF(pwrt); 1842 1843 Py_RETURN_NONE; 1844} 1845 1846static PyObject * 1847mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1848{ 1849 MultibyteStreamWriterObject *self; 1850 PyObject *stream, *codec = NULL; 1851 char *errors = NULL; 1852 1853 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", 1854 streamkwarglist, &stream, &errors)) 1855 return NULL; 1856 1857 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); 1858 if (self == NULL) 1859 return NULL; 1860 1861 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1862 if (codec == NULL) 1863 goto errorexit; 1864 1865 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); 1866 if (!MultibyteCodec_Check(state, codec)) { 1867 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1868 goto errorexit; 1869 } 1870 1871 self->codec = ((MultibyteCodecObject *)codec)->codec; 1872 self->stream = stream; 1873 Py_INCREF(stream); 1874 self->pending = NULL; 1875 self->errors = internal_error_callback(errors); 1876 if (self->errors == NULL) 1877 goto errorexit; 1878 if (self->codec->encinit != NULL && 1879 self->codec->encinit(&self->state, self->codec->config) != 0) 1880 goto errorexit; 1881 1882 Py_DECREF(codec); 1883 return (PyObject *)self; 1884 1885errorexit: 1886 Py_XDECREF(self); 1887 Py_XDECREF(codec); 1888 return NULL; 1889} 1890 1891static int 1892mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds) 1893{ 1894 return 0; 1895} 1896 1897static int 1898mbstreamwriter_traverse(MultibyteStreamWriterObject *self, 1899 visitproc visit, void *arg) 1900{ 1901 if (ERROR_ISCUSTOM(self->errors)) 1902 Py_VISIT(self->errors); 1903 Py_VISIT(self->stream); 1904 return 0; 1905} 1906 1907static void 1908mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) 1909{ 1910 PyTypeObject *tp = Py_TYPE(self); 1911 PyObject_GC_UnTrack(self); 1912 ERROR_DECREF(self->errors); 1913 Py_XDECREF(self->stream); 1914 tp->tp_free(self); 1915 Py_DECREF(tp); 1916} 1917 1918static struct PyMethodDef mbstreamwriter_methods[] = { 1919 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF 1920 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF 1921 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF 1922 {NULL, NULL}, 1923}; 1924 1925static PyMemberDef mbstreamwriter_members[] = { 1926 {"stream", T_OBJECT, 1927 offsetof(MultibyteStreamWriterObject, stream), 1928 READONLY, NULL}, 1929 {NULL,} 1930}; 1931 1932static PyType_Slot writer_slots[] = { 1933 {Py_tp_dealloc, mbstreamwriter_dealloc}, 1934 {Py_tp_getattro, PyObject_GenericGetAttr}, 1935 {Py_tp_traverse, mbstreamwriter_traverse}, 1936 {Py_tp_methods, mbstreamwriter_methods}, 1937 {Py_tp_members, mbstreamwriter_members}, 1938 {Py_tp_getset, codecctx_getsets}, 1939 {Py_tp_init, mbstreamwriter_init}, 1940 {Py_tp_new, mbstreamwriter_new}, 1941 {0, NULL}, 1942}; 1943 1944static PyType_Spec writer_spec = { 1945 .name = MODULE_NAME ".MultibyteStreamWriter", 1946 .basicsize = sizeof(MultibyteStreamWriterObject), 1947 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE | 1948 Py_TPFLAGS_IMMUTABLETYPE), 1949 .slots = writer_slots, 1950}; 1951 1952 1953/*[clinic input] 1954_multibytecodec.__create_codec 1955 1956 arg: object 1957 / 1958[clinic start generated code]*/ 1959 1960static PyObject * 1961_multibytecodec___create_codec(PyObject *module, PyObject *arg) 1962/*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/ 1963{ 1964 MultibyteCodecObject *self; 1965 MultibyteCodec *codec; 1966 1967 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { 1968 PyErr_SetString(PyExc_ValueError, "argument type invalid"); 1969 return NULL; 1970 } 1971 1972 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); 1973 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) 1974 return NULL; 1975 1976 _multibytecodec_state *state = _multibytecodec_get_state(module); 1977 self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type); 1978 if (self == NULL) 1979 return NULL; 1980 self->codec = codec; 1981 1982 PyObject_GC_Track(self); 1983 return (PyObject *)self; 1984} 1985 1986static int 1987_multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg) 1988{ 1989 _multibytecodec_state *state = _multibytecodec_get_state(mod); 1990 Py_VISIT(state->multibytecodec_type); 1991 Py_VISIT(state->encoder_type); 1992 Py_VISIT(state->decoder_type); 1993 Py_VISIT(state->reader_type); 1994 Py_VISIT(state->writer_type); 1995 return 0; 1996} 1997 1998static int 1999_multibytecodec_clear(PyObject *mod) 2000{ 2001 _multibytecodec_state *state = _multibytecodec_get_state(mod); 2002 Py_CLEAR(state->multibytecodec_type); 2003 Py_CLEAR(state->encoder_type); 2004 Py_CLEAR(state->decoder_type); 2005 Py_CLEAR(state->reader_type); 2006 Py_CLEAR(state->writer_type); 2007 Py_CLEAR(state->str_write); 2008 return 0; 2009} 2010 2011static void 2012_multibytecodec_free(void *mod) 2013{ 2014 _multibytecodec_clear((PyObject *)mod); 2015} 2016 2017#define CREATE_TYPE(module, type, spec) \ 2018 do { \ 2019 type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \ 2020 if (!type) { \ 2021 return -1; \ 2022 } \ 2023 } while (0) 2024 2025#define ADD_TYPE(module, type) \ 2026 do { \ 2027 if (PyModule_AddType(module, type) < 0) { \ 2028 return -1; \ 2029 } \ 2030 } while (0) 2031 2032static int 2033_multibytecodec_exec(PyObject *mod) 2034{ 2035 _multibytecodec_state *state = _multibytecodec_get_state(mod); 2036 state->str_write = PyUnicode_InternFromString("write"); 2037 if (state->str_write == NULL) { 2038 return -1; 2039 } 2040 CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec); 2041 CREATE_TYPE(mod, state->encoder_type, &encoder_spec); 2042 CREATE_TYPE(mod, state->decoder_type, &decoder_spec); 2043 CREATE_TYPE(mod, state->reader_type, &reader_spec); 2044 CREATE_TYPE(mod, state->writer_type, &writer_spec); 2045 2046 ADD_TYPE(mod, state->encoder_type); 2047 ADD_TYPE(mod, state->decoder_type); 2048 ADD_TYPE(mod, state->reader_type); 2049 ADD_TYPE(mod, state->writer_type); 2050 return 0; 2051} 2052 2053#undef CREATE_TYPE 2054#undef ADD_TYPE 2055 2056static struct PyMethodDef _multibytecodec_methods[] = { 2057 _MULTIBYTECODEC___CREATE_CODEC_METHODDEF 2058 {NULL, NULL}, 2059}; 2060 2061static PyModuleDef_Slot _multibytecodec_slots[] = { 2062 {Py_mod_exec, _multibytecodec_exec}, 2063 {0, NULL} 2064}; 2065 2066static struct PyModuleDef _multibytecodecmodule = { 2067 .m_base = PyModuleDef_HEAD_INIT, 2068 .m_name = "_multibytecodec", 2069 .m_size = sizeof(_multibytecodec_state), 2070 .m_methods = _multibytecodec_methods, 2071 .m_slots = _multibytecodec_slots, 2072 .m_traverse = _multibytecodec_traverse, 2073 .m_clear = _multibytecodec_clear, 2074 .m_free = _multibytecodec_free, 2075}; 2076 2077PyMODINIT_FUNC 2078PyInit__multibytecodec(void) 2079{ 2080 return PyModuleDef_Init(&_multibytecodecmodule); 2081} 2082