1/* ------------------------------------------------------------------------ 2 3 _codecs -- Provides access to the codec registry and the builtin 4 codecs. 5 6 This module should never be imported directly. The standard library 7 module "codecs" wraps this builtin module for use within Python. 8 9 The codec registry is accessible via: 10 11 register(search_function) -> None 12 13 lookup(encoding) -> CodecInfo object 14 15 The builtin Unicode codecs use the following interface: 16 17 <encoding>_encode(Unicode_object[,errors='strict']) -> 18 (string object, bytes consumed) 19 20 <encoding>_decode(char_buffer_obj[,errors='strict']) -> 21 (Unicode object, bytes consumed) 22 23 These <encoding>s are available: utf_8, unicode_escape, 24 raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32). 25 26 27Written by Marc-Andre Lemburg (mal@lemburg.com). 28 29Copyright (c) Corporation for National Research Initiatives. 30 31 ------------------------------------------------------------------------ */ 32 33#define PY_SSIZE_T_CLEAN 34#include "Python.h" 35 36#ifdef MS_WINDOWS 37#include <windows.h> 38#endif 39 40/*[clinic input] 41module _codecs 42[clinic start generated code]*/ 43/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/ 44 45#include "clinic/_codecsmodule.c.h" 46 47/* --- Registry ----------------------------------------------------------- */ 48 49/*[clinic input] 50_codecs.register 51 search_function: object 52 / 53 54Register a codec search function. 55 56Search functions are expected to take one argument, the encoding name in 57all lower case letters, and either return None, or a tuple of functions 58(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object). 59[clinic start generated code]*/ 60 61static PyObject * 62_codecs_register(PyObject *module, PyObject *search_function) 63/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/ 64{ 65 if (PyCodec_Register(search_function)) 66 return NULL; 67 68 Py_RETURN_NONE; 69} 70 71/*[clinic input] 72_codecs.unregister 73 search_function: object 74 / 75 76Unregister a codec search function and clear the registry's cache. 77 78If the search function is not registered, do nothing. 79[clinic start generated code]*/ 80 81static PyObject * 82_codecs_unregister(PyObject *module, PyObject *search_function) 83/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/ 84{ 85 if (PyCodec_Unregister(search_function) < 0) { 86 return NULL; 87 } 88 89 Py_RETURN_NONE; 90} 91 92/*[clinic input] 93_codecs.lookup 94 encoding: str 95 / 96 97Looks up a codec tuple in the Python codec registry and returns a CodecInfo object. 98[clinic start generated code]*/ 99 100static PyObject * 101_codecs_lookup_impl(PyObject *module, const char *encoding) 102/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/ 103{ 104 return _PyCodec_Lookup(encoding); 105} 106 107/*[clinic input] 108_codecs.encode 109 obj: object 110 encoding: str(c_default="NULL") = "utf-8" 111 errors: str(c_default="NULL") = "strict" 112 113Encodes obj using the codec registered for encoding. 114 115The default encoding is 'utf-8'. errors may be given to set a 116different error handling scheme. Default is 'strict' meaning that encoding 117errors raise a ValueError. Other possible values are 'ignore', 'replace' 118and 'backslashreplace' as well as any other name registered with 119codecs.register_error that can handle ValueErrors. 120[clinic start generated code]*/ 121 122static PyObject * 123_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding, 124 const char *errors) 125/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/ 126{ 127 if (encoding == NULL) 128 encoding = PyUnicode_GetDefaultEncoding(); 129 130 /* Encode via the codec registry */ 131 return PyCodec_Encode(obj, encoding, errors); 132} 133 134/*[clinic input] 135_codecs.decode 136 obj: object 137 encoding: str(c_default="NULL") = "utf-8" 138 errors: str(c_default="NULL") = "strict" 139 140Decodes obj using the codec registered for encoding. 141 142Default encoding is 'utf-8'. errors may be given to set a 143different error handling scheme. Default is 'strict' meaning that encoding 144errors raise a ValueError. Other possible values are 'ignore', 'replace' 145and 'backslashreplace' as well as any other name registered with 146codecs.register_error that can handle ValueErrors. 147[clinic start generated code]*/ 148 149static PyObject * 150_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding, 151 const char *errors) 152/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/ 153{ 154 if (encoding == NULL) 155 encoding = PyUnicode_GetDefaultEncoding(); 156 157 /* Decode via the codec registry */ 158 return PyCodec_Decode(obj, encoding, errors); 159} 160 161/* --- Helpers ------------------------------------------------------------ */ 162 163static 164PyObject *codec_tuple(PyObject *decoded, 165 Py_ssize_t len) 166{ 167 if (decoded == NULL) 168 return NULL; 169 return Py_BuildValue("Nn", decoded, len); 170} 171 172/* --- String codecs ------------------------------------------------------ */ 173/*[clinic input] 174_codecs.escape_decode 175 data: Py_buffer(accept={str, buffer}) 176 errors: str(accept={str, NoneType}) = None 177 / 178[clinic start generated code]*/ 179 180static PyObject * 181_codecs_escape_decode_impl(PyObject *module, Py_buffer *data, 182 const char *errors) 183/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/ 184{ 185 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len, 186 errors, 0, NULL); 187 return codec_tuple(decoded, data->len); 188} 189 190/*[clinic input] 191_codecs.escape_encode 192 data: object(subclass_of='&PyBytes_Type') 193 errors: str(accept={str, NoneType}) = None 194 / 195[clinic start generated code]*/ 196 197static PyObject * 198_codecs_escape_encode_impl(PyObject *module, PyObject *data, 199 const char *errors) 200/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/ 201{ 202 Py_ssize_t size; 203 Py_ssize_t newsize; 204 PyObject *v; 205 206 size = PyBytes_GET_SIZE(data); 207 if (size > PY_SSIZE_T_MAX / 4) { 208 PyErr_SetString(PyExc_OverflowError, 209 "string is too large to encode"); 210 return NULL; 211 } 212 newsize = 4*size; 213 v = PyBytes_FromStringAndSize(NULL, newsize); 214 215 if (v == NULL) { 216 return NULL; 217 } 218 else { 219 Py_ssize_t i; 220 char c; 221 char *p = PyBytes_AS_STRING(v); 222 223 for (i = 0; i < size; i++) { 224 /* There's at least enough room for a hex escape */ 225 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4); 226 c = PyBytes_AS_STRING(data)[i]; 227 if (c == '\'' || c == '\\') 228 *p++ = '\\', *p++ = c; 229 else if (c == '\t') 230 *p++ = '\\', *p++ = 't'; 231 else if (c == '\n') 232 *p++ = '\\', *p++ = 'n'; 233 else if (c == '\r') 234 *p++ = '\\', *p++ = 'r'; 235 else if (c < ' ' || c >= 0x7f) { 236 *p++ = '\\'; 237 *p++ = 'x'; 238 *p++ = Py_hexdigits[(c & 0xf0) >> 4]; 239 *p++ = Py_hexdigits[c & 0xf]; 240 } 241 else 242 *p++ = c; 243 } 244 *p = '\0'; 245 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) { 246 return NULL; 247 } 248 } 249 250 return codec_tuple(v, size); 251} 252 253/* --- Decoder ------------------------------------------------------------ */ 254/*[clinic input] 255_codecs.utf_7_decode 256 data: Py_buffer 257 errors: str(accept={str, NoneType}) = None 258 final: bool(accept={int}) = False 259 / 260[clinic start generated code]*/ 261 262static PyObject * 263_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data, 264 const char *errors, int final) 265/*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/ 266{ 267 Py_ssize_t consumed = data->len; 268 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len, 269 errors, 270 final ? NULL : &consumed); 271 return codec_tuple(decoded, consumed); 272} 273 274/*[clinic input] 275_codecs.utf_8_decode 276 data: Py_buffer 277 errors: str(accept={str, NoneType}) = None 278 final: bool(accept={int}) = False 279 / 280[clinic start generated code]*/ 281 282static PyObject * 283_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data, 284 const char *errors, int final) 285/*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/ 286{ 287 Py_ssize_t consumed = data->len; 288 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len, 289 errors, 290 final ? NULL : &consumed); 291 return codec_tuple(decoded, consumed); 292} 293 294/*[clinic input] 295_codecs.utf_16_decode 296 data: Py_buffer 297 errors: str(accept={str, NoneType}) = None 298 final: bool(accept={int}) = False 299 / 300[clinic start generated code]*/ 301 302static PyObject * 303_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data, 304 const char *errors, int final) 305/*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/ 306{ 307 int byteorder = 0; 308 /* This is overwritten unless final is true. */ 309 Py_ssize_t consumed = data->len; 310 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 311 errors, &byteorder, 312 final ? NULL : &consumed); 313 return codec_tuple(decoded, consumed); 314} 315 316/*[clinic input] 317_codecs.utf_16_le_decode 318 data: Py_buffer 319 errors: str(accept={str, NoneType}) = None 320 final: bool(accept={int}) = False 321 / 322[clinic start generated code]*/ 323 324static PyObject * 325_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data, 326 const char *errors, int final) 327/*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/ 328{ 329 int byteorder = -1; 330 /* This is overwritten unless final is true. */ 331 Py_ssize_t consumed = data->len; 332 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 333 errors, &byteorder, 334 final ? NULL : &consumed); 335 return codec_tuple(decoded, consumed); 336} 337 338/*[clinic input] 339_codecs.utf_16_be_decode 340 data: Py_buffer 341 errors: str(accept={str, NoneType}) = None 342 final: bool(accept={int}) = False 343 / 344[clinic start generated code]*/ 345 346static PyObject * 347_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data, 348 const char *errors, int final) 349/*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/ 350{ 351 int byteorder = 1; 352 /* This is overwritten unless final is true. */ 353 Py_ssize_t consumed = data->len; 354 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 355 errors, &byteorder, 356 final ? NULL : &consumed); 357 return codec_tuple(decoded, consumed); 358} 359 360/* This non-standard version also provides access to the byteorder 361 parameter of the builtin UTF-16 codec. 362 363 It returns a tuple (unicode, bytesread, byteorder) with byteorder 364 being the value in effect at the end of data. 365 366*/ 367/*[clinic input] 368_codecs.utf_16_ex_decode 369 data: Py_buffer 370 errors: str(accept={str, NoneType}) = None 371 byteorder: int = 0 372 final: bool(accept={int}) = False 373 / 374[clinic start generated code]*/ 375 376static PyObject * 377_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data, 378 const char *errors, int byteorder, int final) 379/*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/ 380{ 381 /* This is overwritten unless final is true. */ 382 Py_ssize_t consumed = data->len; 383 384 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 385 errors, &byteorder, 386 final ? NULL : &consumed); 387 if (decoded == NULL) 388 return NULL; 389 return Py_BuildValue("Nni", decoded, consumed, byteorder); 390} 391 392/*[clinic input] 393_codecs.utf_32_decode 394 data: Py_buffer 395 errors: str(accept={str, NoneType}) = None 396 final: bool(accept={int}) = False 397 / 398[clinic start generated code]*/ 399 400static PyObject * 401_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data, 402 const char *errors, int final) 403/*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/ 404{ 405 int byteorder = 0; 406 /* This is overwritten unless final is true. */ 407 Py_ssize_t consumed = data->len; 408 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 409 errors, &byteorder, 410 final ? NULL : &consumed); 411 return codec_tuple(decoded, consumed); 412} 413 414/*[clinic input] 415_codecs.utf_32_le_decode 416 data: Py_buffer 417 errors: str(accept={str, NoneType}) = None 418 final: bool(accept={int}) = False 419 / 420[clinic start generated code]*/ 421 422static PyObject * 423_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data, 424 const char *errors, int final) 425/*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/ 426{ 427 int byteorder = -1; 428 /* This is overwritten unless final is true. */ 429 Py_ssize_t consumed = data->len; 430 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 431 errors, &byteorder, 432 final ? NULL : &consumed); 433 return codec_tuple(decoded, consumed); 434} 435 436/*[clinic input] 437_codecs.utf_32_be_decode 438 data: Py_buffer 439 errors: str(accept={str, NoneType}) = None 440 final: bool(accept={int}) = False 441 / 442[clinic start generated code]*/ 443 444static PyObject * 445_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data, 446 const char *errors, int final) 447/*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/ 448{ 449 int byteorder = 1; 450 /* This is overwritten unless final is true. */ 451 Py_ssize_t consumed = data->len; 452 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 453 errors, &byteorder, 454 final ? NULL : &consumed); 455 return codec_tuple(decoded, consumed); 456} 457 458/* This non-standard version also provides access to the byteorder 459 parameter of the builtin UTF-32 codec. 460 461 It returns a tuple (unicode, bytesread, byteorder) with byteorder 462 being the value in effect at the end of data. 463 464*/ 465/*[clinic input] 466_codecs.utf_32_ex_decode 467 data: Py_buffer 468 errors: str(accept={str, NoneType}) = None 469 byteorder: int = 0 470 final: bool(accept={int}) = False 471 / 472[clinic start generated code]*/ 473 474static PyObject * 475_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data, 476 const char *errors, int byteorder, int final) 477/*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/ 478{ 479 Py_ssize_t consumed = data->len; 480 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 481 errors, &byteorder, 482 final ? NULL : &consumed); 483 if (decoded == NULL) 484 return NULL; 485 return Py_BuildValue("Nni", decoded, consumed, byteorder); 486} 487 488/*[clinic input] 489_codecs.unicode_escape_decode 490 data: Py_buffer(accept={str, buffer}) 491 errors: str(accept={str, NoneType}) = None 492 final: bool(accept={int}) = True 493 / 494[clinic start generated code]*/ 495 496static PyObject * 497_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, 498 const char *errors, int final) 499/*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/ 500{ 501 Py_ssize_t consumed = data->len; 502 PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len, 503 errors, 504 final ? NULL : &consumed); 505 return codec_tuple(decoded, consumed); 506} 507 508/*[clinic input] 509_codecs.raw_unicode_escape_decode 510 data: Py_buffer(accept={str, buffer}) 511 errors: str(accept={str, NoneType}) = None 512 final: bool(accept={int}) = True 513 / 514[clinic start generated code]*/ 515 516static PyObject * 517_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, 518 const char *errors, int final) 519/*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/ 520{ 521 Py_ssize_t consumed = data->len; 522 PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len, 523 errors, 524 final ? NULL : &consumed); 525 return codec_tuple(decoded, consumed); 526} 527 528/*[clinic input] 529_codecs.latin_1_decode 530 data: Py_buffer 531 errors: str(accept={str, NoneType}) = None 532 / 533[clinic start generated code]*/ 534 535static PyObject * 536_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data, 537 const char *errors) 538/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/ 539{ 540 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors); 541 return codec_tuple(decoded, data->len); 542} 543 544/*[clinic input] 545_codecs.ascii_decode 546 data: Py_buffer 547 errors: str(accept={str, NoneType}) = None 548 / 549[clinic start generated code]*/ 550 551static PyObject * 552_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data, 553 const char *errors) 554/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/ 555{ 556 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors); 557 return codec_tuple(decoded, data->len); 558} 559 560/*[clinic input] 561_codecs.charmap_decode 562 data: Py_buffer 563 errors: str(accept={str, NoneType}) = None 564 mapping: object = None 565 / 566[clinic start generated code]*/ 567 568static PyObject * 569_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data, 570 const char *errors, PyObject *mapping) 571/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/ 572{ 573 PyObject *decoded; 574 575 if (mapping == Py_None) 576 mapping = NULL; 577 578 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors); 579 return codec_tuple(decoded, data->len); 580} 581 582#ifdef MS_WINDOWS 583 584/*[clinic input] 585_codecs.mbcs_decode 586 data: Py_buffer 587 errors: str(accept={str, NoneType}) = None 588 final: bool(accept={int}) = False 589 / 590[clinic start generated code]*/ 591 592static PyObject * 593_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data, 594 const char *errors, int final) 595/*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/ 596{ 597 Py_ssize_t consumed = data->len; 598 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len, 599 errors, final ? NULL : &consumed); 600 return codec_tuple(decoded, consumed); 601} 602 603/*[clinic input] 604_codecs.oem_decode 605 data: Py_buffer 606 errors: str(accept={str, NoneType}) = None 607 final: bool(accept={int}) = False 608 / 609[clinic start generated code]*/ 610 611static PyObject * 612_codecs_oem_decode_impl(PyObject *module, Py_buffer *data, 613 const char *errors, int final) 614/*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/ 615{ 616 Py_ssize_t consumed = data->len; 617 PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP, 618 data->buf, data->len, errors, final ? NULL : &consumed); 619 return codec_tuple(decoded, consumed); 620} 621 622/*[clinic input] 623_codecs.code_page_decode 624 codepage: int 625 data: Py_buffer 626 errors: str(accept={str, NoneType}) = None 627 final: bool(accept={int}) = False 628 / 629[clinic start generated code]*/ 630 631static PyObject * 632_codecs_code_page_decode_impl(PyObject *module, int codepage, 633 Py_buffer *data, const char *errors, int final) 634/*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/ 635{ 636 Py_ssize_t consumed = data->len; 637 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage, 638 data->buf, data->len, 639 errors, 640 final ? NULL : &consumed); 641 return codec_tuple(decoded, consumed); 642} 643 644#endif /* MS_WINDOWS */ 645 646/* --- Encoder ------------------------------------------------------------ */ 647 648/*[clinic input] 649_codecs.readbuffer_encode 650 data: Py_buffer(accept={str, buffer}) 651 errors: str(accept={str, NoneType}) = None 652 / 653[clinic start generated code]*/ 654 655static PyObject * 656_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data, 657 const char *errors) 658/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/ 659{ 660 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len); 661 return codec_tuple(result, data->len); 662} 663 664/*[clinic input] 665_codecs.utf_7_encode 666 str: unicode 667 errors: str(accept={str, NoneType}) = None 668 / 669[clinic start generated code]*/ 670 671static PyObject * 672_codecs_utf_7_encode_impl(PyObject *module, PyObject *str, 673 const char *errors) 674/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/ 675{ 676 return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), 677 PyUnicode_GET_LENGTH(str)); 678} 679 680/*[clinic input] 681_codecs.utf_8_encode 682 str: unicode 683 errors: str(accept={str, NoneType}) = None 684 / 685[clinic start generated code]*/ 686 687static PyObject * 688_codecs_utf_8_encode_impl(PyObject *module, PyObject *str, 689 const char *errors) 690/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/ 691{ 692 return codec_tuple(_PyUnicode_AsUTF8String(str, errors), 693 PyUnicode_GET_LENGTH(str)); 694} 695 696/* This version provides access to the byteorder parameter of the 697 builtin UTF-16 codecs as optional third argument. It defaults to 0 698 which means: use the native byte order and prepend the data with a 699 BOM mark. 700 701*/ 702 703/*[clinic input] 704_codecs.utf_16_encode 705 str: unicode 706 errors: str(accept={str, NoneType}) = None 707 byteorder: int = 0 708 / 709[clinic start generated code]*/ 710 711static PyObject * 712_codecs_utf_16_encode_impl(PyObject *module, PyObject *str, 713 const char *errors, int byteorder) 714/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/ 715{ 716 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder), 717 PyUnicode_GET_LENGTH(str)); 718} 719 720/*[clinic input] 721_codecs.utf_16_le_encode 722 str: unicode 723 errors: str(accept={str, NoneType}) = None 724 / 725[clinic start generated code]*/ 726 727static PyObject * 728_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str, 729 const char *errors) 730/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/ 731{ 732 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1), 733 PyUnicode_GET_LENGTH(str)); 734} 735 736/*[clinic input] 737_codecs.utf_16_be_encode 738 str: unicode 739 errors: str(accept={str, NoneType}) = None 740 / 741[clinic start generated code]*/ 742 743static PyObject * 744_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str, 745 const char *errors) 746/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/ 747{ 748 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1), 749 PyUnicode_GET_LENGTH(str)); 750} 751 752/* This version provides access to the byteorder parameter of the 753 builtin UTF-32 codecs as optional third argument. It defaults to 0 754 which means: use the native byte order and prepend the data with a 755 BOM mark. 756 757*/ 758 759/*[clinic input] 760_codecs.utf_32_encode 761 str: unicode 762 errors: str(accept={str, NoneType}) = None 763 byteorder: int = 0 764 / 765[clinic start generated code]*/ 766 767static PyObject * 768_codecs_utf_32_encode_impl(PyObject *module, PyObject *str, 769 const char *errors, int byteorder) 770/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/ 771{ 772 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder), 773 PyUnicode_GET_LENGTH(str)); 774} 775 776/*[clinic input] 777_codecs.utf_32_le_encode 778 str: unicode 779 errors: str(accept={str, NoneType}) = None 780 / 781[clinic start generated code]*/ 782 783static PyObject * 784_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str, 785 const char *errors) 786/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/ 787{ 788 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1), 789 PyUnicode_GET_LENGTH(str)); 790} 791 792/*[clinic input] 793_codecs.utf_32_be_encode 794 str: unicode 795 errors: str(accept={str, NoneType}) = None 796 / 797[clinic start generated code]*/ 798 799static PyObject * 800_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str, 801 const char *errors) 802/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/ 803{ 804 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1), 805 PyUnicode_GET_LENGTH(str)); 806} 807 808/*[clinic input] 809_codecs.unicode_escape_encode 810 str: unicode 811 errors: str(accept={str, NoneType}) = None 812 / 813[clinic start generated code]*/ 814 815static PyObject * 816_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str, 817 const char *errors) 818/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/ 819{ 820 return codec_tuple(PyUnicode_AsUnicodeEscapeString(str), 821 PyUnicode_GET_LENGTH(str)); 822} 823 824/*[clinic input] 825_codecs.raw_unicode_escape_encode 826 str: unicode 827 errors: str(accept={str, NoneType}) = None 828 / 829[clinic start generated code]*/ 830 831static PyObject * 832_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str, 833 const char *errors) 834/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/ 835{ 836 return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str), 837 PyUnicode_GET_LENGTH(str)); 838} 839 840/*[clinic input] 841_codecs.latin_1_encode 842 str: unicode 843 errors: str(accept={str, NoneType}) = None 844 / 845[clinic start generated code]*/ 846 847static PyObject * 848_codecs_latin_1_encode_impl(PyObject *module, PyObject *str, 849 const char *errors) 850/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/ 851{ 852 return codec_tuple(_PyUnicode_AsLatin1String(str, errors), 853 PyUnicode_GET_LENGTH(str)); 854} 855 856/*[clinic input] 857_codecs.ascii_encode 858 str: unicode 859 errors: str(accept={str, NoneType}) = None 860 / 861[clinic start generated code]*/ 862 863static PyObject * 864_codecs_ascii_encode_impl(PyObject *module, PyObject *str, 865 const char *errors) 866/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/ 867{ 868 return codec_tuple(_PyUnicode_AsASCIIString(str, errors), 869 PyUnicode_GET_LENGTH(str)); 870} 871 872/*[clinic input] 873_codecs.charmap_encode 874 str: unicode 875 errors: str(accept={str, NoneType}) = None 876 mapping: object = None 877 / 878[clinic start generated code]*/ 879 880static PyObject * 881_codecs_charmap_encode_impl(PyObject *module, PyObject *str, 882 const char *errors, PyObject *mapping) 883/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/ 884{ 885 if (mapping == Py_None) 886 mapping = NULL; 887 888 return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors), 889 PyUnicode_GET_LENGTH(str)); 890} 891 892/*[clinic input] 893_codecs.charmap_build 894 map: unicode 895 / 896[clinic start generated code]*/ 897 898static PyObject * 899_codecs_charmap_build_impl(PyObject *module, PyObject *map) 900/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/ 901{ 902 return PyUnicode_BuildEncodingMap(map); 903} 904 905#ifdef MS_WINDOWS 906 907/*[clinic input] 908_codecs.mbcs_encode 909 str: unicode 910 errors: str(accept={str, NoneType}) = None 911 / 912[clinic start generated code]*/ 913 914static PyObject * 915_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors) 916/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/ 917{ 918 return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors), 919 PyUnicode_GET_LENGTH(str)); 920} 921 922/*[clinic input] 923_codecs.oem_encode 924 str: unicode 925 errors: str(accept={str, NoneType}) = None 926 / 927[clinic start generated code]*/ 928 929static PyObject * 930_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors) 931/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/ 932{ 933 return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors), 934 PyUnicode_GET_LENGTH(str)); 935} 936 937/*[clinic input] 938_codecs.code_page_encode 939 code_page: int 940 str: unicode 941 errors: str(accept={str, NoneType}) = None 942 / 943[clinic start generated code]*/ 944 945static PyObject * 946_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str, 947 const char *errors) 948/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/ 949{ 950 return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors), 951 PyUnicode_GET_LENGTH(str)); 952} 953 954#endif /* MS_WINDOWS */ 955 956/* --- Error handler registry --------------------------------------------- */ 957 958/*[clinic input] 959_codecs.register_error 960 errors: str 961 handler: object 962 / 963 964Register the specified error handler under the name errors. 965 966handler must be a callable object, that will be called with an exception 967instance containing information about the location of the encoding/decoding 968error and must return a (replacement, new position) tuple. 969[clinic start generated code]*/ 970 971static PyObject * 972_codecs_register_error_impl(PyObject *module, const char *errors, 973 PyObject *handler) 974/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/ 975{ 976 if (PyCodec_RegisterError(errors, handler)) 977 return NULL; 978 Py_RETURN_NONE; 979} 980 981/*[clinic input] 982_codecs.lookup_error 983 name: str 984 / 985 986lookup_error(errors) -> handler 987 988Return the error handler for the specified error handling name or raise a 989LookupError, if no handler exists under this name. 990[clinic start generated code]*/ 991 992static PyObject * 993_codecs_lookup_error_impl(PyObject *module, const char *name) 994/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/ 995{ 996 return PyCodec_LookupError(name); 997} 998 999/* --- Module API --------------------------------------------------------- */ 1000 1001static PyMethodDef _codecs_functions[] = { 1002 _CODECS_REGISTER_METHODDEF 1003 _CODECS_UNREGISTER_METHODDEF 1004 _CODECS_LOOKUP_METHODDEF 1005 _CODECS_ENCODE_METHODDEF 1006 _CODECS_DECODE_METHODDEF 1007 _CODECS_ESCAPE_ENCODE_METHODDEF 1008 _CODECS_ESCAPE_DECODE_METHODDEF 1009 _CODECS_UTF_8_ENCODE_METHODDEF 1010 _CODECS_UTF_8_DECODE_METHODDEF 1011 _CODECS_UTF_7_ENCODE_METHODDEF 1012 _CODECS_UTF_7_DECODE_METHODDEF 1013 _CODECS_UTF_16_ENCODE_METHODDEF 1014 _CODECS_UTF_16_LE_ENCODE_METHODDEF 1015 _CODECS_UTF_16_BE_ENCODE_METHODDEF 1016 _CODECS_UTF_16_DECODE_METHODDEF 1017 _CODECS_UTF_16_LE_DECODE_METHODDEF 1018 _CODECS_UTF_16_BE_DECODE_METHODDEF 1019 _CODECS_UTF_16_EX_DECODE_METHODDEF 1020 _CODECS_UTF_32_ENCODE_METHODDEF 1021 _CODECS_UTF_32_LE_ENCODE_METHODDEF 1022 _CODECS_UTF_32_BE_ENCODE_METHODDEF 1023 _CODECS_UTF_32_DECODE_METHODDEF 1024 _CODECS_UTF_32_LE_DECODE_METHODDEF 1025 _CODECS_UTF_32_BE_DECODE_METHODDEF 1026 _CODECS_UTF_32_EX_DECODE_METHODDEF 1027 _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF 1028 _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF 1029 _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF 1030 _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF 1031 _CODECS_LATIN_1_ENCODE_METHODDEF 1032 _CODECS_LATIN_1_DECODE_METHODDEF 1033 _CODECS_ASCII_ENCODE_METHODDEF 1034 _CODECS_ASCII_DECODE_METHODDEF 1035 _CODECS_CHARMAP_ENCODE_METHODDEF 1036 _CODECS_CHARMAP_DECODE_METHODDEF 1037 _CODECS_CHARMAP_BUILD_METHODDEF 1038 _CODECS_READBUFFER_ENCODE_METHODDEF 1039 _CODECS_MBCS_ENCODE_METHODDEF 1040 _CODECS_MBCS_DECODE_METHODDEF 1041 _CODECS_OEM_ENCODE_METHODDEF 1042 _CODECS_OEM_DECODE_METHODDEF 1043 _CODECS_CODE_PAGE_ENCODE_METHODDEF 1044 _CODECS_CODE_PAGE_DECODE_METHODDEF 1045 _CODECS_REGISTER_ERROR_METHODDEF 1046 _CODECS_LOOKUP_ERROR_METHODDEF 1047 {NULL, NULL} /* sentinel */ 1048}; 1049 1050static PyModuleDef_Slot _codecs_slots[] = { 1051 {0, NULL} 1052}; 1053 1054static struct PyModuleDef codecsmodule = { 1055 PyModuleDef_HEAD_INIT, 1056 "_codecs", 1057 NULL, 1058 0, 1059 _codecs_functions, 1060 _codecs_slots, 1061 NULL, 1062 NULL, 1063 NULL 1064}; 1065 1066PyMODINIT_FUNC 1067PyInit__codecs(void) 1068{ 1069 return PyModuleDef_Init(&codecsmodule); 1070} 1071