1/* ------------------------------------------------------------------------
2
3   _codecs -- Provides access to the codec registry and the builtin
4              codecs.
5
6   This module should never be imported directly. The standard library
7   module "codecs" wraps this builtin module for use within Python.
8
9   The codec registry is accessible via:
10
11     register(search_function) -> None
12
13     lookup(encoding) -> CodecInfo object
14
15   The builtin Unicode codecs use the following interface:
16
    <encoding>_encode(Unicode_object[,errors='strict']) ->
       (bytes object, characters consumed)
19
20     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21        (Unicode object, bytes consumed)
22
   These <encoding>s are available: utf_7, utf_8, utf_16, utf_32,
   unicode_escape, raw_unicode_escape, latin_1, ascii (7-bit), charmap,
   and mbcs, oem, code_page (on win32).
25
26
27Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29Copyright (c) Corporation for National Research Initiatives.
30
31   ------------------------------------------------------------------------ */
32
33#define PY_SSIZE_T_CLEAN
34#include "Python.h"
35
36#ifdef MS_WINDOWS
37#include <windows.h>
38#endif
39
40/*[clinic input]
41module _codecs
42[clinic start generated code]*/
43/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44
45#include "clinic/_codecsmodule.c.h"
46
47/* --- Registry ----------------------------------------------------------- */
48
49/*[clinic input]
50_codecs.register
51    search_function: object
52    /
53
54Register a codec search function.
55
56Search functions are expected to take one argument, the encoding name in
57all lower case letters, and either return None, or a tuple of functions
58(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
59[clinic start generated code]*/
60
61static PyObject *
62_codecs_register(PyObject *module, PyObject *search_function)
63/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
64{
65    if (PyCodec_Register(search_function))
66        return NULL;
67
68    Py_RETURN_NONE;
69}
70
71/*[clinic input]
72_codecs.unregister
73    search_function: object
74    /
75
76Unregister a codec search function and clear the registry's cache.
77
78If the search function is not registered, do nothing.
79[clinic start generated code]*/
80
81static PyObject *
82_codecs_unregister(PyObject *module, PyObject *search_function)
83/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
84{
85    if (PyCodec_Unregister(search_function) < 0) {
86        return NULL;
87    }
88
89    Py_RETURN_NONE;
90}
91
92/*[clinic input]
93_codecs.lookup
94    encoding: str
95    /
96
97Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
98[clinic start generated code]*/
99
100static PyObject *
101_codecs_lookup_impl(PyObject *module, const char *encoding)
102/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
103{
104    return _PyCodec_Lookup(encoding);
105}
106
107/*[clinic input]
108_codecs.encode
109    obj: object
110    encoding: str(c_default="NULL") = "utf-8"
111    errors: str(c_default="NULL") = "strict"
112
113Encodes obj using the codec registered for encoding.
114
115The default encoding is 'utf-8'.  errors may be given to set a
116different error handling scheme.  Default is 'strict' meaning that encoding
117errors raise a ValueError.  Other possible values are 'ignore', 'replace'
118and 'backslashreplace' as well as any other name registered with
119codecs.register_error that can handle ValueErrors.
120[clinic start generated code]*/
121
122static PyObject *
123_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
124                    const char *errors)
125/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
126{
127    if (encoding == NULL)
128        encoding = PyUnicode_GetDefaultEncoding();
129
130    /* Encode via the codec registry */
131    return PyCodec_Encode(obj, encoding, errors);
132}
133
134/*[clinic input]
135_codecs.decode
136    obj: object
137    encoding: str(c_default="NULL") = "utf-8"
138    errors: str(c_default="NULL") = "strict"
139
140Decodes obj using the codec registered for encoding.
141
142Default encoding is 'utf-8'.  errors may be given to set a
143different error handling scheme.  Default is 'strict' meaning that encoding
144errors raise a ValueError.  Other possible values are 'ignore', 'replace'
145and 'backslashreplace' as well as any other name registered with
146codecs.register_error that can handle ValueErrors.
147[clinic start generated code]*/
148
149static PyObject *
150_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
151                    const char *errors)
152/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
153{
154    if (encoding == NULL)
155        encoding = PyUnicode_GetDefaultEncoding();
156
157    /* Decode via the codec registry */
158    return PyCodec_Decode(obj, encoding, errors);
159}
160
161/* --- Helpers ------------------------------------------------------------ */
162
163static
164PyObject *codec_tuple(PyObject *decoded,
165                      Py_ssize_t len)
166{
167    if (decoded == NULL)
168        return NULL;
169    return Py_BuildValue("Nn", decoded, len);
170}
171
172/* --- String codecs ------------------------------------------------------ */
173/*[clinic input]
174_codecs.escape_decode
175    data: Py_buffer(accept={str, buffer})
176    errors: str(accept={str, NoneType}) = None
177    /
178[clinic start generated code]*/
179
180static PyObject *
181_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
182                           const char *errors)
183/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
184{
185    PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
186                                             errors, 0, NULL);
187    return codec_tuple(decoded, data->len);
188}
189
190/*[clinic input]
191_codecs.escape_encode
192    data: object(subclass_of='&PyBytes_Type')
193    errors: str(accept={str, NoneType}) = None
194    /
195[clinic start generated code]*/
196
197static PyObject *
198_codecs_escape_encode_impl(PyObject *module, PyObject *data,
199                           const char *errors)
200/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
201{
202    Py_ssize_t size;
203    Py_ssize_t newsize;
204    PyObject *v;
205
206    size = PyBytes_GET_SIZE(data);
207    if (size > PY_SSIZE_T_MAX / 4) {
208        PyErr_SetString(PyExc_OverflowError,
209            "string is too large to encode");
210            return NULL;
211    }
212    newsize = 4*size;
213    v = PyBytes_FromStringAndSize(NULL, newsize);
214
215    if (v == NULL) {
216        return NULL;
217    }
218    else {
219        Py_ssize_t i;
220        char c;
221        char *p = PyBytes_AS_STRING(v);
222
223        for (i = 0; i < size; i++) {
224            /* There's at least enough room for a hex escape */
225            assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
226            c = PyBytes_AS_STRING(data)[i];
227            if (c == '\'' || c == '\\')
228                *p++ = '\\', *p++ = c;
229            else if (c == '\t')
230                *p++ = '\\', *p++ = 't';
231            else if (c == '\n')
232                *p++ = '\\', *p++ = 'n';
233            else if (c == '\r')
234                *p++ = '\\', *p++ = 'r';
235            else if (c < ' ' || c >= 0x7f) {
236                *p++ = '\\';
237                *p++ = 'x';
238                *p++ = Py_hexdigits[(c & 0xf0) >> 4];
239                *p++ = Py_hexdigits[c & 0xf];
240            }
241            else
242                *p++ = c;
243        }
244        *p = '\0';
245        if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
246            return NULL;
247        }
248    }
249
250    return codec_tuple(v, size);
251}
252
253/* --- Decoder ------------------------------------------------------------ */
254/*[clinic input]
255_codecs.utf_7_decode
256    data: Py_buffer
257    errors: str(accept={str, NoneType}) = None
258    final: bool(accept={int}) = False
259    /
260[clinic start generated code]*/
261
262static PyObject *
263_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
264                          const char *errors, int final)
265/*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
266{
267    Py_ssize_t consumed = data->len;
268    PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
269                                                     errors,
270                                                     final ? NULL : &consumed);
271    return codec_tuple(decoded, consumed);
272}
273
274/*[clinic input]
275_codecs.utf_8_decode
276    data: Py_buffer
277    errors: str(accept={str, NoneType}) = None
278    final: bool(accept={int}) = False
279    /
280[clinic start generated code]*/
281
282static PyObject *
283_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
284                          const char *errors, int final)
285/*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
286{
287    Py_ssize_t consumed = data->len;
288    PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
289                                                     errors,
290                                                     final ? NULL : &consumed);
291    return codec_tuple(decoded, consumed);
292}
293
294/*[clinic input]
295_codecs.utf_16_decode
296    data: Py_buffer
297    errors: str(accept={str, NoneType}) = None
298    final: bool(accept={int}) = False
299    /
300[clinic start generated code]*/
301
302static PyObject *
303_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
304                           const char *errors, int final)
305/*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
306{
307    int byteorder = 0;
308    /* This is overwritten unless final is true. */
309    Py_ssize_t consumed = data->len;
310    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
311                                                      errors, &byteorder,
312                                                      final ? NULL : &consumed);
313    return codec_tuple(decoded, consumed);
314}
315
316/*[clinic input]
317_codecs.utf_16_le_decode
318    data: Py_buffer
319    errors: str(accept={str, NoneType}) = None
320    final: bool(accept={int}) = False
321    /
322[clinic start generated code]*/
323
324static PyObject *
325_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
326                              const char *errors, int final)
327/*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
328{
329    int byteorder = -1;
330    /* This is overwritten unless final is true. */
331    Py_ssize_t consumed = data->len;
332    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
333                                                      errors, &byteorder,
334                                                      final ? NULL : &consumed);
335    return codec_tuple(decoded, consumed);
336}
337
338/*[clinic input]
339_codecs.utf_16_be_decode
340    data: Py_buffer
341    errors: str(accept={str, NoneType}) = None
342    final: bool(accept={int}) = False
343    /
344[clinic start generated code]*/
345
346static PyObject *
347_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
348                              const char *errors, int final)
349/*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
350{
351    int byteorder = 1;
352    /* This is overwritten unless final is true. */
353    Py_ssize_t consumed = data->len;
354    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
355                                                      errors, &byteorder,
356                                                      final ? NULL : &consumed);
357    return codec_tuple(decoded, consumed);
358}
359
360/* This non-standard version also provides access to the byteorder
361   parameter of the builtin UTF-16 codec.
362
363   It returns a tuple (unicode, bytesread, byteorder) with byteorder
364   being the value in effect at the end of data.
365
366*/
367/*[clinic input]
368_codecs.utf_16_ex_decode
369    data: Py_buffer
370    errors: str(accept={str, NoneType}) = None
371    byteorder: int = 0
372    final: bool(accept={int}) = False
373    /
374[clinic start generated code]*/
375
376static PyObject *
377_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
378                              const char *errors, int byteorder, int final)
379/*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
380{
381    /* This is overwritten unless final is true. */
382    Py_ssize_t consumed = data->len;
383
384    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
385                                                      errors, &byteorder,
386                                                      final ? NULL : &consumed);
387    if (decoded == NULL)
388        return NULL;
389    return Py_BuildValue("Nni", decoded, consumed, byteorder);
390}
391
392/*[clinic input]
393_codecs.utf_32_decode
394    data: Py_buffer
395    errors: str(accept={str, NoneType}) = None
396    final: bool(accept={int}) = False
397    /
398[clinic start generated code]*/
399
400static PyObject *
401_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
402                           const char *errors, int final)
403/*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
404{
405    int byteorder = 0;
406    /* This is overwritten unless final is true. */
407    Py_ssize_t consumed = data->len;
408    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
409                                                      errors, &byteorder,
410                                                      final ? NULL : &consumed);
411    return codec_tuple(decoded, consumed);
412}
413
414/*[clinic input]
415_codecs.utf_32_le_decode
416    data: Py_buffer
417    errors: str(accept={str, NoneType}) = None
418    final: bool(accept={int}) = False
419    /
420[clinic start generated code]*/
421
422static PyObject *
423_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
424                              const char *errors, int final)
425/*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
426{
427    int byteorder = -1;
428    /* This is overwritten unless final is true. */
429    Py_ssize_t consumed = data->len;
430    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
431                                                      errors, &byteorder,
432                                                      final ? NULL : &consumed);
433    return codec_tuple(decoded, consumed);
434}
435
436/*[clinic input]
437_codecs.utf_32_be_decode
438    data: Py_buffer
439    errors: str(accept={str, NoneType}) = None
440    final: bool(accept={int}) = False
441    /
442[clinic start generated code]*/
443
444static PyObject *
445_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
446                              const char *errors, int final)
447/*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
448{
449    int byteorder = 1;
450    /* This is overwritten unless final is true. */
451    Py_ssize_t consumed = data->len;
452    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
453                                                      errors, &byteorder,
454                                                      final ? NULL : &consumed);
455    return codec_tuple(decoded, consumed);
456}
457
458/* This non-standard version also provides access to the byteorder
459   parameter of the builtin UTF-32 codec.
460
461   It returns a tuple (unicode, bytesread, byteorder) with byteorder
462   being the value in effect at the end of data.
463
464*/
465/*[clinic input]
466_codecs.utf_32_ex_decode
467    data: Py_buffer
468    errors: str(accept={str, NoneType}) = None
469    byteorder: int = 0
470    final: bool(accept={int}) = False
471    /
472[clinic start generated code]*/
473
474static PyObject *
475_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
476                              const char *errors, int byteorder, int final)
477/*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
478{
479    Py_ssize_t consumed = data->len;
480    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
481                                                      errors, &byteorder,
482                                                      final ? NULL : &consumed);
483    if (decoded == NULL)
484        return NULL;
485    return Py_BuildValue("Nni", decoded, consumed, byteorder);
486}
487
488/*[clinic input]
489_codecs.unicode_escape_decode
490    data: Py_buffer(accept={str, buffer})
491    errors: str(accept={str, NoneType}) = None
492    final: bool(accept={int}) = True
493    /
494[clinic start generated code]*/
495
496static PyObject *
497_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
498                                   const char *errors, int final)
499/*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/
500{
501    Py_ssize_t consumed = data->len;
502    PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
503                                                               errors,
504                                                               final ? NULL : &consumed);
505    return codec_tuple(decoded, consumed);
506}
507
508/*[clinic input]
509_codecs.raw_unicode_escape_decode
510    data: Py_buffer(accept={str, buffer})
511    errors: str(accept={str, NoneType}) = None
512    final: bool(accept={int}) = True
513    /
514[clinic start generated code]*/
515
516static PyObject *
517_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
518                                       const char *errors, int final)
519/*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/
520{
521    Py_ssize_t consumed = data->len;
522    PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
523                                                                  errors,
524                                                                  final ? NULL : &consumed);
525    return codec_tuple(decoded, consumed);
526}
527
528/*[clinic input]
529_codecs.latin_1_decode
530    data: Py_buffer
531    errors: str(accept={str, NoneType}) = None
532    /
533[clinic start generated code]*/
534
535static PyObject *
536_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
537                            const char *errors)
538/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
539{
540    PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
541    return codec_tuple(decoded, data->len);
542}
543
544/*[clinic input]
545_codecs.ascii_decode
546    data: Py_buffer
547    errors: str(accept={str, NoneType}) = None
548    /
549[clinic start generated code]*/
550
551static PyObject *
552_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
553                          const char *errors)
554/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
555{
556    PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
557    return codec_tuple(decoded, data->len);
558}
559
560/*[clinic input]
561_codecs.charmap_decode
562    data: Py_buffer
563    errors: str(accept={str, NoneType}) = None
564    mapping: object = None
565    /
566[clinic start generated code]*/
567
568static PyObject *
569_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
570                            const char *errors, PyObject *mapping)
571/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
572{
573    PyObject *decoded;
574
575    if (mapping == Py_None)
576        mapping = NULL;
577
578    decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
579    return codec_tuple(decoded, data->len);
580}
581
582#ifdef MS_WINDOWS
583
584/*[clinic input]
585_codecs.mbcs_decode
586    data: Py_buffer
587    errors: str(accept={str, NoneType}) = None
588    final: bool(accept={int}) = False
589    /
590[clinic start generated code]*/
591
592static PyObject *
593_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
594                         const char *errors, int final)
595/*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
596{
597    Py_ssize_t consumed = data->len;
598    PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
599            errors, final ? NULL : &consumed);
600    return codec_tuple(decoded, consumed);
601}
602
603/*[clinic input]
604_codecs.oem_decode
605    data: Py_buffer
606    errors: str(accept={str, NoneType}) = None
607    final: bool(accept={int}) = False
608    /
609[clinic start generated code]*/
610
611static PyObject *
612_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
613                        const char *errors, int final)
614/*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
615{
616    Py_ssize_t consumed = data->len;
617    PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
618        data->buf, data->len, errors, final ? NULL : &consumed);
619    return codec_tuple(decoded, consumed);
620}
621
622/*[clinic input]
623_codecs.code_page_decode
624    codepage: int
625    data: Py_buffer
626    errors: str(accept={str, NoneType}) = None
627    final: bool(accept={int}) = False
628    /
629[clinic start generated code]*/
630
631static PyObject *
632_codecs_code_page_decode_impl(PyObject *module, int codepage,
633                              Py_buffer *data, const char *errors, int final)
634/*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
635{
636    Py_ssize_t consumed = data->len;
637    PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
638                                                         data->buf, data->len,
639                                                         errors,
640                                                         final ? NULL : &consumed);
641    return codec_tuple(decoded, consumed);
642}
643
644#endif /* MS_WINDOWS */
645
646/* --- Encoder ------------------------------------------------------------ */
647
648/*[clinic input]
649_codecs.readbuffer_encode
650    data: Py_buffer(accept={str, buffer})
651    errors: str(accept={str, NoneType}) = None
652    /
653[clinic start generated code]*/
654
655static PyObject *
656_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
657                               const char *errors)
658/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
659{
660    PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
661    return codec_tuple(result, data->len);
662}
663
664/*[clinic input]
665_codecs.utf_7_encode
666    str: unicode
667    errors: str(accept={str, NoneType}) = None
668    /
669[clinic start generated code]*/
670
671static PyObject *
672_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
673                          const char *errors)
674/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
675{
676    return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
677                       PyUnicode_GET_LENGTH(str));
678}
679
680/*[clinic input]
681_codecs.utf_8_encode
682    str: unicode
683    errors: str(accept={str, NoneType}) = None
684    /
685[clinic start generated code]*/
686
687static PyObject *
688_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
689                          const char *errors)
690/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
691{
692    return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
693                       PyUnicode_GET_LENGTH(str));
694}
695
696/* This version provides access to the byteorder parameter of the
697   builtin UTF-16 codecs as optional third argument. It defaults to 0
698   which means: use the native byte order and prepend the data with a
699   BOM mark.
700
701*/
702
703/*[clinic input]
704_codecs.utf_16_encode
705    str: unicode
706    errors: str(accept={str, NoneType}) = None
707    byteorder: int = 0
708    /
709[clinic start generated code]*/
710
711static PyObject *
712_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
713                           const char *errors, int byteorder)
714/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
715{
716    return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
717                       PyUnicode_GET_LENGTH(str));
718}
719
720/*[clinic input]
721_codecs.utf_16_le_encode
722    str: unicode
723    errors: str(accept={str, NoneType}) = None
724    /
725[clinic start generated code]*/
726
727static PyObject *
728_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
729                              const char *errors)
730/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
731{
732    return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
733                       PyUnicode_GET_LENGTH(str));
734}
735
736/*[clinic input]
737_codecs.utf_16_be_encode
738    str: unicode
739    errors: str(accept={str, NoneType}) = None
740    /
741[clinic start generated code]*/
742
743static PyObject *
744_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
745                              const char *errors)
746/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
747{
748    return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
749                       PyUnicode_GET_LENGTH(str));
750}
751
752/* This version provides access to the byteorder parameter of the
753   builtin UTF-32 codecs as optional third argument. It defaults to 0
754   which means: use the native byte order and prepend the data with a
755   BOM mark.
756
757*/
758
759/*[clinic input]
760_codecs.utf_32_encode
761    str: unicode
762    errors: str(accept={str, NoneType}) = None
763    byteorder: int = 0
764    /
765[clinic start generated code]*/
766
767static PyObject *
768_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
769                           const char *errors, int byteorder)
770/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
771{
772    return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
773                       PyUnicode_GET_LENGTH(str));
774}
775
776/*[clinic input]
777_codecs.utf_32_le_encode
778    str: unicode
779    errors: str(accept={str, NoneType}) = None
780    /
781[clinic start generated code]*/
782
783static PyObject *
784_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
785                              const char *errors)
786/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
787{
788    return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
789                       PyUnicode_GET_LENGTH(str));
790}
791
792/*[clinic input]
793_codecs.utf_32_be_encode
794    str: unicode
795    errors: str(accept={str, NoneType}) = None
796    /
797[clinic start generated code]*/
798
799static PyObject *
800_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
801                              const char *errors)
802/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
803{
804    return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
805                       PyUnicode_GET_LENGTH(str));
806}
807
808/*[clinic input]
809_codecs.unicode_escape_encode
810    str: unicode
811    errors: str(accept={str, NoneType}) = None
812    /
813[clinic start generated code]*/
814
815static PyObject *
816_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
817                                   const char *errors)
818/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
819{
820    return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
821                       PyUnicode_GET_LENGTH(str));
822}
823
824/*[clinic input]
825_codecs.raw_unicode_escape_encode
826    str: unicode
827    errors: str(accept={str, NoneType}) = None
828    /
829[clinic start generated code]*/
830
831static PyObject *
832_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
833                                       const char *errors)
834/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
835{
836    return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
837                       PyUnicode_GET_LENGTH(str));
838}
839
840/*[clinic input]
841_codecs.latin_1_encode
842    str: unicode
843    errors: str(accept={str, NoneType}) = None
844    /
845[clinic start generated code]*/
846
847static PyObject *
848_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
849                            const char *errors)
850/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
851{
852    return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
853                       PyUnicode_GET_LENGTH(str));
854}
855
856/*[clinic input]
857_codecs.ascii_encode
858    str: unicode
859    errors: str(accept={str, NoneType}) = None
860    /
861[clinic start generated code]*/
862
863static PyObject *
864_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
865                          const char *errors)
866/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
867{
868    return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
869                       PyUnicode_GET_LENGTH(str));
870}
871
872/*[clinic input]
873_codecs.charmap_encode
874    str: unicode
875    errors: str(accept={str, NoneType}) = None
876    mapping: object = None
877    /
878[clinic start generated code]*/
879
880static PyObject *
881_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
882                            const char *errors, PyObject *mapping)
883/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
884{
885    if (mapping == Py_None)
886        mapping = NULL;
887
888    return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
889                       PyUnicode_GET_LENGTH(str));
890}
891
892/*[clinic input]
893_codecs.charmap_build
894    map: unicode
895    /
896[clinic start generated code]*/
897
898static PyObject *
899_codecs_charmap_build_impl(PyObject *module, PyObject *map)
900/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
901{
902    return PyUnicode_BuildEncodingMap(map);
903}
904
905#ifdef MS_WINDOWS
906
907/*[clinic input]
908_codecs.mbcs_encode
909    str: unicode
910    errors: str(accept={str, NoneType}) = None
911    /
912[clinic start generated code]*/
913
914static PyObject *
915_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
916/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
917{
918    return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
919                       PyUnicode_GET_LENGTH(str));
920}
921
922/*[clinic input]
923_codecs.oem_encode
924    str: unicode
925    errors: str(accept={str, NoneType}) = None
926    /
927[clinic start generated code]*/
928
929static PyObject *
930_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
931/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
932{
933    return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
934        PyUnicode_GET_LENGTH(str));
935}
936
937/*[clinic input]
938_codecs.code_page_encode
939    code_page: int
940    str: unicode
941    errors: str(accept={str, NoneType}) = None
942    /
943[clinic start generated code]*/
944
945static PyObject *
946_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
947                              const char *errors)
948/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
949{
950    return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
951                       PyUnicode_GET_LENGTH(str));
952}
953
954#endif /* MS_WINDOWS */
955
956/* --- Error handler registry --------------------------------------------- */
957
958/*[clinic input]
959_codecs.register_error
960    errors: str
961    handler: object
962    /
963
964Register the specified error handler under the name errors.
965
966handler must be a callable object, that will be called with an exception
967instance containing information about the location of the encoding/decoding
968error and must return a (replacement, new position) tuple.
969[clinic start generated code]*/
970
971static PyObject *
972_codecs_register_error_impl(PyObject *module, const char *errors,
973                            PyObject *handler)
974/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
975{
976    if (PyCodec_RegisterError(errors, handler))
977        return NULL;
978    Py_RETURN_NONE;
979}
980
981/*[clinic input]
982_codecs.lookup_error
983    name: str
984    /
985
986lookup_error(errors) -> handler
987
988Return the error handler for the specified error handling name or raise a
989LookupError, if no handler exists under this name.
990[clinic start generated code]*/
991
992static PyObject *
993_codecs_lookup_error_impl(PyObject *module, const char *name)
994/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
995{
996    return PyCodec_LookupError(name);
997}
998
999/* --- Module API --------------------------------------------------------- */
1000
1001static PyMethodDef _codecs_functions[] = {
1002    _CODECS_REGISTER_METHODDEF
1003    _CODECS_UNREGISTER_METHODDEF
1004    _CODECS_LOOKUP_METHODDEF
1005    _CODECS_ENCODE_METHODDEF
1006    _CODECS_DECODE_METHODDEF
1007    _CODECS_ESCAPE_ENCODE_METHODDEF
1008    _CODECS_ESCAPE_DECODE_METHODDEF
1009    _CODECS_UTF_8_ENCODE_METHODDEF
1010    _CODECS_UTF_8_DECODE_METHODDEF
1011    _CODECS_UTF_7_ENCODE_METHODDEF
1012    _CODECS_UTF_7_DECODE_METHODDEF
1013    _CODECS_UTF_16_ENCODE_METHODDEF
1014    _CODECS_UTF_16_LE_ENCODE_METHODDEF
1015    _CODECS_UTF_16_BE_ENCODE_METHODDEF
1016    _CODECS_UTF_16_DECODE_METHODDEF
1017    _CODECS_UTF_16_LE_DECODE_METHODDEF
1018    _CODECS_UTF_16_BE_DECODE_METHODDEF
1019    _CODECS_UTF_16_EX_DECODE_METHODDEF
1020    _CODECS_UTF_32_ENCODE_METHODDEF
1021    _CODECS_UTF_32_LE_ENCODE_METHODDEF
1022    _CODECS_UTF_32_BE_ENCODE_METHODDEF
1023    _CODECS_UTF_32_DECODE_METHODDEF
1024    _CODECS_UTF_32_LE_DECODE_METHODDEF
1025    _CODECS_UTF_32_BE_DECODE_METHODDEF
1026    _CODECS_UTF_32_EX_DECODE_METHODDEF
1027    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1028    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1029    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1030    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1031    _CODECS_LATIN_1_ENCODE_METHODDEF
1032    _CODECS_LATIN_1_DECODE_METHODDEF
1033    _CODECS_ASCII_ENCODE_METHODDEF
1034    _CODECS_ASCII_DECODE_METHODDEF
1035    _CODECS_CHARMAP_ENCODE_METHODDEF
1036    _CODECS_CHARMAP_DECODE_METHODDEF
1037    _CODECS_CHARMAP_BUILD_METHODDEF
1038    _CODECS_READBUFFER_ENCODE_METHODDEF
1039    _CODECS_MBCS_ENCODE_METHODDEF
1040    _CODECS_MBCS_DECODE_METHODDEF
1041    _CODECS_OEM_ENCODE_METHODDEF
1042    _CODECS_OEM_DECODE_METHODDEF
1043    _CODECS_CODE_PAGE_ENCODE_METHODDEF
1044    _CODECS_CODE_PAGE_DECODE_METHODDEF
1045    _CODECS_REGISTER_ERROR_METHODDEF
1046    _CODECS_LOOKUP_ERROR_METHODDEF
1047    {NULL, NULL}                /* sentinel */
1048};
1049
1050static PyModuleDef_Slot _codecs_slots[] = {
1051    {0, NULL}
1052};
1053
1054static struct PyModuleDef codecsmodule = {
1055        PyModuleDef_HEAD_INIT,
1056        "_codecs",
1057        NULL,
1058        0,
1059        _codecs_functions,
1060        _codecs_slots,
1061        NULL,
1062        NULL,
1063        NULL
1064};
1065
1066PyMODINIT_FUNC
1067PyInit__codecs(void)
1068{
1069    return PyModuleDef_Init(&codecsmodule);
1070}
1071