xref: /third_party/python/Modules/binascii.c (revision 7db96d56)
1/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6**      each line encodes 45 bytes (except possibly the last)
7**      First char encodes (binary) length, rest data
8**      each char encodes 6 bits, as follows:
9**      binary: 01234567 abcdefgh ijklmnop
10**      ascii:  012345 67abcd efghij klmnop
11**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12**      short binary data is zero-extended (so the bits are always in the
13**      right place), this does *not* reflect in the length.
14** base64:
15**      Line breaks are insignificant, but lines are at most 76 chars
16**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17**      is done via a table.
18**      Short binary data is filled (in ASCII) with '='.
19** hqx:
20**      File starts with introductory text, real data starts and ends
21**      with colons.
22**      Data consists of three similar parts: info, datafork, resourcefork.
23**      Each part is protected (at the end) with a 16-bit crc
24**      The binary data is run-length encoded, and then ascii-fied:
25**      binary: 01234567 abcdefgh ijklmnop
26**      ascii:  012345 67abcd efghij klmnop
27**      ASCII encoding is table-driven, see the code.
28**      Short binary data results in the runt ascii-byte being output with
29**      the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34**      Programs that encode binary data in ASCII are written in
35**      such a style that they are as unreadable as possible. Devices used
36**      include unnecessary global variables, burying important tables
37**      in unrelated sourcefiles, putting functions in include files,
38**      using seemingly-descriptive variable names for different purposes,
39**      calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
45**
46** Added support for quoted-printable encoding, based on rfc 1521 et al
47** quoted-printable encoding specifies that non printable characters (anything
48** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character.  It also specifies some other behavior to enable 8bit data
50** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
52**
53** Brandon Long, September 2001.
54*/
55
56#ifndef Py_BUILD_CORE_BUILTIN
57#  define Py_BUILD_CORE_MODULE 1
58#endif
59
60#define PY_SSIZE_T_CLEAN
61
62#include "Python.h"
63#include "pycore_long.h"          // _PyLong_DigitValue
64#include "pycore_strhex.h"        // _Py_strhex_bytes_with_sep()
65#ifdef USE_ZLIB_CRC32
66#  include "zlib.h"
67#endif
68
/* Per-module state; get_binascii_state() returns a pointer to it for a
   given module object. */
typedef struct binascii_state {
    /* Exception object handed to PyErr_SetString()/PyErr_Format() when a
       codec in this file rejects its input. */
    PyObject *Error;
    /* Not referenced in this part of the file; presumably a second
       exception object -- TODO confirm against the module init code. */
    PyObject *Incomplete;
} binascii_state;
73
74static inline binascii_state *
75get_binascii_state(PyObject *module)
76{
77    return (binascii_state *)PyModule_GetState(module);
78}
79
80
/* Decode table used by a2b_base64, indexed by the input byte value.
   Entries below 64 are the 6-bit value of a base64 alphabet character;
   -1 (stored as 255 in an unsigned char, so it compares >= 64) marks a
   byte that is not part of the alphabet.  The pad character '=' maps to 0
   here, but a2b_base64 tests for BASE64_PAD before consulting the table. */
static const unsigned char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    /* Bytes 0x80-0xff: never valid base64. */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
100
/* Character that b2a_base64 appends to fill a short final group and that
   a2b_base64 recognises as padding. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   b2a_base64 allocates bin_len*2 + 2 bytes (plus one for the optional
   newline), so this bound keeps that size within PY_SSIZE_T_MAX. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* Encode table used by b2a_base64: the character for each 6-bit value
   0..63.  Being a string literal it also carries a trailing NUL. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
108
109
/* Lookup table for binascii.crc_hqx.  It holds one 16-bit entry per value
   of (high byte of the running CRC) XOR (next data byte); crc_hqx XORs the
   selected entry with the low CRC byte shifted up by eight bits. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
144
145/*[clinic input]
146module binascii
147[clinic start generated code]*/
148/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
149
150/*[python input]
151
152class ascii_buffer_converter(CConverter):
153    type = 'Py_buffer'
154    converter = 'ascii_buffer_converter'
155    impl_by_reference = True
156    c_default = "{NULL, NULL}"
157
158    def cleanup(self):
159        name = self.name
160        return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
161
162[python start generated code]*/
163/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
164
165static int
166ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
167{
168    if (arg == NULL) {
169        PyBuffer_Release(buf);
170        return 1;
171    }
172    if (PyUnicode_Check(arg)) {
173        if (PyUnicode_READY(arg) < 0)
174            return 0;
175        if (!PyUnicode_IS_ASCII(arg)) {
176            PyErr_SetString(PyExc_ValueError,
177                            "string argument should contain only ASCII characters");
178            return 0;
179        }
180        assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
181        buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
182        buf->len = PyUnicode_GET_LENGTH(arg);
183        buf->obj = NULL;
184        return 1;
185    }
186    if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
187        PyErr_Format(PyExc_TypeError,
188                     "argument should be bytes, buffer or ASCII string, "
189                     "not '%.100s'", Py_TYPE(arg)->tp_name);
190        return 0;
191    }
192    if (!PyBuffer_IsContiguous(buf, 'C')) {
193        PyErr_Format(PyExc_TypeError,
194                     "argument should be a contiguous buffer, "
195                     "not '%.100s'", Py_TYPE(arg)->tp_name);
196        PyBuffer_Release(buf);
197        return 0;
198    }
199    return Py_CLEANUP_SUPPORTED;
200}
201
202#include "clinic/binascii.c.h"
203
204/*[clinic input]
205binascii.a2b_uu
206
207    data: ascii_buffer
208    /
209
210Decode a line of uuencoded data.
211[clinic start generated code]*/
212
213static PyObject *
214binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
215/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
216{
217    const unsigned char *ascii_data;
218    unsigned char *bin_data;
219    int leftbits = 0;
220    unsigned char this_ch;
221    unsigned int leftchar = 0;
222    PyObject *rv;
223    Py_ssize_t ascii_len, bin_len;
224    binascii_state *state;
225
226    ascii_data = data->buf;
227    ascii_len = data->len;
228
229    assert(ascii_len >= 0);
230
231    /* First byte: binary data length (in bytes) */
232    bin_len = (*ascii_data++ - ' ') & 077;
233    ascii_len--;
234
235    /* Allocate the buffer */
236    if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
237        return NULL;
238    bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
239
240    for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
241        /* XXX is it really best to add NULs if there's no more data */
242        this_ch = (ascii_len > 0) ? *ascii_data : 0;
243        if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
244            /*
245            ** Whitespace. Assume some spaces got eaten at
246            ** end-of-line. (We check this later)
247            */
248            this_ch = 0;
249        } else {
250            /* Check the character for legality
251            ** The 64 in stead of the expected 63 is because
252            ** there are a few uuencodes out there that use
253            ** '`' as zero instead of space.
254            */
255            if ( this_ch < ' ' || this_ch > (' ' + 64)) {
256                state = get_binascii_state(module);
257                if (state == NULL) {
258                    return NULL;
259                }
260                PyErr_SetString(state->Error, "Illegal char");
261                Py_DECREF(rv);
262                return NULL;
263            }
264            this_ch = (this_ch - ' ') & 077;
265        }
266        /*
267        ** Shift it in on the low end, and see if there's
268        ** a byte ready for output.
269        */
270        leftchar = (leftchar << 6) | (this_ch);
271        leftbits += 6;
272        if ( leftbits >= 8 ) {
273            leftbits -= 8;
274            *bin_data++ = (leftchar >> leftbits) & 0xff;
275            leftchar &= ((1 << leftbits) - 1);
276            bin_len--;
277        }
278    }
279    /*
280    ** Finally, check that if there's anything left on the line
281    ** that it's whitespace only.
282    */
283    while( ascii_len-- > 0 ) {
284        this_ch = *ascii_data++;
285        /* Extra '`' may be written as padding in some cases */
286        if ( this_ch != ' ' && this_ch != ' '+64 &&
287             this_ch != '\n' && this_ch != '\r' ) {
288            state = get_binascii_state(module);
289            if (state == NULL) {
290                return NULL;
291            }
292            PyErr_SetString(state->Error, "Trailing garbage");
293            Py_DECREF(rv);
294            return NULL;
295        }
296    }
297    return rv;
298}
299
300/*[clinic input]
301binascii.b2a_uu
302
303    data: Py_buffer
304    /
305    *
306    backtick: bool(accept={int}) = False
307
308Uuencode line of data.
309[clinic start generated code]*/
310
311static PyObject *
312binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
313/*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
314{
315    unsigned char *ascii_data;
316    const unsigned char *bin_data;
317    int leftbits = 0;
318    unsigned char this_ch;
319    unsigned int leftchar = 0;
320    binascii_state *state;
321    Py_ssize_t bin_len, out_len;
322    _PyBytesWriter writer;
323
324    _PyBytesWriter_Init(&writer);
325    bin_data = data->buf;
326    bin_len = data->len;
327    if ( bin_len > 45 ) {
328        /* The 45 is a limit that appears in all uuencode's */
329        state = get_binascii_state(module);
330        if (state == NULL) {
331            return NULL;
332        }
333        PyErr_SetString(state->Error, "At most 45 bytes at once");
334        return NULL;
335    }
336
337    /* We're lazy and allocate to much (fixed up later) */
338    out_len = 2 + (bin_len + 2) / 3 * 4;
339    ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
340    if (ascii_data == NULL)
341        return NULL;
342
343    /* Store the length */
344    if (backtick && !bin_len)
345        *ascii_data++ = '`';
346    else
347        *ascii_data++ = ' ' + (unsigned char)bin_len;
348
349    for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
350        /* Shift the data (or padding) into our buffer */
351        if ( bin_len > 0 )              /* Data */
352            leftchar = (leftchar << 8) | *bin_data;
353        else                            /* Padding */
354            leftchar <<= 8;
355        leftbits += 8;
356
357        /* See if there are 6-bit groups ready */
358        while ( leftbits >= 6 ) {
359            this_ch = (leftchar >> (leftbits-6)) & 0x3f;
360            leftbits -= 6;
361            if (backtick && !this_ch)
362                *ascii_data++ = '`';
363            else
364                *ascii_data++ = this_ch + ' ';
365        }
366    }
367    *ascii_data++ = '\n';       /* Append a courtesy newline */
368
369    return _PyBytesWriter_Finish(&writer, ascii_data);
370}
371
372/*[clinic input]
373binascii.a2b_base64
374
375    data: ascii_buffer
376    /
377    *
378    strict_mode: bool(accept={int}) = False
379
380Decode a line of base64 data.
381
382  strict_mode
383    When set to True, bytes that are not part of the base64 standard are not allowed.
384    The same applies to excess data after padding (= / ==).
385[clinic start generated code]*/
386
387static PyObject *
388binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
389/*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
390{
391    assert(data->len >= 0);
392
393    const unsigned char *ascii_data = data->buf;
394    size_t ascii_len = data->len;
395    binascii_state *state = NULL;
396    char padding_started = 0;
397
398    /* Allocate the buffer */
399    Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
400    _PyBytesWriter writer;
401    _PyBytesWriter_Init(&writer);
402    unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
403    if (bin_data == NULL)
404        return NULL;
405    unsigned char *bin_data_start = bin_data;
406
407    if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
408        state = get_binascii_state(module);
409        if (state) {
410            PyErr_SetString(state->Error, "Leading padding not allowed");
411        }
412        goto error_end;
413    }
414
415    int quad_pos = 0;
416    unsigned char leftchar = 0;
417    int pads = 0;
418    for (size_t i = 0; i < ascii_len; i++) {
419        unsigned char this_ch = ascii_data[i];
420
421        /* Check for pad sequences and ignore
422        ** the invalid ones.
423        */
424        if (this_ch == BASE64_PAD) {
425            padding_started = 1;
426
427            if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
428                /* A pad sequence means we should not parse more input.
429                ** We've already interpreted the data from the quad at this point.
430                ** in strict mode, an error should raise if there's excess data after the padding.
431                */
432                if (strict_mode && i + 1 < ascii_len) {
433                    state = get_binascii_state(module);
434                    if (state) {
435                        PyErr_SetString(state->Error, "Excess data after padding");
436                    }
437                    goto error_end;
438                }
439
440                goto done;
441            }
442            continue;
443        }
444
445        this_ch = table_a2b_base64[this_ch];
446        if (this_ch >= 64) {
447            if (strict_mode) {
448                state = get_binascii_state(module);
449                if (state) {
450                    PyErr_SetString(state->Error, "Only base64 data is allowed");
451                }
452                goto error_end;
453            }
454            continue;
455        }
456
457        // Characters that are not '=', in the middle of the padding, are not allowed
458        if (strict_mode && padding_started) {
459            state = get_binascii_state(module);
460            if (state) {
461                PyErr_SetString(state->Error, "Discontinuous padding not allowed");
462            }
463            goto error_end;
464        }
465        pads = 0;
466
467        switch (quad_pos) {
468            case 0:
469                quad_pos = 1;
470                leftchar = this_ch;
471                break;
472            case 1:
473                quad_pos = 2;
474                *bin_data++ = (leftchar << 2) | (this_ch >> 4);
475                leftchar = this_ch & 0x0f;
476                break;
477            case 2:
478                quad_pos = 3;
479                *bin_data++ = (leftchar << 4) | (this_ch >> 2);
480                leftchar = this_ch & 0x03;
481                break;
482            case 3:
483                quad_pos = 0;
484                *bin_data++ = (leftchar << 6) | (this_ch);
485                leftchar = 0;
486                break;
487        }
488    }
489
490    if (quad_pos != 0) {
491        state = get_binascii_state(module);
492        if (state == NULL) {
493            /* error already set, from get_binascii_state */
494        } else if (quad_pos == 1) {
495            /*
496            ** There is exactly one extra valid, non-padding, base64 character.
497            ** This is an invalid length, as there is no possible input that
498            ** could encoded into such a base64 string.
499            */
500            PyErr_Format(state->Error,
501                         "Invalid base64-encoded string: "
502                         "number of data characters (%zd) cannot be 1 more "
503                         "than a multiple of 4",
504                         (bin_data - bin_data_start) / 3 * 4 + 1);
505        } else {
506            PyErr_SetString(state->Error, "Incorrect padding");
507        }
508        error_end:
509        _PyBytesWriter_Dealloc(&writer);
510        return NULL;
511    }
512
513done:
514    return _PyBytesWriter_Finish(&writer, bin_data);
515}
516
517
518/*[clinic input]
519binascii.b2a_base64
520
521    data: Py_buffer
522    /
523    *
524    newline: bool(accept={int}) = True
525
526Base64-code line of data.
527[clinic start generated code]*/
528
529static PyObject *
530binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
531/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
532{
533    unsigned char *ascii_data;
534    const unsigned char *bin_data;
535    int leftbits = 0;
536    unsigned char this_ch;
537    unsigned int leftchar = 0;
538    Py_ssize_t bin_len, out_len;
539    _PyBytesWriter writer;
540    binascii_state *state;
541
542    bin_data = data->buf;
543    bin_len = data->len;
544    _PyBytesWriter_Init(&writer);
545
546    assert(bin_len >= 0);
547
548    if ( bin_len > BASE64_MAXBIN ) {
549        state = get_binascii_state(module);
550        if (state == NULL) {
551            return NULL;
552        }
553        PyErr_SetString(state->Error, "Too much data for base64 line");
554        return NULL;
555    }
556
557    /* We're lazy and allocate too much (fixed up later).
558       "+2" leaves room for up to two pad characters.
559       Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
560    out_len = bin_len*2 + 2;
561    if (newline)
562        out_len++;
563    ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
564    if (ascii_data == NULL)
565        return NULL;
566
567    for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
568        /* Shift the data into our buffer */
569        leftchar = (leftchar << 8) | *bin_data;
570        leftbits += 8;
571
572        /* See if there are 6-bit groups ready */
573        while ( leftbits >= 6 ) {
574            this_ch = (leftchar >> (leftbits-6)) & 0x3f;
575            leftbits -= 6;
576            *ascii_data++ = table_b2a_base64[this_ch];
577        }
578    }
579    if ( leftbits == 2 ) {
580        *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
581        *ascii_data++ = BASE64_PAD;
582        *ascii_data++ = BASE64_PAD;
583    } else if ( leftbits == 4 ) {
584        *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
585        *ascii_data++ = BASE64_PAD;
586    }
587    if (newline)
588        *ascii_data++ = '\n';       /* Append a courtesy newline */
589
590    return _PyBytesWriter_Finish(&writer, ascii_data);
591}
592
593
594/*[clinic input]
595binascii.crc_hqx
596
597    data: Py_buffer
598    crc: unsigned_int(bitwise=True)
599    /
600
601Compute CRC-CCITT incrementally.
602[clinic start generated code]*/
603
604static PyObject *
605binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
606/*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
607{
608    const unsigned char *bin_data;
609    Py_ssize_t len;
610
611    crc &= 0xffff;
612    bin_data = data->buf;
613    len = data->len;
614
615    while(len-- > 0) {
616        crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
617    }
618
619    return PyLong_FromUnsignedLong(crc);
620}
621
622#ifndef USE_ZLIB_CRC32
623/*  Crc - 32 BIT ANSI X3.66 CRC checksum files
624    Also known as: ISO 3307
625**********************************************************************|
626*                                                                    *|
627* Demonstration program to compute the 32-bit CRC used as the frame  *|
628* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
629* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
630* protocol).  The 32-bit FCS was added via the Federal Register,     *|
631* 1 June 1982, p.23798.  I presume but don't know for certain that   *|
632* this polynomial is or will be included in CCITT V.41, which        *|
633* defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
634* PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
635* errors by a factor of 10^-5 over 16-bit FCS.                       *|
636*                                                                    *|
637**********************************************************************|
638
639 Copyright (C) 1986 Gary S. Brown.  You may use this program, or
640 code or tables extracted from it, as desired without restriction.
641
642 First, the polynomial itself and its table of feedback terms.  The
643 polynomial is
644 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
645 Note that we take it "backwards" and put the highest-order term in
646 the lowest-order bit.  The X^32 term is "implied"; the LSB is the
647 X^31 term, etc.  The X^0 term (usually shown as "+1") results in
648 the MSB being 1.
649
650 Note that the usual hardware shift register implementation, which
651 is what we're using (we're merely optimizing it by doing eight-bit
652 chunks at a time) shifts bits into the lowest-order term.  In our
653 implementation, that means shifting towards the right.  Why do we
654 do it this way?  Because the calculated CRC must be transmitted in
655 order from highest-order term to lowest-order term.  UARTs transmit
656 characters in order from LSB to MSB.  By storing the CRC this way,
657 we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
659 by bit from highest- to lowest-order term without requiring any bit
660 shuffling on our part.  Reception works similarly.
661
662 The feedback terms table consists of 256, 32-bit entries.  Notes:
663
664  1. The table can be generated at runtime if desired; code to do so
665     is shown later.  It might not be obvious, but the feedback
666     terms simply represent the results of eight shift/xor opera-
667     tions for all combinations of data and CRC register values.
668
669  2. The CRC accumulation logic is the same for all CRC polynomials,
670     be they sixteen or thirty-two bits wide.  You simply choose the
671     appropriate table.  Alternatively, because the table can be
672     generated at runtime, you can start by generating the table for
673     the polynomial in question and use exactly the same "updcrc",
674     if your application needn't simultaneously handle two CRC
675     polynomials.  (Note, however, that XMODEM is strange.)
676
677  3. For 16-bit CRCs, the table entries need be only 16 bits wide;
678     of course, 32-bit entries work OK if the high 16 bits are zero.
679
680  4. The values must be right-shifted by eight bits by the "updcrc"
681     logic; the shift must be unsigned (bring in zeroes).  On some
682     hardware you could probably optimize the shift in assembler by
683     using byte-swap instructions.
684********************************************************************/
685
/* The 256-entry feedback-term table described in the comment above;
   internal_crc32() indexes it with (crc ^ data byte) & 0xff. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
740
741static unsigned int
742internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
743{ /* By Jim Ahlstrom; All rights transferred to CNRI */
744    unsigned int result;
745
746    crc = ~ crc;
747    while (len-- > 0) {
748        crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
749        /* Note:  (crc >> 8) MUST zero fill on left */
750    }
751
752    result = (crc ^ 0xFFFFFFFF);
753    return result & 0xffffffff;
754}
755#endif  /* USE_ZLIB_CRC32 */
756
757/*[clinic input]
758binascii.crc32 -> unsigned_int
759
760    data: Py_buffer
761    crc: unsigned_int(bitwise=True) = 0
762    /
763
764Compute CRC-32 incrementally.
765[clinic start generated code]*/
766
767static unsigned int
768binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
769/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
770
771#ifdef USE_ZLIB_CRC32
772/* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
773 * modules for historical reasons. */
774{
775    /* Releasing the GIL for very small buffers is inefficient
776       and may lower performance */
777    if (data->len > 1024*5) {
778        unsigned char *buf = data->buf;
779        Py_ssize_t len = data->len;
780
781        Py_BEGIN_ALLOW_THREADS
782        /* Avoid truncation of length for very large buffers. crc32() takes
783           length as an unsigned int, which may be narrower than Py_ssize_t. */
784        while ((size_t)len > UINT_MAX) {
785            crc = crc32(crc, buf, UINT_MAX);
786            buf += (size_t) UINT_MAX;
787            len -= (size_t) UINT_MAX;
788        }
789        crc = crc32(crc, buf, (unsigned int)len);
790        Py_END_ALLOW_THREADS
791    } else {
792        crc = crc32(crc, data->buf, (unsigned int)data->len);
793    }
794    return crc & 0xffffffff;
795}
796#else  /* USE_ZLIB_CRC32 */
797{
798    const unsigned char *bin_data = data->buf;
799    Py_ssize_t len = data->len;
800
801    /* Releasing the GIL for very small buffers is inefficient
802       and may lower performance */
803    if (len > 1024*5) {
804        unsigned int result;
805        Py_BEGIN_ALLOW_THREADS
806        result = internal_crc32(bin_data, len, crc);
807        Py_END_ALLOW_THREADS
808        return result;
809    } else {
810        return internal_crc32(bin_data, len, crc);
811    }
812}
813#endif  /* USE_ZLIB_CRC32 */
814
815/*[clinic input]
816binascii.b2a_hex
817
818    data: Py_buffer
819    sep: object = NULL
820        An optional single character or byte to separate hex bytes.
821    bytes_per_sep: int = 1
822        How many bytes between separators.  Positive values count from the
823        right, negative values count from the left.
824
825Hexadecimal representation of binary data.
826
827The return value is a bytes object.  This function is also
828available as "hexlify()".
829
830Example:
831>>> binascii.b2a_hex(b'\xb9\x01\xef')
832b'b901ef'
833>>> binascii.hexlify(b'\xb9\x01\xef', ':')
834b'b9:01:ef'
835>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
836b'b9_01ef'
837[clinic start generated code]*/
838
839static PyObject *
840binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
841                      int bytes_per_sep)
842/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
843{
844    return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
845                                     sep, bytes_per_sep);
846}
847
848/*[clinic input]
849binascii.hexlify = binascii.b2a_hex
850
851Hexadecimal representation of binary data.
852
853The return value is a bytes object.  This function is also
854available as "b2a_hex()".
855[clinic start generated code]*/
856
857static PyObject *
858binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
859                      int bytes_per_sep)
860/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
861{
862    return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
863                                     sep, bytes_per_sep);
864}
865
866/*[clinic input]
867binascii.a2b_hex
868
869    hexstr: ascii_buffer
870    /
871
872Binary data of hexadecimal representation.
873
874hexstr must contain an even number of hex digits (upper or lower case).
875This function is also available as "unhexlify()".
876[clinic start generated code]*/
877
878static PyObject *
879binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
880/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
881{
882    const char* argbuf;
883    Py_ssize_t arglen;
884    PyObject *retval;
885    char* retbuf;
886    Py_ssize_t i, j;
887    binascii_state *state;
888
889    argbuf = hexstr->buf;
890    arglen = hexstr->len;
891
892    assert(arglen >= 0);
893
894    /* XXX What should we do about strings with an odd length?  Should
895     * we add an implicit leading zero, or a trailing zero?  For now,
896     * raise an exception.
897     */
898    if (arglen % 2) {
899        state = get_binascii_state(module);
900        if (state == NULL) {
901            return NULL;
902        }
903        PyErr_SetString(state->Error, "Odd-length string");
904        return NULL;
905    }
906
907    retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
908    if (!retval)
909        return NULL;
910    retbuf = PyBytes_AS_STRING(retval);
911
912    for (i=j=0; i < arglen; i += 2) {
913        unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
914        unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
915        if (top >= 16 || bot >= 16) {
916            state = get_binascii_state(module);
917            if (state == NULL) {
918                return NULL;
919            }
920            PyErr_SetString(state->Error,
921                            "Non-hexadecimal digit found");
922            goto finally;
923        }
924        retbuf[j++] = (top << 4) + bot;
925    }
926    return retval;
927
928  finally:
929    Py_DECREF(retval);
930    return NULL;
931}
932
933/*[clinic input]
934binascii.unhexlify = binascii.a2b_hex
935
936Binary data of hexadecimal representation.
937
938hexstr must contain an even number of hex digits (upper or lower case).
939[clinic start generated code]*/
940
941static PyObject *
942binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
943/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
944{
945    return binascii_a2b_hex_impl(module, hexstr);
946}
947
/* Line-length limit used by the quoted-printable encoder (b2a_qp): a soft
   line break is inserted once the current output line would reach this
   many characters. */
#define MAXLINESIZE 76
949
950
951/*[clinic input]
952binascii.a2b_qp
953
954    data: ascii_buffer
955    header: bool(accept={int}) = False
956
957Decode a string of qp-encoded data.
958[clinic start generated code]*/
959
960static PyObject *
961binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
962/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
963{
964    Py_ssize_t in, out;
965    char ch;
966    const unsigned char *ascii_data;
967    unsigned char *odata;
968    Py_ssize_t datalen = 0;
969    PyObject *rv;
970
971    ascii_data = data->buf;
972    datalen = data->len;
973
974    /* We allocate the output same size as input, this is overkill.
975     */
976    odata = (unsigned char *) PyMem_Calloc(1, datalen);
977    if (odata == NULL) {
978        PyErr_NoMemory();
979        return NULL;
980    }
981
982    in = out = 0;
983    while (in < datalen) {
984        if (ascii_data[in] == '=') {
985            in++;
986            if (in >= datalen) break;
987            /* Soft line breaks */
988            if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
989                if (ascii_data[in] != '\n') {
990                    while (in < datalen && ascii_data[in] != '\n') in++;
991                }
992                if (in < datalen) in++;
993            }
994            else if (ascii_data[in] == '=') {
995                /* broken case from broken python qp */
996                odata[out++] = '=';
997                in++;
998            }
999            else if ((in + 1 < datalen) &&
1000                     ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1001                      (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1002                      (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1003                     ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1004                      (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1005                      (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1006                /* hexval */
1007                ch = _PyLong_DigitValue[ascii_data[in]] << 4;
1008                in++;
1009                ch |= _PyLong_DigitValue[ascii_data[in]];
1010                in++;
1011                odata[out++] = ch;
1012            }
1013            else {
1014              odata[out++] = '=';
1015            }
1016        }
1017        else if (header && ascii_data[in] == '_') {
1018            odata[out++] = ' ';
1019            in++;
1020        }
1021        else {
1022            odata[out] = ascii_data[in];
1023            in++;
1024            out++;
1025        }
1026    }
1027    if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1028        PyMem_Free(odata);
1029        return NULL;
1030    }
1031    PyMem_Free(odata);
1032    return rv;
1033}
1034
/* Write the two upper-case hexadecimal digits of ch to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    s[0] = hex_digits[(ch >> 4) & 0x0F];
    s[1] = hex_digits[ch & 0x0F];
    return 0;
}
1045
1046/* XXX: This is ridiculously complicated to be backward compatible
1047 * (mostly) with the quopri module.  It doesn't re-create the quopri
1048 * module bug where text ending in CRLF has the CR encoded */
1049
1050/*[clinic input]
1051binascii.b2a_qp
1052
1053    data: Py_buffer
1054    quotetabs: bool(accept={int}) = False
1055    istext: bool(accept={int}) = True
1056    header: bool(accept={int}) = False
1057
1058Encode a string using quoted-printable encoding.
1059
1060On encoding, when istext is set, newlines are not encoded, and white
1061space at end of lines is.  When istext is not set, \r and \n (CR/LF)
1062are both encoded.  When quotetabs is set, space and tabs are encoded.
1063[clinic start generated code]*/
1064
1065static PyObject *
1066binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
1067                     int istext, int header)
1068/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
1069{
1070    Py_ssize_t in, out;
1071    const unsigned char *databuf;
1072    unsigned char *odata;
1073    Py_ssize_t datalen = 0, odatalen = 0;
1074    PyObject *rv;
1075    unsigned int linelen = 0;
1076    unsigned char ch;
1077    int crlf = 0;
1078    const unsigned char *p;
1079
1080    databuf = data->buf;
1081    datalen = data->len;
1082
1083    /* See if this string is using CRLF line ends */
1084    /* XXX: this function has the side effect of converting all of
1085     * the end of lines to be the same depending on this detection
1086     * here */
1087    p = (const unsigned char *) memchr(databuf, '\n', datalen);
1088    if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
1089        crlf = 1;
1090
1091    /* First, scan to see how many characters need to be encoded */
1092    in = 0;
1093    while (in < datalen) {
1094        Py_ssize_t delta = 0;
1095        if ((databuf[in] > 126) ||
1096            (databuf[in] == '=') ||
1097            (header && databuf[in] == '_') ||
1098            ((databuf[in] == '.') && (linelen == 0) &&
1099             (in + 1 == datalen || databuf[in+1] == '\n' ||
1100              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1101            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1102            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1103            ((databuf[in] < 33) &&
1104             (databuf[in] != '\r') && (databuf[in] != '\n') &&
1105             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1106        {
1107            if ((linelen + 3) >= MAXLINESIZE) {
1108                linelen = 0;
1109                if (crlf)
1110                    delta += 3;
1111                else
1112                    delta += 2;
1113            }
1114            linelen += 3;
1115            delta += 3;
1116            in++;
1117        }
1118        else {
1119            if (istext &&
1120                ((databuf[in] == '\n') ||
1121                 ((in+1 < datalen) && (databuf[in] == '\r') &&
1122                 (databuf[in+1] == '\n'))))
1123            {
1124                linelen = 0;
1125                /* Protect against whitespace on end of line */
1126                if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
1127                    delta += 2;
1128                if (crlf)
1129                    delta += 2;
1130                else
1131                    delta += 1;
1132                if (databuf[in] == '\r')
1133                    in += 2;
1134                else
1135                    in++;
1136            }
1137            else {
1138                if ((in + 1 != datalen) &&
1139                    (databuf[in+1] != '\n') &&
1140                    (linelen + 1) >= MAXLINESIZE) {
1141                    linelen = 0;
1142                    if (crlf)
1143                        delta += 3;
1144                    else
1145                        delta += 2;
1146                }
1147                linelen++;
1148                delta++;
1149                in++;
1150            }
1151        }
1152        if (PY_SSIZE_T_MAX - delta < odatalen) {
1153            PyErr_NoMemory();
1154            return NULL;
1155        }
1156        odatalen += delta;
1157    }
1158
1159    /* We allocate the output same size as input, this is overkill.
1160     */
1161    odata = (unsigned char *) PyMem_Calloc(1, odatalen);
1162    if (odata == NULL) {
1163        PyErr_NoMemory();
1164        return NULL;
1165    }
1166
1167    in = out = linelen = 0;
1168    while (in < datalen) {
1169        if ((databuf[in] > 126) ||
1170            (databuf[in] == '=') ||
1171            (header && databuf[in] == '_') ||
1172            ((databuf[in] == '.') && (linelen == 0) &&
1173             (in + 1 == datalen || databuf[in+1] == '\n' ||
1174              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1175            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1176            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1177            ((databuf[in] < 33) &&
1178             (databuf[in] != '\r') && (databuf[in] != '\n') &&
1179             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1180        {
1181            if ((linelen + 3 )>= MAXLINESIZE) {
1182                odata[out++] = '=';
1183                if (crlf) odata[out++] = '\r';
1184                odata[out++] = '\n';
1185                linelen = 0;
1186            }
1187            odata[out++] = '=';
1188            to_hex(databuf[in], &odata[out]);
1189            out += 2;
1190            in++;
1191            linelen += 3;
1192        }
1193        else {
1194            if (istext &&
1195                ((databuf[in] == '\n') ||
1196                 ((in+1 < datalen) && (databuf[in] == '\r') &&
1197                 (databuf[in+1] == '\n'))))
1198            {
1199                linelen = 0;
1200                /* Protect against whitespace on end of line */
1201                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1202                    ch = odata[out-1];
1203                    odata[out-1] = '=';
1204                    to_hex(ch, &odata[out]);
1205                    out += 2;
1206                }
1207
1208                if (crlf) odata[out++] = '\r';
1209                odata[out++] = '\n';
1210                if (databuf[in] == '\r')
1211                    in += 2;
1212                else
1213                    in++;
1214            }
1215            else {
1216                if ((in + 1 != datalen) &&
1217                    (databuf[in+1] != '\n') &&
1218                    (linelen + 1) >= MAXLINESIZE) {
1219                    odata[out++] = '=';
1220                    if (crlf) odata[out++] = '\r';
1221                    odata[out++] = '\n';
1222                    linelen = 0;
1223                }
1224                linelen++;
1225                if (header && databuf[in] == ' ') {
1226                    odata[out++] = '_';
1227                    in++;
1228                }
1229                else {
1230                    odata[out++] = databuf[in++];
1231                }
1232            }
1233        }
1234    }
1235    if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1236        PyMem_Free(odata);
1237        return NULL;
1238    }
1239    PyMem_Free(odata);
1240    return rv;
1241}
1242
1243/* List of functions defined in the module */
1244
1245static struct PyMethodDef binascii_module_methods[] = {
1246    BINASCII_A2B_UU_METHODDEF
1247    BINASCII_B2A_UU_METHODDEF
1248    BINASCII_A2B_BASE64_METHODDEF
1249    BINASCII_B2A_BASE64_METHODDEF
1250    BINASCII_A2B_HEX_METHODDEF
1251    BINASCII_B2A_HEX_METHODDEF
1252    BINASCII_HEXLIFY_METHODDEF
1253    BINASCII_UNHEXLIFY_METHODDEF
1254    BINASCII_CRC_HQX_METHODDEF
1255    BINASCII_CRC32_METHODDEF
1256    BINASCII_A2B_QP_METHODDEF
1257    BINASCII_B2A_QP_METHODDEF
1258    {NULL, NULL}                             /* sentinel */
1259};
1260
1261
/* Module docstring.  (The initialization function for the module, defined
   further down, *must* be called PyInit_binascii.) */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1264
1265static int
1266binascii_exec(PyObject *module) {
1267    int result;
1268    binascii_state *state = PyModule_GetState(module);
1269    if (state == NULL) {
1270        return -1;
1271    }
1272
1273    state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1274    if (state->Error == NULL) {
1275        return -1;
1276    }
1277    Py_INCREF(state->Error);
1278    result = PyModule_AddObject(module, "Error", state->Error);
1279    if (result == -1) {
1280        Py_DECREF(state->Error);
1281        return -1;
1282    }
1283
1284    state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1285    if (state->Incomplete == NULL) {
1286        return -1;
1287    }
1288    Py_INCREF(state->Incomplete);
1289    result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
1290    if (result == -1) {
1291        Py_DECREF(state->Incomplete);
1292        return -1;
1293    }
1294
1295    return 0;
1296}
1297
1298static PyModuleDef_Slot binascii_slots[] = {
1299    {Py_mod_exec, binascii_exec},
1300    {0, NULL}
1301};
1302
1303static int
1304binascii_traverse(PyObject *module, visitproc visit, void *arg)
1305{
1306    binascii_state *state = get_binascii_state(module);
1307    Py_VISIT(state->Error);
1308    Py_VISIT(state->Incomplete);
1309    return 0;
1310}
1311
1312static int
1313binascii_clear(PyObject *module)
1314{
1315    binascii_state *state = get_binascii_state(module);
1316    Py_CLEAR(state->Error);
1317    Py_CLEAR(state->Incomplete);
1318    return 0;
1319}
1320
1321static void
1322binascii_free(void *module)
1323{
1324    binascii_clear((PyObject *)module);
1325}
1326
1327static struct PyModuleDef binasciimodule = {
1328    PyModuleDef_HEAD_INIT,
1329    "binascii",
1330    doc_binascii,
1331    sizeof(binascii_state),
1332    binascii_module_methods,
1333    binascii_slots,
1334    binascii_traverse,
1335    binascii_clear,
1336    binascii_free
1337};
1338
1339PyMODINIT_FUNC
1340PyInit_binascii(void)
1341{
1342    return PyModuleDef_Init(&binasciimodule);
1343}
1344