1/* 2** Routines to represent binary data in ASCII and vice-versa 3** 4** This module currently supports the following encodings: 5** uuencode: 6** each line encodes 45 bytes (except possibly the last) 7** First char encodes (binary) length, rest data 8** each char encodes 6 bits, as follows: 9** binary: 01234567 abcdefgh ijklmnop 10** ascii: 012345 67abcd efghij klmnop 11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc. 12** short binary data is zero-extended (so the bits are always in the 13** right place), this does *not* reflect in the length. 14** base64: 15** Line breaks are insignificant, but lines are at most 76 chars 16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding 17** is done via a table. 18** Short binary data is filled (in ASCII) with '='. 19** hqx: 20** File starts with introductory text, real data starts and ends 21** with colons. 22** Data consists of three similar parts: info, datafork, resourcefork. 23** Each part is protected (at the end) with a 16-bit crc 24** The binary data is run-length encoded, and then ascii-fied: 25** binary: 01234567 abcdefgh ijklmnop 26** ascii: 012345 67abcd efghij klmnop 27** ASCII encoding is table-driven, see the code. 28** Short binary data results in the runt ascii-byte being output with 29** the bits in the right place. 30** 31** While I was reading dozens of programs that encode or decode the formats 32** here (documentation? hihi:-) I have formulated Jansen's Observation: 33** 34** Programs that encode binary data in ASCII are written in 35** such a style that they are as unreadable as possible. Devices used 36** include unnecessary global variables, burying important tables 37** in unrelated sourcefiles, putting functions in include files, 38** using seemingly-descriptive variable names for different purposes, 39** calls to empty subroutines and a host of others. 
40** 41** I have attempted to break with this tradition, but I guess that that 42** does make the performance sub-optimal. Oh well, too bad... 43** 44** Jack Jansen, CWI, July 1995. 45** 46** Added support for quoted-printable encoding, based on rfc 1521 et al 47** quoted-printable encoding specifies that non printable characters (anything 48** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value 49** of the character. It also specifies some other behavior to enable 8bit data 50** in a mail message with little difficulty (maximum line sizes, protecting 51** some cases of whitespace, etc). 52** 53** Brandon Long, September 2001. 54*/ 55 56#ifndef Py_BUILD_CORE_BUILTIN 57# define Py_BUILD_CORE_MODULE 1 58#endif 59 60#define PY_SSIZE_T_CLEAN 61 62#include "Python.h" 63#include "pycore_long.h" // _PyLong_DigitValue 64#include "pycore_strhex.h" // _Py_strhex_bytes_with_sep() 65#ifdef USE_ZLIB_CRC32 66# include "zlib.h" 67#endif 68 69typedef struct binascii_state { 70 PyObject *Error; 71 PyObject *Incomplete; 72} binascii_state; 73 74static inline binascii_state * 75get_binascii_state(PyObject *module) 76{ 77 return (binascii_state *)PyModule_GetState(module); 78} 79 80 81static const unsigned char table_a2b_base64[] = { 82 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 83 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 84 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, 85 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */ 86 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 87 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 88 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 89 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1, 90 91 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 92 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 93 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 94 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 95 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 96 
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 97 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 98 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 99}; 100 101#define BASE64_PAD '=' 102 103/* Max binary chunk size; limited only by available memory */ 104#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2) 105 106static const unsigned char table_b2a_base64[] = 107"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 108 109 110static const unsigned short crctab_hqx[256] = { 111 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 112 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, 113 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, 114 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, 115 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, 116 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, 117 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, 118 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, 119 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, 120 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, 121 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, 122 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, 123 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, 124 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, 125 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, 126 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, 127 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, 128 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, 129 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, 130 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, 131 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, 132 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 
133 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, 134 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, 135 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, 136 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, 137 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, 138 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, 139 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, 140 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, 141 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 142 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0, 143}; 144 145/*[clinic input] 146module binascii 147[clinic start generated code]*/ 148/*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/ 149 150/*[python input] 151 152class ascii_buffer_converter(CConverter): 153 type = 'Py_buffer' 154 converter = 'ascii_buffer_converter' 155 impl_by_reference = True 156 c_default = "{NULL, NULL}" 157 158 def cleanup(self): 159 name = self.name 160 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"]) 161 162[python start generated code]*/ 163/*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/ 164 165static int 166ascii_buffer_converter(PyObject *arg, Py_buffer *buf) 167{ 168 if (arg == NULL) { 169 PyBuffer_Release(buf); 170 return 1; 171 } 172 if (PyUnicode_Check(arg)) { 173 if (PyUnicode_READY(arg) < 0) 174 return 0; 175 if (!PyUnicode_IS_ASCII(arg)) { 176 PyErr_SetString(PyExc_ValueError, 177 "string argument should contain only ASCII characters"); 178 return 0; 179 } 180 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND); 181 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg); 182 buf->len = PyUnicode_GET_LENGTH(arg); 183 buf->obj = NULL; 184 return 1; 185 } 186 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) { 187 PyErr_Format(PyExc_TypeError, 188 "argument should be bytes, buffer 
or ASCII string, " 189 "not '%.100s'", Py_TYPE(arg)->tp_name); 190 return 0; 191 } 192 if (!PyBuffer_IsContiguous(buf, 'C')) { 193 PyErr_Format(PyExc_TypeError, 194 "argument should be a contiguous buffer, " 195 "not '%.100s'", Py_TYPE(arg)->tp_name); 196 PyBuffer_Release(buf); 197 return 0; 198 } 199 return Py_CLEANUP_SUPPORTED; 200} 201 202#include "clinic/binascii.c.h" 203 204/*[clinic input] 205binascii.a2b_uu 206 207 data: ascii_buffer 208 / 209 210Decode a line of uuencoded data. 211[clinic start generated code]*/ 212 213static PyObject * 214binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) 215/*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/ 216{ 217 const unsigned char *ascii_data; 218 unsigned char *bin_data; 219 int leftbits = 0; 220 unsigned char this_ch; 221 unsigned int leftchar = 0; 222 PyObject *rv; 223 Py_ssize_t ascii_len, bin_len; 224 binascii_state *state; 225 226 ascii_data = data->buf; 227 ascii_len = data->len; 228 229 assert(ascii_len >= 0); 230 231 /* First byte: binary data length (in bytes) */ 232 bin_len = (*ascii_data++ - ' ') & 077; 233 ascii_len--; 234 235 /* Allocate the buffer */ 236 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) 237 return NULL; 238 bin_data = (unsigned char *)PyBytes_AS_STRING(rv); 239 240 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) { 241 /* XXX is it really best to add NULs if there's no more data */ 242 this_ch = (ascii_len > 0) ? *ascii_data : 0; 243 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) { 244 /* 245 ** Whitespace. Assume some spaces got eaten at 246 ** end-of-line. (We check this later) 247 */ 248 this_ch = 0; 249 } else { 250 /* Check the character for legality 251 ** The 64 in stead of the expected 63 is because 252 ** there are a few uuencodes out there that use 253 ** '`' as zero instead of space. 
254 */ 255 if ( this_ch < ' ' || this_ch > (' ' + 64)) { 256 state = get_binascii_state(module); 257 if (state == NULL) { 258 return NULL; 259 } 260 PyErr_SetString(state->Error, "Illegal char"); 261 Py_DECREF(rv); 262 return NULL; 263 } 264 this_ch = (this_ch - ' ') & 077; 265 } 266 /* 267 ** Shift it in on the low end, and see if there's 268 ** a byte ready for output. 269 */ 270 leftchar = (leftchar << 6) | (this_ch); 271 leftbits += 6; 272 if ( leftbits >= 8 ) { 273 leftbits -= 8; 274 *bin_data++ = (leftchar >> leftbits) & 0xff; 275 leftchar &= ((1 << leftbits) - 1); 276 bin_len--; 277 } 278 } 279 /* 280 ** Finally, check that if there's anything left on the line 281 ** that it's whitespace only. 282 */ 283 while( ascii_len-- > 0 ) { 284 this_ch = *ascii_data++; 285 /* Extra '`' may be written as padding in some cases */ 286 if ( this_ch != ' ' && this_ch != ' '+64 && 287 this_ch != '\n' && this_ch != '\r' ) { 288 state = get_binascii_state(module); 289 if (state == NULL) { 290 return NULL; 291 } 292 PyErr_SetString(state->Error, "Trailing garbage"); 293 Py_DECREF(rv); 294 return NULL; 295 } 296 } 297 return rv; 298} 299 300/*[clinic input] 301binascii.b2a_uu 302 303 data: Py_buffer 304 / 305 * 306 backtick: bool(accept={int}) = False 307 308Uuencode line of data. 
309[clinic start generated code]*/ 310 311static PyObject * 312binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) 313/*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/ 314{ 315 unsigned char *ascii_data; 316 const unsigned char *bin_data; 317 int leftbits = 0; 318 unsigned char this_ch; 319 unsigned int leftchar = 0; 320 binascii_state *state; 321 Py_ssize_t bin_len, out_len; 322 _PyBytesWriter writer; 323 324 _PyBytesWriter_Init(&writer); 325 bin_data = data->buf; 326 bin_len = data->len; 327 if ( bin_len > 45 ) { 328 /* The 45 is a limit that appears in all uuencode's */ 329 state = get_binascii_state(module); 330 if (state == NULL) { 331 return NULL; 332 } 333 PyErr_SetString(state->Error, "At most 45 bytes at once"); 334 return NULL; 335 } 336 337 /* We're lazy and allocate to much (fixed up later) */ 338 out_len = 2 + (bin_len + 2) / 3 * 4; 339 ascii_data = _PyBytesWriter_Alloc(&writer, out_len); 340 if (ascii_data == NULL) 341 return NULL; 342 343 /* Store the length */ 344 if (backtick && !bin_len) 345 *ascii_data++ = '`'; 346 else 347 *ascii_data++ = ' ' + (unsigned char)bin_len; 348 349 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) { 350 /* Shift the data (or padding) into our buffer */ 351 if ( bin_len > 0 ) /* Data */ 352 leftchar = (leftchar << 8) | *bin_data; 353 else /* Padding */ 354 leftchar <<= 8; 355 leftbits += 8; 356 357 /* See if there are 6-bit groups ready */ 358 while ( leftbits >= 6 ) { 359 this_ch = (leftchar >> (leftbits-6)) & 0x3f; 360 leftbits -= 6; 361 if (backtick && !this_ch) 362 *ascii_data++ = '`'; 363 else 364 *ascii_data++ = this_ch + ' '; 365 } 366 } 367 *ascii_data++ = '\n'; /* Append a courtesy newline */ 368 369 return _PyBytesWriter_Finish(&writer, ascii_data); 370} 371 372/*[clinic input] 373binascii.a2b_base64 374 375 data: ascii_buffer 376 / 377 * 378 strict_mode: bool(accept={int}) = False 379 380Decode a line of base64 data. 
381 382 strict_mode 383 When set to True, bytes that are not part of the base64 standard are not allowed. 384 The same applies to excess data after padding (= / ==). 385[clinic start generated code]*/ 386 387static PyObject * 388binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) 389/*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/ 390{ 391 assert(data->len >= 0); 392 393 const unsigned char *ascii_data = data->buf; 394 size_t ascii_len = data->len; 395 binascii_state *state = NULL; 396 char padding_started = 0; 397 398 /* Allocate the buffer */ 399 Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ 400 _PyBytesWriter writer; 401 _PyBytesWriter_Init(&writer); 402 unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len); 403 if (bin_data == NULL) 404 return NULL; 405 unsigned char *bin_data_start = bin_data; 406 407 if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') { 408 state = get_binascii_state(module); 409 if (state) { 410 PyErr_SetString(state->Error, "Leading padding not allowed"); 411 } 412 goto error_end; 413 } 414 415 int quad_pos = 0; 416 unsigned char leftchar = 0; 417 int pads = 0; 418 for (size_t i = 0; i < ascii_len; i++) { 419 unsigned char this_ch = ascii_data[i]; 420 421 /* Check for pad sequences and ignore 422 ** the invalid ones. 423 */ 424 if (this_ch == BASE64_PAD) { 425 padding_started = 1; 426 427 if (quad_pos >= 2 && quad_pos + ++pads >= 4) { 428 /* A pad sequence means we should not parse more input. 429 ** We've already interpreted the data from the quad at this point. 430 ** in strict mode, an error should raise if there's excess data after the padding. 
431 */ 432 if (strict_mode && i + 1 < ascii_len) { 433 state = get_binascii_state(module); 434 if (state) { 435 PyErr_SetString(state->Error, "Excess data after padding"); 436 } 437 goto error_end; 438 } 439 440 goto done; 441 } 442 continue; 443 } 444 445 this_ch = table_a2b_base64[this_ch]; 446 if (this_ch >= 64) { 447 if (strict_mode) { 448 state = get_binascii_state(module); 449 if (state) { 450 PyErr_SetString(state->Error, "Only base64 data is allowed"); 451 } 452 goto error_end; 453 } 454 continue; 455 } 456 457 // Characters that are not '=', in the middle of the padding, are not allowed 458 if (strict_mode && padding_started) { 459 state = get_binascii_state(module); 460 if (state) { 461 PyErr_SetString(state->Error, "Discontinuous padding not allowed"); 462 } 463 goto error_end; 464 } 465 pads = 0; 466 467 switch (quad_pos) { 468 case 0: 469 quad_pos = 1; 470 leftchar = this_ch; 471 break; 472 case 1: 473 quad_pos = 2; 474 *bin_data++ = (leftchar << 2) | (this_ch >> 4); 475 leftchar = this_ch & 0x0f; 476 break; 477 case 2: 478 quad_pos = 3; 479 *bin_data++ = (leftchar << 4) | (this_ch >> 2); 480 leftchar = this_ch & 0x03; 481 break; 482 case 3: 483 quad_pos = 0; 484 *bin_data++ = (leftchar << 6) | (this_ch); 485 leftchar = 0; 486 break; 487 } 488 } 489 490 if (quad_pos != 0) { 491 state = get_binascii_state(module); 492 if (state == NULL) { 493 /* error already set, from get_binascii_state */ 494 } else if (quad_pos == 1) { 495 /* 496 ** There is exactly one extra valid, non-padding, base64 character. 497 ** This is an invalid length, as there is no possible input that 498 ** could encoded into such a base64 string. 
499 */ 500 PyErr_Format(state->Error, 501 "Invalid base64-encoded string: " 502 "number of data characters (%zd) cannot be 1 more " 503 "than a multiple of 4", 504 (bin_data - bin_data_start) / 3 * 4 + 1); 505 } else { 506 PyErr_SetString(state->Error, "Incorrect padding"); 507 } 508 error_end: 509 _PyBytesWriter_Dealloc(&writer); 510 return NULL; 511 } 512 513done: 514 return _PyBytesWriter_Finish(&writer, bin_data); 515} 516 517 518/*[clinic input] 519binascii.b2a_base64 520 521 data: Py_buffer 522 / 523 * 524 newline: bool(accept={int}) = True 525 526Base64-code line of data. 527[clinic start generated code]*/ 528 529static PyObject * 530binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) 531/*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/ 532{ 533 unsigned char *ascii_data; 534 const unsigned char *bin_data; 535 int leftbits = 0; 536 unsigned char this_ch; 537 unsigned int leftchar = 0; 538 Py_ssize_t bin_len, out_len; 539 _PyBytesWriter writer; 540 binascii_state *state; 541 542 bin_data = data->buf; 543 bin_len = data->len; 544 _PyBytesWriter_Init(&writer); 545 546 assert(bin_len >= 0); 547 548 if ( bin_len > BASE64_MAXBIN ) { 549 state = get_binascii_state(module); 550 if (state == NULL) { 551 return NULL; 552 } 553 PyErr_SetString(state->Error, "Too much data for base64 line"); 554 return NULL; 555 } 556 557 /* We're lazy and allocate too much (fixed up later). 558 "+2" leaves room for up to two pad characters. 559 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). 
*/ 560 out_len = bin_len*2 + 2; 561 if (newline) 562 out_len++; 563 ascii_data = _PyBytesWriter_Alloc(&writer, out_len); 564 if (ascii_data == NULL) 565 return NULL; 566 567 for( ; bin_len > 0 ; bin_len--, bin_data++ ) { 568 /* Shift the data into our buffer */ 569 leftchar = (leftchar << 8) | *bin_data; 570 leftbits += 8; 571 572 /* See if there are 6-bit groups ready */ 573 while ( leftbits >= 6 ) { 574 this_ch = (leftchar >> (leftbits-6)) & 0x3f; 575 leftbits -= 6; 576 *ascii_data++ = table_b2a_base64[this_ch]; 577 } 578 } 579 if ( leftbits == 2 ) { 580 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4]; 581 *ascii_data++ = BASE64_PAD; 582 *ascii_data++ = BASE64_PAD; 583 } else if ( leftbits == 4 ) { 584 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2]; 585 *ascii_data++ = BASE64_PAD; 586 } 587 if (newline) 588 *ascii_data++ = '\n'; /* Append a courtesy newline */ 589 590 return _PyBytesWriter_Finish(&writer, ascii_data); 591} 592 593 594/*[clinic input] 595binascii.crc_hqx 596 597 data: Py_buffer 598 crc: unsigned_int(bitwise=True) 599 / 600 601Compute CRC-CCITT incrementally. 602[clinic start generated code]*/ 603 604static PyObject * 605binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc) 606/*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/ 607{ 608 const unsigned char *bin_data; 609 Py_ssize_t len; 610 611 crc &= 0xffff; 612 bin_data = data->buf; 613 len = data->len; 614 615 while(len-- > 0) { 616 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++]; 617 } 618 619 return PyLong_FromUnsignedLong(crc); 620} 621 622#ifndef USE_ZLIB_CRC32 623/* Crc - 32 BIT ANSI X3.66 CRC checksum files 624 Also known as: ISO 3307 625**********************************************************************| 626* *| 627* Demonstration program to compute the 32-bit CRC used as the frame *| 628* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *| 629* and FED-STD-1003, the U.S. 
versions of CCITT's X.25 link-level    *|
*  protocol).  The 32-bit FCS was added via the Federal Register,    *|
*  1 June 1982, p.23798.  I presume but don't know for certain that  *|
*  this polynomial is or will be included in CCITT V.41, which       *|
*  defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS *|
*  PUB 78 says that the 32-bit FCS reduces otherwise undetected      *|
*  errors by a factor of 10^-5 over 16-bit FCS.                      *|
*                                                                    *|
**********************************************************************|

 Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 code or tables extracted from it, as desired without restriction.

 First, the polynomial itself and its table of feedback terms.  The
 polynomial is
 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 Note that we take it "backwards" and put the highest-order term in
 the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 the MSB being 1.

 Note that the usual hardware shift register implementation, which
 is what we're using (we're merely optimizing it by doing eight-bit
 chunks at a time) shifts bits into the lowest-order term.  In our
 implementation, that means shifting towards the right.  Why do we
 do it this way?  Because the calculated CRC must be transmitted in
 order from highest-order term to lowest-order term.  UARTs transmit
 characters in order from LSB to MSB.  By storing the CRC this way,
 we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
 by bit from highest- to lowest-order term without requiring any bit
 shuffling on our part.  Reception works similarly.

 The feedback terms table consists of 256, 32-bit entries.  Notes:

  1. The table can be generated at runtime if desired; code to do so
     is shown later.
It might not be obvious, but the feedback 666 terms simply represent the results of eight shift/xor opera- 667 tions for all combinations of data and CRC register values. 668 669 2. The CRC accumulation logic is the same for all CRC polynomials, 670 be they sixteen or thirty-two bits wide. You simply choose the 671 appropriate table. Alternatively, because the table can be 672 generated at runtime, you can start by generating the table for 673 the polynomial in question and use exactly the same "updcrc", 674 if your application needn't simultaneously handle two CRC 675 polynomials. (Note, however, that XMODEM is strange.) 676 677 3. For 16-bit CRCs, the table entries need be only 16 bits wide; 678 of course, 32-bit entries work OK if the high 16 bits are zero. 679 680 4. The values must be right-shifted by eight bits by the "updcrc" 681 logic; the shift must be unsigned (bring in zeroes). On some 682 hardware you could probably optimize the shift in assembler by 683 using byte-swap instructions. 
684********************************************************************/ 685 686static const unsigned int crc_32_tab[256] = { 6870x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 6880x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 6890xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 6900x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU, 6910x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U, 6920x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U, 6930xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 6940xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 6950x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 6960x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU, 6970xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 6980xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U, 6990x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 7000x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 7010x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 7020xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U, 7030x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU, 7040x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U, 7050x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 7060xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 7070x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 7080x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U, 7090xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U, 7100xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU, 7110x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 7120x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 7130x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U, 7140x73dc1683U, 0xe3630b12U, 
0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U, 7150xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U, 7160x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU, 7170x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 7180x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 7190xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 7200xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU, 7210x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 7220x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U, 7230xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 7240xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 7250x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 7260x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U, 7270x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU, 7280xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U, 7290x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 7300x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 7310x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 7320xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U, 7330x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 7340x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U, 7350xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 7360xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 7370x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 7380x2d02ef8dU 739}; 740 741static unsigned int 742internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc) 743{ /* By Jim Ahlstrom; All rights transferred to CNRI */ 744 unsigned int result; 745 746 crc = ~ crc; 747 while (len-- > 0) { 748 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8); 749 /* Note: (crc >> 8) MUST zero fill on left */ 750 } 751 752 result = 
(crc ^ 0xFFFFFFFF); 753 return result & 0xffffffff; 754} 755#endif /* USE_ZLIB_CRC32 */ 756 757/*[clinic input] 758binascii.crc32 -> unsigned_int 759 760 data: Py_buffer 761 crc: unsigned_int(bitwise=True) = 0 762 / 763 764Compute CRC-32 incrementally. 765[clinic start generated code]*/ 766 767static unsigned int 768binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) 769/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/ 770 771#ifdef USE_ZLIB_CRC32 772/* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two 773 * modules for historical reasons. */ 774{ 775 /* Releasing the GIL for very small buffers is inefficient 776 and may lower performance */ 777 if (data->len > 1024*5) { 778 unsigned char *buf = data->buf; 779 Py_ssize_t len = data->len; 780 781 Py_BEGIN_ALLOW_THREADS 782 /* Avoid truncation of length for very large buffers. crc32() takes 783 length as an unsigned int, which may be narrower than Py_ssize_t. */ 784 while ((size_t)len > UINT_MAX) { 785 crc = crc32(crc, buf, UINT_MAX); 786 buf += (size_t) UINT_MAX; 787 len -= (size_t) UINT_MAX; 788 } 789 crc = crc32(crc, buf, (unsigned int)len); 790 Py_END_ALLOW_THREADS 791 } else { 792 crc = crc32(crc, data->buf, (unsigned int)data->len); 793 } 794 return crc & 0xffffffff; 795} 796#else /* USE_ZLIB_CRC32 */ 797{ 798 const unsigned char *bin_data = data->buf; 799 Py_ssize_t len = data->len; 800 801 /* Releasing the GIL for very small buffers is inefficient 802 and may lower performance */ 803 if (len > 1024*5) { 804 unsigned int result; 805 Py_BEGIN_ALLOW_THREADS 806 result = internal_crc32(bin_data, len, crc); 807 Py_END_ALLOW_THREADS 808 return result; 809 } else { 810 return internal_crc32(bin_data, len, crc); 811 } 812} 813#endif /* USE_ZLIB_CRC32 */ 814 815/*[clinic input] 816binascii.b2a_hex 817 818 data: Py_buffer 819 sep: object = NULL 820 An optional single character or byte to separate hex bytes. 
821 bytes_per_sep: int = 1 822 How many bytes between separators. Positive values count from the 823 right, negative values count from the left. 824 825Hexadecimal representation of binary data. 826 827The return value is a bytes object. This function is also 828available as "hexlify()". 829 830Example: 831>>> binascii.b2a_hex(b'\xb9\x01\xef') 832b'b901ef' 833>>> binascii.hexlify(b'\xb9\x01\xef', ':') 834b'b9:01:ef' 835>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2) 836b'b9_01ef' 837[clinic start generated code]*/ 838 839static PyObject * 840binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep, 841 int bytes_per_sep) 842/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/ 843{ 844 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len, 845 sep, bytes_per_sep); 846} 847 848/*[clinic input] 849binascii.hexlify = binascii.b2a_hex 850 851Hexadecimal representation of binary data. 852 853The return value is a bytes object. This function is also 854available as "b2a_hex()". 855[clinic start generated code]*/ 856 857static PyObject * 858binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep, 859 int bytes_per_sep) 860/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/ 861{ 862 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len, 863 sep, bytes_per_sep); 864} 865 866/*[clinic input] 867binascii.a2b_hex 868 869 hexstr: ascii_buffer 870 / 871 872Binary data of hexadecimal representation. 873 874hexstr must contain an even number of hex digits (upper or lower case). 875This function is also available as "unhexlify()". 
876[clinic start generated code]*/ 877 878static PyObject * 879binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) 880/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/ 881{ 882 const char* argbuf; 883 Py_ssize_t arglen; 884 PyObject *retval; 885 char* retbuf; 886 Py_ssize_t i, j; 887 binascii_state *state; 888 889 argbuf = hexstr->buf; 890 arglen = hexstr->len; 891 892 assert(arglen >= 0); 893 894 /* XXX What should we do about strings with an odd length? Should 895 * we add an implicit leading zero, or a trailing zero? For now, 896 * raise an exception. 897 */ 898 if (arglen % 2) { 899 state = get_binascii_state(module); 900 if (state == NULL) { 901 return NULL; 902 } 903 PyErr_SetString(state->Error, "Odd-length string"); 904 return NULL; 905 } 906 907 retval = PyBytes_FromStringAndSize(NULL, (arglen/2)); 908 if (!retval) 909 return NULL; 910 retbuf = PyBytes_AS_STRING(retval); 911 912 for (i=j=0; i < arglen; i += 2) { 913 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])]; 914 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])]; 915 if (top >= 16 || bot >= 16) { 916 state = get_binascii_state(module); 917 if (state == NULL) { 918 return NULL; 919 } 920 PyErr_SetString(state->Error, 921 "Non-hexadecimal digit found"); 922 goto finally; 923 } 924 retbuf[j++] = (top << 4) + bot; 925 } 926 return retval; 927 928 finally: 929 Py_DECREF(retval); 930 return NULL; 931} 932 933/*[clinic input] 934binascii.unhexlify = binascii.a2b_hex 935 936Binary data of hexadecimal representation. 937 938hexstr must contain an even number of hex digits (upper or lower case). 
939[clinic start generated code]*/ 940 941static PyObject * 942binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr) 943/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/ 944{ 945 return binascii_a2b_hex_impl(module, hexstr); 946} 947 948#define MAXLINESIZE 76 949 950 951/*[clinic input] 952binascii.a2b_qp 953 954 data: ascii_buffer 955 header: bool(accept={int}) = False 956 957Decode a string of qp-encoded data. 958[clinic start generated code]*/ 959 960static PyObject * 961binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header) 962/*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/ 963{ 964 Py_ssize_t in, out; 965 char ch; 966 const unsigned char *ascii_data; 967 unsigned char *odata; 968 Py_ssize_t datalen = 0; 969 PyObject *rv; 970 971 ascii_data = data->buf; 972 datalen = data->len; 973 974 /* We allocate the output same size as input, this is overkill. 975 */ 976 odata = (unsigned char *) PyMem_Calloc(1, datalen); 977 if (odata == NULL) { 978 PyErr_NoMemory(); 979 return NULL; 980 } 981 982 in = out = 0; 983 while (in < datalen) { 984 if (ascii_data[in] == '=') { 985 in++; 986 if (in >= datalen) break; 987 /* Soft line breaks */ 988 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) { 989 if (ascii_data[in] != '\n') { 990 while (in < datalen && ascii_data[in] != '\n') in++; 991 } 992 if (in < datalen) in++; 993 } 994 else if (ascii_data[in] == '=') { 995 /* broken case from broken python qp */ 996 odata[out++] = '='; 997 in++; 998 } 999 else if ((in + 1 < datalen) && 1000 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') || 1001 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') || 1002 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) && 1003 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') || 1004 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') || 1005 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) { 1006 /* hexval */ 1007 ch = 
_PyLong_DigitValue[ascii_data[in]] << 4; 1008 in++; 1009 ch |= _PyLong_DigitValue[ascii_data[in]]; 1010 in++; 1011 odata[out++] = ch; 1012 } 1013 else { 1014 odata[out++] = '='; 1015 } 1016 } 1017 else if (header && ascii_data[in] == '_') { 1018 odata[out++] = ' '; 1019 in++; 1020 } 1021 else { 1022 odata[out] = ascii_data[in]; 1023 in++; 1024 out++; 1025 } 1026 } 1027 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) { 1028 PyMem_Free(odata); 1029 return NULL; 1030 } 1031 PyMem_Free(odata); 1032 return rv; 1033} 1034 1035static int 1036to_hex (unsigned char ch, unsigned char *s) 1037{ 1038 unsigned int uvalue = ch; 1039 1040 s[1] = "0123456789ABCDEF"[uvalue % 16]; 1041 uvalue = (uvalue / 16); 1042 s[0] = "0123456789ABCDEF"[uvalue % 16]; 1043 return 0; 1044} 1045 1046/* XXX: This is ridiculously complicated to be backward compatible 1047 * (mostly) with the quopri module. It doesn't re-create the quopri 1048 * module bug where text ending in CRLF has the CR encoded */ 1049 1050/*[clinic input] 1051binascii.b2a_qp 1052 1053 data: Py_buffer 1054 quotetabs: bool(accept={int}) = False 1055 istext: bool(accept={int}) = True 1056 header: bool(accept={int}) = False 1057 1058Encode a string using quoted-printable encoding. 1059 1060On encoding, when istext is set, newlines are not encoded, and white 1061space at end of lines is. When istext is not set, \r and \n (CR/LF) 1062are both encoded. When quotetabs is set, space and tabs are encoded. 
1063[clinic start generated code]*/ 1064 1065static PyObject * 1066binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs, 1067 int istext, int header) 1068/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/ 1069{ 1070 Py_ssize_t in, out; 1071 const unsigned char *databuf; 1072 unsigned char *odata; 1073 Py_ssize_t datalen = 0, odatalen = 0; 1074 PyObject *rv; 1075 unsigned int linelen = 0; 1076 unsigned char ch; 1077 int crlf = 0; 1078 const unsigned char *p; 1079 1080 databuf = data->buf; 1081 datalen = data->len; 1082 1083 /* See if this string is using CRLF line ends */ 1084 /* XXX: this function has the side effect of converting all of 1085 * the end of lines to be the same depending on this detection 1086 * here */ 1087 p = (const unsigned char *) memchr(databuf, '\n', datalen); 1088 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r')) 1089 crlf = 1; 1090 1091 /* First, scan to see how many characters need to be encoded */ 1092 in = 0; 1093 while (in < datalen) { 1094 Py_ssize_t delta = 0; 1095 if ((databuf[in] > 126) || 1096 (databuf[in] == '=') || 1097 (header && databuf[in] == '_') || 1098 ((databuf[in] == '.') && (linelen == 0) && 1099 (in + 1 == datalen || databuf[in+1] == '\n' || 1100 databuf[in+1] == '\r' || databuf[in+1] == 0)) || 1101 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) || 1102 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) || 1103 ((databuf[in] < 33) && 1104 (databuf[in] != '\r') && (databuf[in] != '\n') && 1105 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' '))))) 1106 { 1107 if ((linelen + 3) >= MAXLINESIZE) { 1108 linelen = 0; 1109 if (crlf) 1110 delta += 3; 1111 else 1112 delta += 2; 1113 } 1114 linelen += 3; 1115 delta += 3; 1116 in++; 1117 } 1118 else { 1119 if (istext && 1120 ((databuf[in] == '\n') || 1121 ((in+1 < datalen) && (databuf[in] == '\r') && 1122 (databuf[in+1] == '\n')))) 1123 { 1124 linelen = 0; 1125 /* Protect against whitespace 
on end of line */ 1126 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t'))) 1127 delta += 2; 1128 if (crlf) 1129 delta += 2; 1130 else 1131 delta += 1; 1132 if (databuf[in] == '\r') 1133 in += 2; 1134 else 1135 in++; 1136 } 1137 else { 1138 if ((in + 1 != datalen) && 1139 (databuf[in+1] != '\n') && 1140 (linelen + 1) >= MAXLINESIZE) { 1141 linelen = 0; 1142 if (crlf) 1143 delta += 3; 1144 else 1145 delta += 2; 1146 } 1147 linelen++; 1148 delta++; 1149 in++; 1150 } 1151 } 1152 if (PY_SSIZE_T_MAX - delta < odatalen) { 1153 PyErr_NoMemory(); 1154 return NULL; 1155 } 1156 odatalen += delta; 1157 } 1158 1159 /* We allocate the output same size as input, this is overkill. 1160 */ 1161 odata = (unsigned char *) PyMem_Calloc(1, odatalen); 1162 if (odata == NULL) { 1163 PyErr_NoMemory(); 1164 return NULL; 1165 } 1166 1167 in = out = linelen = 0; 1168 while (in < datalen) { 1169 if ((databuf[in] > 126) || 1170 (databuf[in] == '=') || 1171 (header && databuf[in] == '_') || 1172 ((databuf[in] == '.') && (linelen == 0) && 1173 (in + 1 == datalen || databuf[in+1] == '\n' || 1174 databuf[in+1] == '\r' || databuf[in+1] == 0)) || 1175 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) || 1176 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) || 1177 ((databuf[in] < 33) && 1178 (databuf[in] != '\r') && (databuf[in] != '\n') && 1179 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' '))))) 1180 { 1181 if ((linelen + 3 )>= MAXLINESIZE) { 1182 odata[out++] = '='; 1183 if (crlf) odata[out++] = '\r'; 1184 odata[out++] = '\n'; 1185 linelen = 0; 1186 } 1187 odata[out++] = '='; 1188 to_hex(databuf[in], &odata[out]); 1189 out += 2; 1190 in++; 1191 linelen += 3; 1192 } 1193 else { 1194 if (istext && 1195 ((databuf[in] == '\n') || 1196 ((in+1 < datalen) && (databuf[in] == '\r') && 1197 (databuf[in+1] == '\n')))) 1198 { 1199 linelen = 0; 1200 /* Protect against whitespace on end of line */ 1201 if (out && ((odata[out-1] == ' ') || (odata[out-1] == 
'\t'))) { 1202 ch = odata[out-1]; 1203 odata[out-1] = '='; 1204 to_hex(ch, &odata[out]); 1205 out += 2; 1206 } 1207 1208 if (crlf) odata[out++] = '\r'; 1209 odata[out++] = '\n'; 1210 if (databuf[in] == '\r') 1211 in += 2; 1212 else 1213 in++; 1214 } 1215 else { 1216 if ((in + 1 != datalen) && 1217 (databuf[in+1] != '\n') && 1218 (linelen + 1) >= MAXLINESIZE) { 1219 odata[out++] = '='; 1220 if (crlf) odata[out++] = '\r'; 1221 odata[out++] = '\n'; 1222 linelen = 0; 1223 } 1224 linelen++; 1225 if (header && databuf[in] == ' ') { 1226 odata[out++] = '_'; 1227 in++; 1228 } 1229 else { 1230 odata[out++] = databuf[in++]; 1231 } 1232 } 1233 } 1234 } 1235 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) { 1236 PyMem_Free(odata); 1237 return NULL; 1238 } 1239 PyMem_Free(odata); 1240 return rv; 1241} 1242 1243/* List of functions defined in the module */ 1244 1245static struct PyMethodDef binascii_module_methods[] = { 1246 BINASCII_A2B_UU_METHODDEF 1247 BINASCII_B2A_UU_METHODDEF 1248 BINASCII_A2B_BASE64_METHODDEF 1249 BINASCII_B2A_BASE64_METHODDEF 1250 BINASCII_A2B_HEX_METHODDEF 1251 BINASCII_B2A_HEX_METHODDEF 1252 BINASCII_HEXLIFY_METHODDEF 1253 BINASCII_UNHEXLIFY_METHODDEF 1254 BINASCII_CRC_HQX_METHODDEF 1255 BINASCII_CRC32_METHODDEF 1256 BINASCII_A2B_QP_METHODDEF 1257 BINASCII_B2A_QP_METHODDEF 1258 {NULL, NULL} /* sentinel */ 1259}; 1260 1261 1262/* Initialization function for the module (*must* be called PyInit_binascii) */ 1263PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII"); 1264 1265static int 1266binascii_exec(PyObject *module) { 1267 int result; 1268 binascii_state *state = PyModule_GetState(module); 1269 if (state == NULL) { 1270 return -1; 1271 } 1272 1273 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL); 1274 if (state->Error == NULL) { 1275 return -1; 1276 } 1277 Py_INCREF(state->Error); 1278 result = PyModule_AddObject(module, "Error", state->Error); 1279 if (result == -1) { 1280 
Py_DECREF(state->Error); 1281 return -1; 1282 } 1283 1284 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL); 1285 if (state->Incomplete == NULL) { 1286 return -1; 1287 } 1288 Py_INCREF(state->Incomplete); 1289 result = PyModule_AddObject(module, "Incomplete", state->Incomplete); 1290 if (result == -1) { 1291 Py_DECREF(state->Incomplete); 1292 return -1; 1293 } 1294 1295 return 0; 1296} 1297 1298static PyModuleDef_Slot binascii_slots[] = { 1299 {Py_mod_exec, binascii_exec}, 1300 {0, NULL} 1301}; 1302 1303static int 1304binascii_traverse(PyObject *module, visitproc visit, void *arg) 1305{ 1306 binascii_state *state = get_binascii_state(module); 1307 Py_VISIT(state->Error); 1308 Py_VISIT(state->Incomplete); 1309 return 0; 1310} 1311 1312static int 1313binascii_clear(PyObject *module) 1314{ 1315 binascii_state *state = get_binascii_state(module); 1316 Py_CLEAR(state->Error); 1317 Py_CLEAR(state->Incomplete); 1318 return 0; 1319} 1320 1321static void 1322binascii_free(void *module) 1323{ 1324 binascii_clear((PyObject *)module); 1325} 1326 1327static struct PyModuleDef binasciimodule = { 1328 PyModuleDef_HEAD_INIT, 1329 "binascii", 1330 doc_binascii, 1331 sizeof(binascii_state), 1332 binascii_module_methods, 1333 binascii_slots, 1334 binascii_traverse, 1335 binascii_clear, 1336 binascii_free 1337}; 1338 1339PyMODINIT_FUNC 1340PyInit_binascii(void) 1341{ 1342 return PyModuleDef_Init(&binasciimodule); 1343} 1344