1/* Format bytes as hexadecimal */ 2 3#include "Python.h" 4#include "pycore_strhex.h" // _Py_strhex_with_sep() 5#include <stdlib.h> // abs() 6 7static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, 8 PyObject* sep, int bytes_per_sep_group, 9 const int return_bytes) 10{ 11 assert(arglen >= 0); 12 13 Py_UCS1 sep_char = 0; 14 if (sep) { 15 Py_ssize_t seplen = PyObject_Length((PyObject*)sep); 16 if (seplen < 0) { 17 return NULL; 18 } 19 if (seplen != 1) { 20 PyErr_SetString(PyExc_ValueError, "sep must be length 1."); 21 return NULL; 22 } 23 if (PyUnicode_Check(sep)) { 24 if (PyUnicode_READY(sep)) 25 return NULL; 26 if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) { 27 PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); 28 return NULL; 29 } 30 sep_char = PyUnicode_READ_CHAR(sep, 0); 31 } 32 else if (PyBytes_Check(sep)) { 33 sep_char = PyBytes_AS_STRING(sep)[0]; 34 } 35 else { 36 PyErr_SetString(PyExc_TypeError, "sep must be str or bytes."); 37 return NULL; 38 } 39 if (sep_char > 127 && !return_bytes) { 40 PyErr_SetString(PyExc_ValueError, "sep must be ASCII."); 41 return NULL; 42 } 43 } 44 else { 45 bytes_per_sep_group = 0; 46 } 47 48 unsigned int abs_bytes_per_sep = abs(bytes_per_sep_group); 49 Py_ssize_t resultlen = 0; 50 if (bytes_per_sep_group && arglen > 0) { 51 /* How many sep characters we'll be inserting. */ 52 resultlen = (arglen - 1) / abs_bytes_per_sep; 53 } 54 /* Bounds checking for our Py_ssize_t indices. */ 55 if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) { 56 return PyErr_NoMemory(); 57 } 58 resultlen += arglen * 2; 59 60 if ((size_t)abs_bytes_per_sep >= (size_t)arglen) { 61 bytes_per_sep_group = 0; 62 abs_bytes_per_sep = 0; 63 } 64 65 PyObject *retval; 66 Py_UCS1 *retbuf; 67 if (return_bytes) { 68 /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */ 69 retval = PyBytes_FromStringAndSize(NULL, resultlen); 70 if (!retval) { 71 return NULL; 72 } 73 retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval); 74 } 75 else { 76 retval = PyUnicode_New(resultlen, 127); 77 if (!retval) { 78 return NULL; 79 } 80 retbuf = PyUnicode_1BYTE_DATA(retval); 81 } 82 83 /* Hexlify */ 84 Py_ssize_t i, j; 85 unsigned char c; 86 87 if (bytes_per_sep_group == 0) { 88 for (i = j = 0; i < arglen; ++i) { 89 assert((j + 1) < resultlen); 90 c = argbuf[i]; 91 retbuf[j++] = Py_hexdigits[c >> 4]; 92 retbuf[j++] = Py_hexdigits[c & 0x0f]; 93 } 94 assert(j == resultlen); 95 } 96 else { 97 /* The number of complete chunk+sep periods */ 98 Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep; 99 Py_ssize_t chunk; 100 unsigned int k; 101 102 if (bytes_per_sep_group < 0) { 103 i = j = 0; 104 for (chunk = 0; chunk < chunks; chunk++) { 105 for (k = 0; k < abs_bytes_per_sep; k++) { 106 c = argbuf[i++]; 107 retbuf[j++] = Py_hexdigits[c >> 4]; 108 retbuf[j++] = Py_hexdigits[c & 0x0f]; 109 } 110 retbuf[j++] = sep_char; 111 } 112 while (i < arglen) { 113 c = argbuf[i++]; 114 retbuf[j++] = Py_hexdigits[c >> 4]; 115 retbuf[j++] = Py_hexdigits[c & 0x0f]; 116 } 117 assert(j == resultlen); 118 } 119 else { 120 i = arglen - 1; 121 j = resultlen - 1; 122 for (chunk = 0; chunk < chunks; chunk++) { 123 for (k = 0; k < abs_bytes_per_sep; k++) { 124 c = argbuf[i--]; 125 retbuf[j--] = Py_hexdigits[c & 0x0f]; 126 retbuf[j--] = Py_hexdigits[c >> 4]; 127 } 128 retbuf[j--] = sep_char; 129 } 130 while (i >= 0) { 131 c = argbuf[i--]; 132 retbuf[j--] = Py_hexdigits[c & 0x0f]; 133 retbuf[j--] = Py_hexdigits[c >> 4]; 134 } 135 assert(j == -1); 136 } 137 } 138 139#ifdef Py_DEBUG 140 if (!return_bytes) { 141 assert(_PyUnicode_CheckConsistency(retval, 1)); 142 } 143#endif 144 145 return retval; 146} 147 148PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen) 149{ 150 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0); 151} 152 153/* Same as above but returns a bytes() instead of str() to avoid the 154 * need to decode the str() when bytes are needed. */ 155PyObject* _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen) 156{ 157 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1); 158} 159 160/* These variants include support for a separator between every N bytes: */ 161 162PyObject* _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, 163 PyObject* sep, const int bytes_per_group) 164{ 165 return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0); 166} 167 168/* Same as above but returns a bytes() instead of str() to avoid the 169 * need to decode the str() when bytes are needed. */ 170PyObject* _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, 171 PyObject* sep, const int bytes_per_group) 172{ 173 return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1); 174} 175