xref: /third_party/python/Python/pystrhex.c (revision 7db96d56)
1/* Format bytes as hexadecimal */
2
3#include "Python.h"
4#include "pycore_strhex.h"        // _Py_strhex_with_sep()
5#include <stdlib.h>               // abs()
6
7static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
8                                 PyObject* sep, int bytes_per_sep_group,
9                                 const int return_bytes)
10{
11    assert(arglen >= 0);
12
13    Py_UCS1 sep_char = 0;
14    if (sep) {
15        Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
16        if (seplen < 0) {
17            return NULL;
18        }
19        if (seplen != 1) {
20            PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
21            return NULL;
22        }
23        if (PyUnicode_Check(sep)) {
24            if (PyUnicode_READY(sep))
25                return NULL;
26            if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
27                PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
28                return NULL;
29            }
30            sep_char = PyUnicode_READ_CHAR(sep, 0);
31        }
32        else if (PyBytes_Check(sep)) {
33            sep_char = PyBytes_AS_STRING(sep)[0];
34        }
35        else {
36            PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
37            return NULL;
38        }
39        if (sep_char > 127 && !return_bytes) {
40            PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
41            return NULL;
42        }
43    }
44    else {
45        bytes_per_sep_group = 0;
46    }
47
48    unsigned int abs_bytes_per_sep = abs(bytes_per_sep_group);
49    Py_ssize_t resultlen = 0;
50    if (bytes_per_sep_group && arglen > 0) {
51        /* How many sep characters we'll be inserting. */
52        resultlen = (arglen - 1) / abs_bytes_per_sep;
53    }
54    /* Bounds checking for our Py_ssize_t indices. */
55    if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
56        return PyErr_NoMemory();
57    }
58    resultlen += arglen * 2;
59
60    if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
61        bytes_per_sep_group = 0;
62        abs_bytes_per_sep = 0;
63    }
64
65    PyObject *retval;
66    Py_UCS1 *retbuf;
67    if (return_bytes) {
68        /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
69        retval = PyBytes_FromStringAndSize(NULL, resultlen);
70        if (!retval) {
71            return NULL;
72        }
73        retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
74    }
75    else {
76        retval = PyUnicode_New(resultlen, 127);
77        if (!retval) {
78            return NULL;
79        }
80        retbuf = PyUnicode_1BYTE_DATA(retval);
81    }
82
83    /* Hexlify */
84    Py_ssize_t i, j;
85    unsigned char c;
86
87    if (bytes_per_sep_group == 0) {
88        for (i = j = 0; i < arglen; ++i) {
89            assert((j + 1) < resultlen);
90            c = argbuf[i];
91            retbuf[j++] = Py_hexdigits[c >> 4];
92            retbuf[j++] = Py_hexdigits[c & 0x0f];
93        }
94        assert(j == resultlen);
95    }
96    else {
97        /* The number of complete chunk+sep periods */
98        Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
99        Py_ssize_t chunk;
100        unsigned int k;
101
102        if (bytes_per_sep_group < 0) {
103            i = j = 0;
104            for (chunk = 0; chunk < chunks; chunk++) {
105                for (k = 0; k < abs_bytes_per_sep; k++) {
106                    c = argbuf[i++];
107                    retbuf[j++] = Py_hexdigits[c >> 4];
108                    retbuf[j++] = Py_hexdigits[c & 0x0f];
109                }
110                retbuf[j++] = sep_char;
111            }
112            while (i < arglen) {
113                c = argbuf[i++];
114                retbuf[j++] = Py_hexdigits[c >> 4];
115                retbuf[j++] = Py_hexdigits[c & 0x0f];
116            }
117            assert(j == resultlen);
118        }
119        else {
120            i = arglen - 1;
121            j = resultlen - 1;
122            for (chunk = 0; chunk < chunks; chunk++) {
123                for (k = 0; k < abs_bytes_per_sep; k++) {
124                    c = argbuf[i--];
125                    retbuf[j--] = Py_hexdigits[c & 0x0f];
126                    retbuf[j--] = Py_hexdigits[c >> 4];
127                }
128                retbuf[j--] = sep_char;
129            }
130            while (i >= 0) {
131                c = argbuf[i--];
132                retbuf[j--] = Py_hexdigits[c & 0x0f];
133                retbuf[j--] = Py_hexdigits[c >> 4];
134            }
135            assert(j == -1);
136        }
137    }
138
139#ifdef Py_DEBUG
140    if (!return_bytes) {
141        assert(_PyUnicode_CheckConsistency(retval, 1));
142    }
143#endif
144
145    return retval;
146}
147
148PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
149{
150    return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
151}
152
153/* Same as above but returns a bytes() instead of str() to avoid the
154 * need to decode the str() when bytes are needed. */
155PyObject* _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
156{
157    return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
158}
159
160/* These variants include support for a separator between every N bytes: */
161
162PyObject* _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen,
163                              PyObject* sep, const int bytes_per_group)
164{
165    return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0);
166}
167
168/* Same as above but returns a bytes() instead of str() to avoid the
169 * need to decode the str() when bytes are needed. */
170PyObject* _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen,
171                                    PyObject* sep, const int bytes_per_group)
172{
173    return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
174}
175