1/* stringlib: bytes joining implementation */ 2 3#if STRINGLIB_IS_UNICODE 4#error join.h only compatible with byte-wise strings 5#endif 6 7Py_LOCAL_INLINE(PyObject *) 8STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) 9{ 10 const char *sepstr = STRINGLIB_STR(sep); 11 Py_ssize_t seplen = STRINGLIB_LEN(sep); 12 PyObject *res = NULL; 13 char *p; 14 Py_ssize_t seqlen = 0; 15 Py_ssize_t sz = 0; 16 Py_ssize_t i, nbufs; 17 PyObject *seq, *item; 18 Py_buffer *buffers = NULL; 19#define NB_STATIC_BUFFERS 10 20 Py_buffer static_buffers[NB_STATIC_BUFFERS]; 21#define GIL_THRESHOLD 1048576 22 int drop_gil = 1; 23 PyThreadState *save = NULL; 24 25 seq = PySequence_Fast(iterable, "can only join an iterable"); 26 if (seq == NULL) { 27 return NULL; 28 } 29 30 seqlen = PySequence_Fast_GET_SIZE(seq); 31 if (seqlen == 0) { 32 Py_DECREF(seq); 33 return STRINGLIB_NEW(NULL, 0); 34 } 35#if !STRINGLIB_MUTABLE 36 if (seqlen == 1) { 37 item = PySequence_Fast_GET_ITEM(seq, 0); 38 if (STRINGLIB_CHECK_EXACT(item)) { 39 Py_INCREF(item); 40 Py_DECREF(seq); 41 return item; 42 } 43 } 44#endif 45 if (seqlen > NB_STATIC_BUFFERS) { 46 buffers = PyMem_NEW(Py_buffer, seqlen); 47 if (buffers == NULL) { 48 Py_DECREF(seq); 49 PyErr_NoMemory(); 50 return NULL; 51 } 52 } 53 else { 54 buffers = static_buffers; 55 } 56 57 /* Here is the general case. Do a pre-pass to figure out the total 58 * amount of space we'll need (sz), and see whether all arguments are 59 * bytes-like. 60 */ 61 for (i = 0, nbufs = 0; i < seqlen; i++) { 62 Py_ssize_t itemlen; 63 item = PySequence_Fast_GET_ITEM(seq, i); 64 if (PyBytes_CheckExact(item)) { 65 /* Fast path. */ 66 Py_INCREF(item); 67 buffers[i].obj = item; 68 buffers[i].buf = PyBytes_AS_STRING(item); 69 buffers[i].len = PyBytes_GET_SIZE(item); 70 } 71 else { 72 if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { 73 PyErr_Format(PyExc_TypeError, 74 "sequence item %zd: expected a bytes-like object, " 75 "%.80s found", 76 i, Py_TYPE(item)->tp_name); 77 goto error; 78 } 79 /* If the backing objects are mutable, then dropping the GIL 80 * opens up race conditions where another thread tries to modify 81 * the object which we hold a buffer on it. Such code has data 82 * races anyway, but this is a conservative approach that avoids 83 * changing the behaviour of that data race. 84 */ 85 drop_gil = 0; 86 } 87 nbufs = i + 1; /* for error cleanup */ 88 itemlen = buffers[i].len; 89 if (itemlen > PY_SSIZE_T_MAX - sz) { 90 PyErr_SetString(PyExc_OverflowError, 91 "join() result is too long"); 92 goto error; 93 } 94 sz += itemlen; 95 if (i != 0) { 96 if (seplen > PY_SSIZE_T_MAX - sz) { 97 PyErr_SetString(PyExc_OverflowError, 98 "join() result is too long"); 99 goto error; 100 } 101 sz += seplen; 102 } 103 if (seqlen != PySequence_Fast_GET_SIZE(seq)) { 104 PyErr_SetString(PyExc_RuntimeError, 105 "sequence changed size during iteration"); 106 goto error; 107 } 108 } 109 110 /* Allocate result space. */ 111 res = STRINGLIB_NEW(NULL, sz); 112 if (res == NULL) 113 goto error; 114 115 /* Catenate everything. */ 116 p = STRINGLIB_STR(res); 117 if (sz < GIL_THRESHOLD) { 118 drop_gil = 0; /* Benefits are likely outweighed by the overheads */ 119 } 120 if (drop_gil) { 121 save = PyEval_SaveThread(); 122 } 123 if (!seplen) { 124 /* fast path */ 125 for (i = 0; i < nbufs; i++) { 126 Py_ssize_t n = buffers[i].len; 127 char *q = buffers[i].buf; 128 memcpy(p, q, n); 129 p += n; 130 } 131 } 132 else { 133 for (i = 0; i < nbufs; i++) { 134 Py_ssize_t n; 135 char *q; 136 if (i) { 137 memcpy(p, sepstr, seplen); 138 p += seplen; 139 } 140 n = buffers[i].len; 141 q = buffers[i].buf; 142 memcpy(p, q, n); 143 p += n; 144 } 145 } 146 if (drop_gil) { 147 PyEval_RestoreThread(save); 148 } 149 goto done; 150 151error: 152 res = NULL; 153done: 154 Py_DECREF(seq); 155 for (i = 0; i < nbufs; i++) 156 PyBuffer_Release(&buffers[i]); 157 if (buffers != static_buffers) 158 PyMem_Free(buffers); 159 return res; 160} 161 162#undef NB_STATIC_BUFFERS 163#undef GIL_THRESHOLD 164