xref: /third_party/python/Objects/stringlib/join.h (revision 7db96d56)
1/* stringlib: bytes joining implementation */
2
3#if STRINGLIB_IS_UNICODE
4#error join.h only compatible with byte-wise strings
5#endif
6
7Py_LOCAL_INLINE(PyObject *)
8STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
9{
10    const char *sepstr = STRINGLIB_STR(sep);
11    Py_ssize_t seplen = STRINGLIB_LEN(sep);
12    PyObject *res = NULL;
13    char *p;
14    Py_ssize_t seqlen = 0;
15    Py_ssize_t sz = 0;
16    Py_ssize_t i, nbufs;
17    PyObject *seq, *item;
18    Py_buffer *buffers = NULL;
19#define NB_STATIC_BUFFERS 10
20    Py_buffer static_buffers[NB_STATIC_BUFFERS];
21#define GIL_THRESHOLD 1048576
22    int drop_gil = 1;
23    PyThreadState *save = NULL;
24
25    seq = PySequence_Fast(iterable, "can only join an iterable");
26    if (seq == NULL) {
27        return NULL;
28    }
29
30    seqlen = PySequence_Fast_GET_SIZE(seq);
31    if (seqlen == 0) {
32        Py_DECREF(seq);
33        return STRINGLIB_NEW(NULL, 0);
34    }
35#if !STRINGLIB_MUTABLE
36    if (seqlen == 1) {
37        item = PySequence_Fast_GET_ITEM(seq, 0);
38        if (STRINGLIB_CHECK_EXACT(item)) {
39            Py_INCREF(item);
40            Py_DECREF(seq);
41            return item;
42        }
43    }
44#endif
45    if (seqlen > NB_STATIC_BUFFERS) {
46        buffers = PyMem_NEW(Py_buffer, seqlen);
47        if (buffers == NULL) {
48            Py_DECREF(seq);
49            PyErr_NoMemory();
50            return NULL;
51        }
52    }
53    else {
54        buffers = static_buffers;
55    }
56
57    /* Here is the general case.  Do a pre-pass to figure out the total
58     * amount of space we'll need (sz), and see whether all arguments are
59     * bytes-like.
60     */
61    for (i = 0, nbufs = 0; i < seqlen; i++) {
62        Py_ssize_t itemlen;
63        item = PySequence_Fast_GET_ITEM(seq, i);
64        if (PyBytes_CheckExact(item)) {
65            /* Fast path. */
66            Py_INCREF(item);
67            buffers[i].obj = item;
68            buffers[i].buf = PyBytes_AS_STRING(item);
69            buffers[i].len = PyBytes_GET_SIZE(item);
70        }
71        else {
72            if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
73                PyErr_Format(PyExc_TypeError,
74                             "sequence item %zd: expected a bytes-like object, "
75                             "%.80s found",
76                             i, Py_TYPE(item)->tp_name);
77                goto error;
78            }
79            /* If the backing objects are mutable, then dropping the GIL
80             * opens up race conditions where another thread tries to modify
81             * the object which we hold a buffer on it. Such code has data
82             * races anyway, but this is a conservative approach that avoids
83             * changing the behaviour of that data race.
84             */
85            drop_gil = 0;
86        }
87        nbufs = i + 1;  /* for error cleanup */
88        itemlen = buffers[i].len;
89        if (itemlen > PY_SSIZE_T_MAX - sz) {
90            PyErr_SetString(PyExc_OverflowError,
91                            "join() result is too long");
92            goto error;
93        }
94        sz += itemlen;
95        if (i != 0) {
96            if (seplen > PY_SSIZE_T_MAX - sz) {
97                PyErr_SetString(PyExc_OverflowError,
98                                "join() result is too long");
99                goto error;
100            }
101            sz += seplen;
102        }
103        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
104            PyErr_SetString(PyExc_RuntimeError,
105                            "sequence changed size during iteration");
106            goto error;
107        }
108    }
109
110    /* Allocate result space. */
111    res = STRINGLIB_NEW(NULL, sz);
112    if (res == NULL)
113        goto error;
114
115    /* Catenate everything. */
116    p = STRINGLIB_STR(res);
117    if (sz < GIL_THRESHOLD) {
118        drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
119    }
120    if (drop_gil) {
121        save = PyEval_SaveThread();
122    }
123    if (!seplen) {
124        /* fast path */
125        for (i = 0; i < nbufs; i++) {
126            Py_ssize_t n = buffers[i].len;
127            char *q = buffers[i].buf;
128            memcpy(p, q, n);
129            p += n;
130        }
131    }
132    else {
133        for (i = 0; i < nbufs; i++) {
134            Py_ssize_t n;
135            char *q;
136            if (i) {
137                memcpy(p, sepstr, seplen);
138                p += seplen;
139            }
140            n = buffers[i].len;
141            q = buffers[i].buf;
142            memcpy(p, q, n);
143            p += n;
144        }
145    }
146    if (drop_gil) {
147        PyEval_RestoreThread(save);
148    }
149    goto done;
150
151error:
152    res = NULL;
153done:
154    Py_DECREF(seq);
155    for (i = 0; i < nbufs; i++)
156        PyBuffer_Release(&buffers[i]);
157    if (buffers != static_buffers)
158        PyMem_Free(buffers);
159    return res;
160}
161
162#undef NB_STATIC_BUFFERS
163#undef GIL_THRESHOLD
164