1#if STRINGLIB_IS_UNICODE
2# error "transmogrify.h only compatible with byte-wise strings"
3#endif
4
/* The more complicated methods.  Parts of these should be pulled out into the
   shared code in bytes_methods.c to cut down on duplicate code bloat.  */
7
8/*[clinic input]
9class B "PyObject *" "&PyType_Type"
10[clinic start generated code]*/
11/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
12
13#include "clinic/transmogrify.h.h"
14
15static inline PyObject *
16return_self(PyObject *self)
17{
18#if !STRINGLIB_MUTABLE
19    if (STRINGLIB_CHECK_EXACT(self)) {
20        Py_INCREF(self);
21        return self;
22    }
23#endif
24    return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
25}
26
27/*[clinic input]
28B.expandtabs as stringlib_expandtabs
29
30    tabsize: int = 8
31
32Return a copy where all tab characters are expanded using spaces.
33
34If tabsize is not given, a tab size of 8 characters is assumed.
35[clinic start generated code]*/
36
37static PyObject *
38stringlib_expandtabs_impl(PyObject *self, int tabsize)
39/*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
40{
41    const char *e, *p;
42    char *q;
43    Py_ssize_t i, j;
44    PyObject *u;
45
46    /* First pass: determine size of output string */
47    i = j = 0;
48    e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
49    for (p = STRINGLIB_STR(self); p < e; p++) {
50        if (*p == '\t') {
51            if (tabsize > 0) {
52                Py_ssize_t incr = tabsize - (j % tabsize);
53                if (j > PY_SSIZE_T_MAX - incr)
54                    goto overflow;
55                j += incr;
56            }
57        }
58        else {
59            if (j > PY_SSIZE_T_MAX - 1)
60                goto overflow;
61            j++;
62            if (*p == '\n' || *p == '\r') {
63                if (i > PY_SSIZE_T_MAX - j)
64                    goto overflow;
65                i += j;
66                j = 0;
67            }
68        }
69    }
70
71    if (i > PY_SSIZE_T_MAX - j)
72        goto overflow;
73
74    /* Second pass: create output string and fill it */
75    u = STRINGLIB_NEW(NULL, i + j);
76    if (!u)
77        return NULL;
78
79    j = 0;
80    q = STRINGLIB_STR(u);
81
82    for (p = STRINGLIB_STR(self); p < e; p++) {
83        if (*p == '\t') {
84            if (tabsize > 0) {
85                i = tabsize - (j % tabsize);
86                j += i;
87                while (i--)
88                    *q++ = ' ';
89            }
90        }
91        else {
92            j++;
93            *q++ = *p;
94            if (*p == '\n' || *p == '\r')
95                j = 0;
96        }
97    }
98
99    return u;
100  overflow:
101    PyErr_SetString(PyExc_OverflowError, "result too long");
102    return NULL;
103}
104
105static inline PyObject *
106pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
107{
108    PyObject *u;
109
110    if (left < 0)
111        left = 0;
112    if (right < 0)
113        right = 0;
114
115    if (left == 0 && right == 0) {
116        return return_self(self);
117    }
118
119    u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
120    if (u) {
121        if (left)
122            memset(STRINGLIB_STR(u), fill, left);
123        memcpy(STRINGLIB_STR(u) + left,
124               STRINGLIB_STR(self),
125               STRINGLIB_LEN(self));
126        if (right)
127            memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
128                   fill, right);
129    }
130
131    return u;
132}
133
134/*[clinic input]
135B.ljust as stringlib_ljust
136
137    width: Py_ssize_t
138    fillchar: char = b' '
139    /
140
141Return a left-justified string of length width.
142
143Padding is done using the specified fill character.
144[clinic start generated code]*/
145
146static PyObject *
147stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
148/*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
149{
150    if (STRINGLIB_LEN(self) >= width) {
151        return return_self(self);
152    }
153
154    return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
155}
156
157
158/*[clinic input]
159B.rjust as stringlib_rjust
160
161    width: Py_ssize_t
162    fillchar: char = b' '
163    /
164
165Return a right-justified string of length width.
166
167Padding is done using the specified fill character.
168[clinic start generated code]*/
169
170static PyObject *
171stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
172/*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
173{
174    if (STRINGLIB_LEN(self) >= width) {
175        return return_self(self);
176    }
177
178    return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
179}
180
181
182/*[clinic input]
183B.center as stringlib_center
184
185    width: Py_ssize_t
186    fillchar: char = b' '
187    /
188
189Return a centered string of length width.
190
191Padding is done using the specified fill character.
192[clinic start generated code]*/
193
194static PyObject *
195stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
196/*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
197{
198    Py_ssize_t marg, left;
199
200    if (STRINGLIB_LEN(self) >= width) {
201        return return_self(self);
202    }
203
204    marg = width - STRINGLIB_LEN(self);
205    left = marg / 2 + (marg & width & 1);
206
207    return pad(self, left, marg - left, fillchar);
208}
209
210/*[clinic input]
211B.zfill as stringlib_zfill
212
213    width: Py_ssize_t
214    /
215
216Pad a numeric string with zeros on the left, to fill a field of the given width.
217
218The original string is never truncated.
219[clinic start generated code]*/
220
221static PyObject *
222stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
223/*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
224{
225    Py_ssize_t fill;
226    PyObject *s;
227    char *p;
228
229    if (STRINGLIB_LEN(self) >= width) {
230        return return_self(self);
231    }
232
233    fill = width - STRINGLIB_LEN(self);
234
235    s = pad(self, fill, 0, '0');
236
237    if (s == NULL)
238        return NULL;
239
240    p = STRINGLIB_STR(s);
241    if (p[fill] == '+' || p[fill] == '-') {
242        /* move sign to beginning of string */
243        p[0] = p[fill];
244        p[fill] = '0';
245    }
246
247    return s;
248}
249
250
251/* find and count characters and substrings */
252
/* Search the `target_len` bytes starting at `target` for the byte `c`.
   Thin wrapper around memchr() whose result is cast to char *: a pointer to
   the first match, or NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
255
256
257static Py_ssize_t
258countchar(const char *target, Py_ssize_t target_len, char c,
259          Py_ssize_t maxcount)
260{
261    Py_ssize_t count = 0;
262    const char *start = target;
263    const char *end = target + target_len;
264
265    while ((start = findchar(start, end - start, c)) != NULL) {
266        count++;
267        if (count >= maxcount)
268            break;
269        start += 1;
270    }
271    return count;
272}
273
274
275/* Algorithms for different cases of string replacement */
276
277/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
278static PyObject *
279stringlib_replace_interleave(PyObject *self,
280                             const char *to_s, Py_ssize_t to_len,
281                             Py_ssize_t maxcount)
282{
283    const char *self_s;
284    char *result_s;
285    Py_ssize_t self_len, result_len;
286    Py_ssize_t count, i;
287    PyObject *result;
288
289    self_len = STRINGLIB_LEN(self);
290
291    /* 1 at the end plus 1 after every character;
292       count = min(maxcount, self_len + 1) */
293    if (maxcount <= self_len) {
294        count = maxcount;
295    }
296    else {
297        /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
298        count = self_len + 1;
299    }
300
301    /* Check for overflow */
302    /*   result_len = count * to_len + self_len; */
303    assert(count > 0);
304    if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
305        PyErr_SetString(PyExc_OverflowError,
306                        "replace bytes is too long");
307        return NULL;
308    }
309    result_len = count * to_len + self_len;
310    result = STRINGLIB_NEW(NULL, result_len);
311    if (result == NULL) {
312        return NULL;
313    }
314
315    self_s = STRINGLIB_STR(self);
316    result_s = STRINGLIB_STR(result);
317
318    if (to_len > 1) {
319        /* Lay the first one down (guaranteed this will occur) */
320        memcpy(result_s, to_s, to_len);
321        result_s += to_len;
322        count -= 1;
323
324        for (i = 0; i < count; i++) {
325            *result_s++ = *self_s++;
326            memcpy(result_s, to_s, to_len);
327            result_s += to_len;
328        }
329    }
330    else {
331        result_s[0] = to_s[0];
332        result_s += to_len;
333        count -= 1;
334        for (i = 0; i < count; i++) {
335            *result_s++ = *self_s++;
336            result_s[0] = to_s[0];
337            result_s += to_len;
338        }
339    }
340
341    /* Copy the rest of the original string */
342    memcpy(result_s, self_s, self_len - i);
343
344    return result;
345}
346
347/* Special case for deleting a single character */
348/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
349static PyObject *
350stringlib_replace_delete_single_character(PyObject *self,
351                                          char from_c, Py_ssize_t maxcount)
352{
353    const char *self_s, *start, *next, *end;
354    char *result_s;
355    Py_ssize_t self_len, result_len;
356    Py_ssize_t count;
357    PyObject *result;
358
359    self_len = STRINGLIB_LEN(self);
360    self_s = STRINGLIB_STR(self);
361
362    count = countchar(self_s, self_len, from_c, maxcount);
363    if (count == 0) {
364        return return_self(self);
365    }
366
367    result_len = self_len - count;  /* from_len == 1 */
368    assert(result_len>=0);
369
370    result = STRINGLIB_NEW(NULL, result_len);
371    if (result == NULL) {
372        return NULL;
373    }
374    result_s = STRINGLIB_STR(result);
375
376    start = self_s;
377    end = self_s + self_len;
378    while (count-- > 0) {
379        next = findchar(start, end - start, from_c);
380        if (next == NULL)
381            break;
382        memcpy(result_s, start, next - start);
383        result_s += (next - start);
384        start = next + 1;
385    }
386    memcpy(result_s, start, end - start);
387
388    return result;
389}
390
391/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
392
393static PyObject *
394stringlib_replace_delete_substring(PyObject *self,
395                                   const char *from_s, Py_ssize_t from_len,
396                                   Py_ssize_t maxcount)
397{
398    const char *self_s, *start, *next, *end;
399    char *result_s;
400    Py_ssize_t self_len, result_len;
401    Py_ssize_t count, offset;
402    PyObject *result;
403
404    self_len = STRINGLIB_LEN(self);
405    self_s = STRINGLIB_STR(self);
406
407    count = stringlib_count(self_s, self_len,
408                            from_s, from_len,
409                            maxcount);
410
411    if (count == 0) {
412        /* no matches */
413        return return_self(self);
414    }
415
416    result_len = self_len - (count * from_len);
417    assert (result_len>=0);
418
419    result = STRINGLIB_NEW(NULL, result_len);
420    if (result == NULL) {
421        return NULL;
422    }
423    result_s = STRINGLIB_STR(result);
424
425    start = self_s;
426    end = self_s + self_len;
427    while (count-- > 0) {
428        offset = stringlib_find(start, end - start,
429                                from_s, from_len,
430                                0);
431        if (offset == -1)
432            break;
433        next = start + offset;
434
435        memcpy(result_s, start, next - start);
436
437        result_s += (next - start);
438        start = next + from_len;
439    }
440    memcpy(result_s, start, end - start);
441    return result;
442}
443
444/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
445static PyObject *
446stringlib_replace_single_character_in_place(PyObject *self,
447                                            char from_c, char to_c,
448                                            Py_ssize_t maxcount)
449{
450    const char *self_s, *end;
451    char *result_s, *start, *next;
452    Py_ssize_t self_len;
453    PyObject *result;
454
455    /* The result string will be the same size */
456    self_s = STRINGLIB_STR(self);
457    self_len = STRINGLIB_LEN(self);
458
459    next = findchar(self_s, self_len, from_c);
460
461    if (next == NULL) {
462        /* No matches; return the original bytes */
463        return return_self(self);
464    }
465
466    /* Need to make a new bytes */
467    result = STRINGLIB_NEW(NULL, self_len);
468    if (result == NULL) {
469        return NULL;
470    }
471    result_s = STRINGLIB_STR(result);
472    memcpy(result_s, self_s, self_len);
473
474    /* change everything in-place, starting with this one */
475    start =  result_s + (next - self_s);
476    *start = to_c;
477    start++;
478    end = result_s + self_len;
479
480    while (--maxcount > 0) {
481        next = findchar(start, end - start, from_c);
482        if (next == NULL)
483            break;
484        *next = to_c;
485        start = next + 1;
486    }
487
488    return result;
489}
490
491/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
492static PyObject *
493stringlib_replace_substring_in_place(PyObject *self,
494                                     const char *from_s, Py_ssize_t from_len,
495                                     const char *to_s, Py_ssize_t to_len,
496                                     Py_ssize_t maxcount)
497{
498    const char *self_s, *end;
499    char *result_s, *start;
500    Py_ssize_t self_len, offset;
501    PyObject *result;
502
503    /* The result bytes will be the same size */
504
505    self_s = STRINGLIB_STR(self);
506    self_len = STRINGLIB_LEN(self);
507
508    offset = stringlib_find(self_s, self_len,
509                            from_s, from_len,
510                            0);
511    if (offset == -1) {
512        /* No matches; return the original bytes */
513        return return_self(self);
514    }
515
516    /* Need to make a new bytes */
517    result = STRINGLIB_NEW(NULL, self_len);
518    if (result == NULL) {
519        return NULL;
520    }
521    result_s = STRINGLIB_STR(result);
522    memcpy(result_s, self_s, self_len);
523
524    /* change everything in-place, starting with this one */
525    start =  result_s + offset;
526    memcpy(start, to_s, from_len);
527    start += from_len;
528    end = result_s + self_len;
529
530    while ( --maxcount > 0) {
531        offset = stringlib_find(start, end - start,
532                                from_s, from_len,
533                                0);
534        if (offset == -1)
535            break;
536        memcpy(start + offset, to_s, from_len);
537        start += offset + from_len;
538    }
539
540    return result;
541}
542
543/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
544static PyObject *
545stringlib_replace_single_character(PyObject *self,
546                                   char from_c,
547                                   const char *to_s, Py_ssize_t to_len,
548                                   Py_ssize_t maxcount)
549{
550    const char *self_s, *start, *next, *end;
551    char *result_s;
552    Py_ssize_t self_len, result_len;
553    Py_ssize_t count;
554    PyObject *result;
555
556    self_s = STRINGLIB_STR(self);
557    self_len = STRINGLIB_LEN(self);
558
559    count = countchar(self_s, self_len, from_c, maxcount);
560    if (count == 0) {
561        /* no matches, return unchanged */
562        return return_self(self);
563    }
564
565    /* use the difference between current and new, hence the "-1" */
566    /*   result_len = self_len + count * (to_len-1)  */
567    assert(count > 0);
568    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
569        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
570        return NULL;
571    }
572    result_len = self_len + count * (to_len - 1);
573
574    result = STRINGLIB_NEW(NULL, result_len);
575    if (result == NULL) {
576        return NULL;
577    }
578    result_s = STRINGLIB_STR(result);
579
580    start = self_s;
581    end = self_s + self_len;
582    while (count-- > 0) {
583        next = findchar(start, end - start, from_c);
584        if (next == NULL)
585            break;
586
587        if (next == start) {
588            /* replace with the 'to' */
589            memcpy(result_s, to_s, to_len);
590            result_s += to_len;
591            start += 1;
592        } else {
593            /* copy the unchanged old then the 'to' */
594            memcpy(result_s, start, next - start);
595            result_s += (next - start);
596            memcpy(result_s, to_s, to_len);
597            result_s += to_len;
598            start = next + 1;
599        }
600    }
601    /* Copy the remainder of the remaining bytes */
602    memcpy(result_s, start, end - start);
603
604    return result;
605}
606
607/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
608static PyObject *
609stringlib_replace_substring(PyObject *self,
610                            const char *from_s, Py_ssize_t from_len,
611                            const char *to_s, Py_ssize_t to_len,
612                            Py_ssize_t maxcount)
613{
614    const char *self_s, *start, *next, *end;
615    char *result_s;
616    Py_ssize_t self_len, result_len;
617    Py_ssize_t count, offset;
618    PyObject *result;
619
620    self_s = STRINGLIB_STR(self);
621    self_len = STRINGLIB_LEN(self);
622
623    count = stringlib_count(self_s, self_len,
624                            from_s, from_len,
625                            maxcount);
626
627    if (count == 0) {
628        /* no matches, return unchanged */
629        return return_self(self);
630    }
631
632    /* Check for overflow */
633    /*    result_len = self_len + count * (to_len-from_len) */
634    assert(count > 0);
635    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
636        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
637        return NULL;
638    }
639    result_len = self_len + count * (to_len - from_len);
640
641    result = STRINGLIB_NEW(NULL, result_len);
642    if (result == NULL) {
643        return NULL;
644    }
645    result_s = STRINGLIB_STR(result);
646
647    start = self_s;
648    end = self_s + self_len;
649    while (count-- > 0) {
650        offset = stringlib_find(start, end - start,
651                                from_s, from_len,
652                                0);
653        if (offset == -1)
654            break;
655        next = start + offset;
656        if (next == start) {
657            /* replace with the 'to' */
658            memcpy(result_s, to_s, to_len);
659            result_s += to_len;
660            start += from_len;
661        } else {
662            /* copy the unchanged old then the 'to' */
663            memcpy(result_s, start, next - start);
664            result_s += (next - start);
665            memcpy(result_s, to_s, to_len);
666            result_s += to_len;
667            start = next + from_len;
668        }
669    }
670    /* Copy the remainder of the remaining bytes */
671    memcpy(result_s, start, end - start);
672
673    return result;
674}
675
676
677static PyObject *
678stringlib_replace(PyObject *self,
679                  const char *from_s, Py_ssize_t from_len,
680                  const char *to_s, Py_ssize_t to_len,
681                  Py_ssize_t maxcount)
682{
683    if (STRINGLIB_LEN(self) < from_len) {
684        /* nothing to do; return the original bytes */
685        return return_self(self);
686    }
687    if (maxcount < 0) {
688        maxcount = PY_SSIZE_T_MAX;
689    } else if (maxcount == 0) {
690        /* nothing to do; return the original bytes */
691        return return_self(self);
692    }
693
694    /* Handle zero-length special cases */
695    if (from_len == 0) {
696        if (to_len == 0) {
697            /* nothing to do; return the original bytes */
698            return return_self(self);
699        }
700        /* insert the 'to' bytes everywhere.    */
701        /*    >>> b"Python".replace(b"", b".")  */
702        /*    b'.P.y.t.h.o.n.'                  */
703        return stringlib_replace_interleave(self, to_s, to_len, maxcount);
704    }
705
706    if (to_len == 0) {
707        /* delete all occurrences of 'from' bytes */
708        if (from_len == 1) {
709            return stringlib_replace_delete_single_character(
710                self, from_s[0], maxcount);
711        } else {
712            return stringlib_replace_delete_substring(
713                self, from_s, from_len, maxcount);
714        }
715    }
716
717    /* Handle special case where both bytes have the same length */
718
719    if (from_len == to_len) {
720        if (from_len == 1) {
721            return stringlib_replace_single_character_in_place(
722                self, from_s[0], to_s[0], maxcount);
723        } else {
724            return stringlib_replace_substring_in_place(
725                self, from_s, from_len, to_s, to_len, maxcount);
726        }
727    }
728
729    /* Otherwise use the more generic algorithms */
730    if (from_len == 1) {
731        return stringlib_replace_single_character(
732            self, from_s[0], to_s, to_len, maxcount);
733    } else {
734        /* len('from')>=2, len('to')>=1 */
735        return stringlib_replace_substring(
736            self, from_s, from_len, to_s, to_len, maxcount);
737    }
738}
739
740#undef findchar
741