1#if STRINGLIB_IS_UNICODE 2# error "transmogrify.h only compatible with byte-wise strings" 3#endif 4 5/* the more complicated methods. parts of these should be pulled out into the 6 shared code in bytes_methods.c to cut down on duplicate code bloat. */ 7 8/*[clinic input] 9class B "PyObject *" "&PyType_Type" 10[clinic start generated code]*/ 11/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/ 12 13#include "clinic/transmogrify.h.h" 14 15static inline PyObject * 16return_self(PyObject *self) 17{ 18#if !STRINGLIB_MUTABLE 19 if (STRINGLIB_CHECK_EXACT(self)) { 20 Py_INCREF(self); 21 return self; 22 } 23#endif 24 return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); 25} 26 27/*[clinic input] 28B.expandtabs as stringlib_expandtabs 29 30 tabsize: int = 8 31 32Return a copy where all tab characters are expanded using spaces. 33 34If tabsize is not given, a tab size of 8 characters is assumed. 35[clinic start generated code]*/ 36 37static PyObject * 38stringlib_expandtabs_impl(PyObject *self, int tabsize) 39/*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/ 40{ 41 const char *e, *p; 42 char *q; 43 Py_ssize_t i, j; 44 PyObject *u; 45 46 /* First pass: determine size of output string */ 47 i = j = 0; 48 e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); 49 for (p = STRINGLIB_STR(self); p < e; p++) { 50 if (*p == '\t') { 51 if (tabsize > 0) { 52 Py_ssize_t incr = tabsize - (j % tabsize); 53 if (j > PY_SSIZE_T_MAX - incr) 54 goto overflow; 55 j += incr; 56 } 57 } 58 else { 59 if (j > PY_SSIZE_T_MAX - 1) 60 goto overflow; 61 j++; 62 if (*p == '\n' || *p == '\r') { 63 if (i > PY_SSIZE_T_MAX - j) 64 goto overflow; 65 i += j; 66 j = 0; 67 } 68 } 69 } 70 71 if (i > PY_SSIZE_T_MAX - j) 72 goto overflow; 73 74 /* Second pass: create output string and fill it */ 75 u = STRINGLIB_NEW(NULL, i + j); 76 if (!u) 77 return NULL; 78 79 j = 0; 80 q = STRINGLIB_STR(u); 81 82 for (p = STRINGLIB_STR(self); p < e; p++) { 83 if (*p == '\t') { 84 if (tabsize > 0) { 85 i = tabsize - (j % tabsize); 86 j += i; 87 while (i--) 88 *q++ = ' '; 89 } 90 } 91 else { 92 j++; 93 *q++ = *p; 94 if (*p == '\n' || *p == '\r') 95 j = 0; 96 } 97 } 98 99 return u; 100 overflow: 101 PyErr_SetString(PyExc_OverflowError, "result too long"); 102 return NULL; 103} 104 105static inline PyObject * 106pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) 107{ 108 PyObject *u; 109 110 if (left < 0) 111 left = 0; 112 if (right < 0) 113 right = 0; 114 115 if (left == 0 && right == 0) { 116 return return_self(self); 117 } 118 119 u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right); 120 if (u) { 121 if (left) 122 memset(STRINGLIB_STR(u), fill, left); 123 memcpy(STRINGLIB_STR(u) + left, 124 STRINGLIB_STR(self), 125 STRINGLIB_LEN(self)); 126 if (right) 127 memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), 128 fill, right); 129 } 130 131 return u; 132} 133 134/*[clinic input] 135B.ljust as stringlib_ljust 136 137 width: Py_ssize_t 138 fillchar: char = b' ' 139 / 140 141Return a left-justified string of length width. 142 143Padding is done using the specified fill character. 144[clinic start generated code]*/ 145 146static PyObject * 147stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar) 148/*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/ 149{ 150 if (STRINGLIB_LEN(self) >= width) { 151 return return_self(self); 152 } 153 154 return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); 155} 156 157 158/*[clinic input] 159B.rjust as stringlib_rjust 160 161 width: Py_ssize_t 162 fillchar: char = b' ' 163 / 164 165Return a right-justified string of length width. 166 167Padding is done using the specified fill character. 168[clinic start generated code]*/ 169 170static PyObject * 171stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar) 172/*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/ 173{ 174 if (STRINGLIB_LEN(self) >= width) { 175 return return_self(self); 176 } 177 178 return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); 179} 180 181 182/*[clinic input] 183B.center as stringlib_center 184 185 width: Py_ssize_t 186 fillchar: char = b' ' 187 / 188 189Return a centered string of length width. 190 191Padding is done using the specified fill character. 192[clinic start generated code]*/ 193 194static PyObject * 195stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar) 196/*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/ 197{ 198 Py_ssize_t marg, left; 199 200 if (STRINGLIB_LEN(self) >= width) { 201 return return_self(self); 202 } 203 204 marg = width - STRINGLIB_LEN(self); 205 left = marg / 2 + (marg & width & 1); 206 207 return pad(self, left, marg - left, fillchar); 208} 209 210/*[clinic input] 211B.zfill as stringlib_zfill 212 213 width: Py_ssize_t 214 / 215 216Pad a numeric string with zeros on the left, to fill a field of the given width. 217 218The original string is never truncated. 219[clinic start generated code]*/ 220 221static PyObject * 222stringlib_zfill_impl(PyObject *self, Py_ssize_t width) 223/*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/ 224{ 225 Py_ssize_t fill; 226 PyObject *s; 227 char *p; 228 229 if (STRINGLIB_LEN(self) >= width) { 230 return return_self(self); 231 } 232 233 fill = width - STRINGLIB_LEN(self); 234 235 s = pad(self, fill, 0, '0'); 236 237 if (s == NULL) 238 return NULL; 239 240 p = STRINGLIB_STR(s); 241 if (p[fill] == '+' || p[fill] == '-') { 242 /* move sign to beginning of string */ 243 p[0] = p[fill]; 244 p[fill] = '0'; 245 } 246 247 return s; 248} 249 250 251/* find and count characters and substrings */ 252 253#define findchar(target, target_len, c) \ 254 ((char *)memchr((const void *)(target), c, target_len)) 255 256 257static Py_ssize_t 258countchar(const char *target, Py_ssize_t target_len, char c, 259 Py_ssize_t maxcount) 260{ 261 Py_ssize_t count = 0; 262 const char *start = target; 263 const char *end = target + target_len; 264 265 while ((start = findchar(start, end - start, c)) != NULL) { 266 count++; 267 if (count >= maxcount) 268 break; 269 start += 1; 270 } 271 return count; 272} 273 274 275/* Algorithms for different cases of string replacement */ 276 277/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ 278static PyObject * 279stringlib_replace_interleave(PyObject *self, 280 const char *to_s, Py_ssize_t to_len, 281 Py_ssize_t maxcount) 282{ 283 const char *self_s; 284 char *result_s; 285 Py_ssize_t self_len, result_len; 286 Py_ssize_t count, i; 287 PyObject *result; 288 289 self_len = STRINGLIB_LEN(self); 290 291 /* 1 at the end plus 1 after every character; 292 count = min(maxcount, self_len + 1) */ 293 if (maxcount <= self_len) { 294 count = maxcount; 295 } 296 else { 297 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ 298 count = self_len + 1; 299 } 300 301 /* Check for overflow */ 302 /* result_len = count * to_len + self_len; */ 303 assert(count > 0); 304 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { 305 PyErr_SetString(PyExc_OverflowError, 306 "replace bytes is too long"); 307 return NULL; 308 } 309 result_len = count * to_len + self_len; 310 result = STRINGLIB_NEW(NULL, result_len); 311 if (result == NULL) { 312 return NULL; 313 } 314 315 self_s = STRINGLIB_STR(self); 316 result_s = STRINGLIB_STR(result); 317 318 if (to_len > 1) { 319 /* Lay the first one down (guaranteed this will occur) */ 320 memcpy(result_s, to_s, to_len); 321 result_s += to_len; 322 count -= 1; 323 324 for (i = 0; i < count; i++) { 325 *result_s++ = *self_s++; 326 memcpy(result_s, to_s, to_len); 327 result_s += to_len; 328 } 329 } 330 else { 331 result_s[0] = to_s[0]; 332 result_s += to_len; 333 count -= 1; 334 for (i = 0; i < count; i++) { 335 *result_s++ = *self_s++; 336 result_s[0] = to_s[0]; 337 result_s += to_len; 338 } 339 } 340 341 /* Copy the rest of the original string */ 342 memcpy(result_s, self_s, self_len - i); 343 344 return result; 345} 346 347/* Special case for deleting a single character */ 348/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ 349static PyObject * 350stringlib_replace_delete_single_character(PyObject *self, 351 char from_c, Py_ssize_t maxcount) 352{ 353 const char *self_s, *start, *next, *end; 354 char *result_s; 355 Py_ssize_t self_len, result_len; 356 Py_ssize_t count; 357 PyObject *result; 358 359 self_len = STRINGLIB_LEN(self); 360 self_s = STRINGLIB_STR(self); 361 362 count = countchar(self_s, self_len, from_c, maxcount); 363 if (count == 0) { 364 return return_self(self); 365 } 366 367 result_len = self_len - count; /* from_len == 1 */ 368 assert(result_len>=0); 369 370 result = STRINGLIB_NEW(NULL, result_len); 371 if (result == NULL) { 372 return NULL; 373 } 374 result_s = STRINGLIB_STR(result); 375 376 start = self_s; 377 end = self_s + self_len; 378 while (count-- > 0) { 379 next = findchar(start, end - start, from_c); 380 if (next == NULL) 381 break; 382 memcpy(result_s, start, next - start); 383 result_s += (next - start); 384 start = next + 1; 385 } 386 memcpy(result_s, start, end - start); 387 388 return result; 389} 390 391/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ 392 393static PyObject * 394stringlib_replace_delete_substring(PyObject *self, 395 const char *from_s, Py_ssize_t from_len, 396 Py_ssize_t maxcount) 397{ 398 const char *self_s, *start, *next, *end; 399 char *result_s; 400 Py_ssize_t self_len, result_len; 401 Py_ssize_t count, offset; 402 PyObject *result; 403 404 self_len = STRINGLIB_LEN(self); 405 self_s = STRINGLIB_STR(self); 406 407 count = stringlib_count(self_s, self_len, 408 from_s, from_len, 409 maxcount); 410 411 if (count == 0) { 412 /* no matches */ 413 return return_self(self); 414 } 415 416 result_len = self_len - (count * from_len); 417 assert (result_len>=0); 418 419 result = STRINGLIB_NEW(NULL, result_len); 420 if (result == NULL) { 421 return NULL; 422 } 423 result_s = STRINGLIB_STR(result); 424 425 start = self_s; 426 end = self_s + self_len; 427 while (count-- > 0) { 428 offset = stringlib_find(start, end - start, 429 from_s, from_len, 430 0); 431 if (offset == -1) 432 break; 433 next = start + offset; 434 435 memcpy(result_s, start, next - start); 436 437 result_s += (next - start); 438 start = next + from_len; 439 } 440 memcpy(result_s, start, end - start); 441 return result; 442} 443 444/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ 445static PyObject * 446stringlib_replace_single_character_in_place(PyObject *self, 447 char from_c, char to_c, 448 Py_ssize_t maxcount) 449{ 450 const char *self_s, *end; 451 char *result_s, *start, *next; 452 Py_ssize_t self_len; 453 PyObject *result; 454 455 /* The result string will be the same size */ 456 self_s = STRINGLIB_STR(self); 457 self_len = STRINGLIB_LEN(self); 458 459 next = findchar(self_s, self_len, from_c); 460 461 if (next == NULL) { 462 /* No matches; return the original bytes */ 463 return return_self(self); 464 } 465 466 /* Need to make a new bytes */ 467 result = STRINGLIB_NEW(NULL, self_len); 468 if (result == NULL) { 469 return NULL; 470 } 471 result_s = STRINGLIB_STR(result); 472 memcpy(result_s, self_s, self_len); 473 474 /* change everything in-place, starting with this one */ 475 start = result_s + (next - self_s); 476 *start = to_c; 477 start++; 478 end = result_s + self_len; 479 480 while (--maxcount > 0) { 481 next = findchar(start, end - start, from_c); 482 if (next == NULL) 483 break; 484 *next = to_c; 485 start = next + 1; 486 } 487 488 return result; 489} 490 491/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ 492static PyObject * 493stringlib_replace_substring_in_place(PyObject *self, 494 const char *from_s, Py_ssize_t from_len, 495 const char *to_s, Py_ssize_t to_len, 496 Py_ssize_t maxcount) 497{ 498 const char *self_s, *end; 499 char *result_s, *start; 500 Py_ssize_t self_len, offset; 501 PyObject *result; 502 503 /* The result bytes will be the same size */ 504 505 self_s = STRINGLIB_STR(self); 506 self_len = STRINGLIB_LEN(self); 507 508 offset = stringlib_find(self_s, self_len, 509 from_s, from_len, 510 0); 511 if (offset == -1) { 512 /* No matches; return the original bytes */ 513 return return_self(self); 514 } 515 516 /* Need to make a new bytes */ 517 result = STRINGLIB_NEW(NULL, self_len); 518 if (result == NULL) { 519 return NULL; 520 } 521 result_s = STRINGLIB_STR(result); 522 memcpy(result_s, self_s, self_len); 523 524 /* change everything in-place, starting with this one */ 525 start = result_s + offset; 526 memcpy(start, to_s, from_len); 527 start += from_len; 528 end = result_s + self_len; 529 530 while ( --maxcount > 0) { 531 offset = stringlib_find(start, end - start, 532 from_s, from_len, 533 0); 534 if (offset == -1) 535 break; 536 memcpy(start + offset, to_s, from_len); 537 start += offset + from_len; 538 } 539 540 return result; 541} 542 543/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ 544static PyObject * 545stringlib_replace_single_character(PyObject *self, 546 char from_c, 547 const char *to_s, Py_ssize_t to_len, 548 Py_ssize_t maxcount) 549{ 550 const char *self_s, *start, *next, *end; 551 char *result_s; 552 Py_ssize_t self_len, result_len; 553 Py_ssize_t count; 554 PyObject *result; 555 556 self_s = STRINGLIB_STR(self); 557 self_len = STRINGLIB_LEN(self); 558 559 count = countchar(self_s, self_len, from_c, maxcount); 560 if (count == 0) { 561 /* no matches, return unchanged */ 562 return return_self(self); 563 } 564 565 /* use the difference between current and new, hence the "-1" */ 566 /* result_len = self_len + count * (to_len-1) */ 567 assert(count > 0); 568 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { 569 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); 570 return NULL; 571 } 572 result_len = self_len + count * (to_len - 1); 573 574 result = STRINGLIB_NEW(NULL, result_len); 575 if (result == NULL) { 576 return NULL; 577 } 578 result_s = STRINGLIB_STR(result); 579 580 start = self_s; 581 end = self_s + self_len; 582 while (count-- > 0) { 583 next = findchar(start, end - start, from_c); 584 if (next == NULL) 585 break; 586 587 if (next == start) { 588 /* replace with the 'to' */ 589 memcpy(result_s, to_s, to_len); 590 result_s += to_len; 591 start += 1; 592 } else { 593 /* copy the unchanged old then the 'to' */ 594 memcpy(result_s, start, next - start); 595 result_s += (next - start); 596 memcpy(result_s, to_s, to_len); 597 result_s += to_len; 598 start = next + 1; 599 } 600 } 601 /* Copy the remainder of the remaining bytes */ 602 memcpy(result_s, start, end - start); 603 604 return result; 605} 606 607/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ 608static PyObject * 609stringlib_replace_substring(PyObject *self, 610 const char *from_s, Py_ssize_t from_len, 611 const char *to_s, Py_ssize_t to_len, 612 Py_ssize_t maxcount) 613{ 614 const char *self_s, *start, *next, *end; 615 char *result_s; 616 Py_ssize_t self_len, result_len; 617 Py_ssize_t count, offset; 618 PyObject *result; 619 620 self_s = STRINGLIB_STR(self); 621 self_len = STRINGLIB_LEN(self); 622 623 count = stringlib_count(self_s, self_len, 624 from_s, from_len, 625 maxcount); 626 627 if (count == 0) { 628 /* no matches, return unchanged */ 629 return return_self(self); 630 } 631 632 /* Check for overflow */ 633 /* result_len = self_len + count * (to_len-from_len) */ 634 assert(count > 0); 635 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { 636 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); 637 return NULL; 638 } 639 result_len = self_len + count * (to_len - from_len); 640 641 result = STRINGLIB_NEW(NULL, result_len); 642 if (result == NULL) { 643 return NULL; 644 } 645 result_s = STRINGLIB_STR(result); 646 647 start = self_s; 648 end = self_s + self_len; 649 while (count-- > 0) { 650 offset = stringlib_find(start, end - start, 651 from_s, from_len, 652 0); 653 if (offset == -1) 654 break; 655 next = start + offset; 656 if (next == start) { 657 /* replace with the 'to' */ 658 memcpy(result_s, to_s, to_len); 659 result_s += to_len; 660 start += from_len; 661 } else { 662 /* copy the unchanged old then the 'to' */ 663 memcpy(result_s, start, next - start); 664 result_s += (next - start); 665 memcpy(result_s, to_s, to_len); 666 result_s += to_len; 667 start = next + from_len; 668 } 669 } 670 /* Copy the remainder of the remaining bytes */ 671 memcpy(result_s, start, end - start); 672 673 return result; 674} 675 676 677static PyObject * 678stringlib_replace(PyObject *self, 679 const char *from_s, Py_ssize_t from_len, 680 const char *to_s, Py_ssize_t to_len, 681 Py_ssize_t maxcount) 682{ 683 if (STRINGLIB_LEN(self) < from_len) { 684 /* nothing to do; return the original bytes */ 685 return return_self(self); 686 } 687 if (maxcount < 0) { 688 maxcount = PY_SSIZE_T_MAX; 689 } else if (maxcount == 0) { 690 /* nothing to do; return the original bytes */ 691 return return_self(self); 692 } 693 694 /* Handle zero-length special cases */ 695 if (from_len == 0) { 696 if (to_len == 0) { 697 /* nothing to do; return the original bytes */ 698 return return_self(self); 699 } 700 /* insert the 'to' bytes everywhere. */ 701 /* >>> b"Python".replace(b"", b".") */ 702 /* b'.P.y.t.h.o.n.' */ 703 return stringlib_replace_interleave(self, to_s, to_len, maxcount); 704 } 705 706 if (to_len == 0) { 707 /* delete all occurrences of 'from' bytes */ 708 if (from_len == 1) { 709 return stringlib_replace_delete_single_character( 710 self, from_s[0], maxcount); 711 } else { 712 return stringlib_replace_delete_substring( 713 self, from_s, from_len, maxcount); 714 } 715 } 716 717 /* Handle special case where both bytes have the same length */ 718 719 if (from_len == to_len) { 720 if (from_len == 1) { 721 return stringlib_replace_single_character_in_place( 722 self, from_s[0], to_s[0], maxcount); 723 } else { 724 return stringlib_replace_substring_in_place( 725 self, from_s, from_len, to_s, to_len, maxcount); 726 } 727 } 728 729 /* Otherwise use the more generic algorithms */ 730 if (from_len == 1) { 731 return stringlib_replace_single_character( 732 self, from_s[0], to_s, to_len, maxcount); 733 } else { 734 /* len('from')>=2, len('to')>=1 */ 735 return stringlib_replace_substring( 736 self, from_s, from_len, to_s, to_len, maxcount); 737 } 738} 739 740#undef findchar 741