1/* 2 unicode_format.h -- implementation of str.format(). 3*/ 4 5#include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter() 6 7/************************************************************************/ 8/*********** Global data structures and forward declarations *********/ 9/************************************************************************/ 10 11/* 12 A SubString consists of the characters between two string or 13 unicode pointers. 14*/ 15typedef struct { 16 PyObject *str; /* borrowed reference */ 17 Py_ssize_t start, end; 18} SubString; 19 20 21typedef enum { 22 ANS_INIT, 23 ANS_AUTO, 24 ANS_MANUAL 25} AutoNumberState; /* Keep track if we're auto-numbering fields */ 26 27/* Keeps track of our auto-numbering state, and which number field we're on */ 28typedef struct { 29 AutoNumberState an_state; 30 int an_field_number; 31} AutoNumber; 32 33 34/* forward declaration for recursion */ 35static PyObject * 36build_string(SubString *input, PyObject *args, PyObject *kwargs, 37 int recursion_depth, AutoNumber *auto_number); 38 39 40 41/************************************************************************/ 42/************************** Utility functions ************************/ 43/************************************************************************/ 44 45static void 46AutoNumber_Init(AutoNumber *auto_number) 47{ 48 auto_number->an_state = ANS_INIT; 49 auto_number->an_field_number = 0; 50} 51 52/* fill in a SubString from a pointer and length */ 53Py_LOCAL_INLINE(void) 54SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end) 55{ 56 str->str = s; 57 str->start = start; 58 str->end = end; 59} 60 61/* return a new string. if str->str is NULL, return None */ 62Py_LOCAL_INLINE(PyObject *) 63SubString_new_object(SubString *str) 64{ 65 if (str->str == NULL) 66 Py_RETURN_NONE; 67 return PyUnicode_Substring(str->str, str->start, str->end); 68} 69 70/* return a new string. if str->str is NULL, return a new empty string */ 71Py_LOCAL_INLINE(PyObject *) 72SubString_new_object_or_empty(SubString *str) 73{ 74 if (str->str == NULL) { 75 return PyUnicode_New(0, 0); 76 } 77 return SubString_new_object(str); 78} 79 80/* Return 1 if an error has been detected switching between automatic 81 field numbering and manual field specification, else return 0. Set 82 ValueError on error. */ 83static int 84autonumber_state_error(AutoNumberState state, int field_name_is_empty) 85{ 86 if (state == ANS_MANUAL) { 87 if (field_name_is_empty) { 88 PyErr_SetString(PyExc_ValueError, "cannot switch from " 89 "manual field specification to " 90 "automatic field numbering"); 91 return 1; 92 } 93 } 94 else { 95 if (!field_name_is_empty) { 96 PyErr_SetString(PyExc_ValueError, "cannot switch from " 97 "automatic field numbering to " 98 "manual field specification"); 99 return 1; 100 } 101 } 102 return 0; 103} 104 105 106/************************************************************************/ 107/*********** Format string parsing -- integers and identifiers *********/ 108/************************************************************************/ 109 110static Py_ssize_t 111get_integer(const SubString *str) 112{ 113 Py_ssize_t accumulator = 0; 114 Py_ssize_t digitval; 115 Py_ssize_t i; 116 117 /* empty string is an error */ 118 if (str->start >= str->end) 119 return -1; 120 121 for (i = str->start; i < str->end; i++) { 122 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i)); 123 if (digitval < 0) 124 return -1; 125 /* 126 Detect possible overflow before it happens: 127 128 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if 129 accumulator > (PY_SSIZE_T_MAX - digitval) / 10. 130 */ 131 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { 132 PyErr_Format(PyExc_ValueError, 133 "Too many decimal digits in format string"); 134 return -1; 135 } 136 accumulator = accumulator * 10 + digitval; 137 } 138 return accumulator; 139} 140 141/************************************************************************/ 142/******** Functions to get field objects and specification strings ******/ 143/************************************************************************/ 144 145/* do the equivalent of obj.name */ 146static PyObject * 147getattr(PyObject *obj, SubString *name) 148{ 149 PyObject *newobj; 150 PyObject *str = SubString_new_object(name); 151 if (str == NULL) 152 return NULL; 153 newobj = PyObject_GetAttr(obj, str); 154 Py_DECREF(str); 155 return newobj; 156} 157 158/* do the equivalent of obj[idx], where obj is a sequence */ 159static PyObject * 160getitem_sequence(PyObject *obj, Py_ssize_t idx) 161{ 162 return PySequence_GetItem(obj, idx); 163} 164 165/* do the equivalent of obj[idx], where obj is not a sequence */ 166static PyObject * 167getitem_idx(PyObject *obj, Py_ssize_t idx) 168{ 169 PyObject *newobj; 170 PyObject *idx_obj = PyLong_FromSsize_t(idx); 171 if (idx_obj == NULL) 172 return NULL; 173 newobj = PyObject_GetItem(obj, idx_obj); 174 Py_DECREF(idx_obj); 175 return newobj; 176} 177 178/* do the equivalent of obj[name] */ 179static PyObject * 180getitem_str(PyObject *obj, SubString *name) 181{ 182 PyObject *newobj; 183 PyObject *str = SubString_new_object(name); 184 if (str == NULL) 185 return NULL; 186 newobj = PyObject_GetItem(obj, str); 187 Py_DECREF(str); 188 return newobj; 189} 190 191typedef struct { 192 /* the entire string we're parsing. we assume that someone else 193 is managing its lifetime, and that it will exist for the 194 lifetime of the iterator. can be empty */ 195 SubString str; 196 197 /* index to where we are inside field_name */ 198 Py_ssize_t index; 199} FieldNameIterator; 200 201 202static int 203FieldNameIterator_init(FieldNameIterator *self, PyObject *s, 204 Py_ssize_t start, Py_ssize_t end) 205{ 206 SubString_init(&self->str, s, start, end); 207 self->index = start; 208 return 1; 209} 210 211static int 212_FieldNameIterator_attr(FieldNameIterator *self, SubString *name) 213{ 214 Py_UCS4 c; 215 216 name->str = self->str.str; 217 name->start = self->index; 218 219 /* return everything until '.' or '[' */ 220 while (self->index < self->str.end) { 221 c = PyUnicode_READ_CHAR(self->str.str, self->index++); 222 switch (c) { 223 case '[': 224 case '.': 225 /* backup so that we this character will be seen next time */ 226 self->index--; 227 break; 228 default: 229 continue; 230 } 231 break; 232 } 233 /* end of string is okay */ 234 name->end = self->index; 235 return 1; 236} 237 238static int 239_FieldNameIterator_item(FieldNameIterator *self, SubString *name) 240{ 241 int bracket_seen = 0; 242 Py_UCS4 c; 243 244 name->str = self->str.str; 245 name->start = self->index; 246 247 /* return everything until ']' */ 248 while (self->index < self->str.end) { 249 c = PyUnicode_READ_CHAR(self->str.str, self->index++); 250 switch (c) { 251 case ']': 252 bracket_seen = 1; 253 break; 254 default: 255 continue; 256 } 257 break; 258 } 259 /* make sure we ended with a ']' */ 260 if (!bracket_seen) { 261 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); 262 return 0; 263 } 264 265 /* end of string is okay */ 266 /* don't include the ']' */ 267 name->end = self->index-1; 268 return 1; 269} 270 271/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ 272static int 273FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, 274 Py_ssize_t *name_idx, SubString *name) 275{ 276 /* check at end of input */ 277 if (self->index >= self->str.end) 278 return 1; 279 280 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) { 281 case '.': 282 *is_attribute = 1; 283 if (_FieldNameIterator_attr(self, name) == 0) 284 return 0; 285 *name_idx = -1; 286 break; 287 case '[': 288 *is_attribute = 0; 289 if (_FieldNameIterator_item(self, name) == 0) 290 return 0; 291 *name_idx = get_integer(name); 292 if (*name_idx == -1 && PyErr_Occurred()) 293 return 0; 294 break; 295 default: 296 /* Invalid character follows ']' */ 297 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " 298 "follow ']' in format field specifier"); 299 return 0; 300 } 301 302 /* empty string is an error */ 303 if (name->start == name->end) { 304 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); 305 return 0; 306 } 307 308 return 2; 309} 310 311 312/* input: field_name 313 output: 'first' points to the part before the first '[' or '.' 314 'first_idx' is -1 if 'first' is not an integer, otherwise 315 it's the value of first converted to an integer 316 'rest' is an iterator to return the rest 317*/ 318static int 319field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first, 320 Py_ssize_t *first_idx, FieldNameIterator *rest, 321 AutoNumber *auto_number) 322{ 323 Py_UCS4 c; 324 Py_ssize_t i = start; 325 int field_name_is_empty; 326 int using_numeric_index; 327 328 /* find the part up until the first '.' or '[' */ 329 while (i < end) { 330 switch (c = PyUnicode_READ_CHAR(str, i++)) { 331 case '[': 332 case '.': 333 /* backup so that we this character is available to the 334 "rest" iterator */ 335 i--; 336 break; 337 default: 338 continue; 339 } 340 break; 341 } 342 343 /* set up the return values */ 344 SubString_init(first, str, start, i); 345 FieldNameIterator_init(rest, str, i, end); 346 347 /* see if "first" is an integer, in which case it's used as an index */ 348 *first_idx = get_integer(first); 349 if (*first_idx == -1 && PyErr_Occurred()) 350 return 0; 351 352 field_name_is_empty = first->start >= first->end; 353 354 /* If the field name is omitted or if we have a numeric index 355 specified, then we're doing numeric indexing into args. */ 356 using_numeric_index = field_name_is_empty || *first_idx != -1; 357 358 /* We always get here exactly one time for each field we're 359 processing. And we get here in field order (counting by left 360 braces). So this is the perfect place to handle automatic field 361 numbering if the field name is omitted. */ 362 363 /* Check if we need to do the auto-numbering. It's not needed if 364 we're called from string.Format routines, because it's handled 365 in that class by itself. */ 366 if (auto_number) { 367 /* Initialize our auto numbering state if this is the first 368 time we're either auto-numbering or manually numbering. */ 369 if (auto_number->an_state == ANS_INIT && using_numeric_index) 370 auto_number->an_state = field_name_is_empty ? 371 ANS_AUTO : ANS_MANUAL; 372 373 /* Make sure our state is consistent with what we're doing 374 this time through. Only check if we're using a numeric 375 index. */ 376 if (using_numeric_index) 377 if (autonumber_state_error(auto_number->an_state, 378 field_name_is_empty)) 379 return 0; 380 /* Zero length field means we want to do auto-numbering of the 381 fields. */ 382 if (field_name_is_empty) 383 *first_idx = (auto_number->an_field_number)++; 384 } 385 386 return 1; 387} 388 389 390/* 391 get_field_object returns the object inside {}, before the 392 format_spec. It handles getindex and getattr lookups and consumes 393 the entire input string. 394*/ 395static PyObject * 396get_field_object(SubString *input, PyObject *args, PyObject *kwargs, 397 AutoNumber *auto_number) 398{ 399 PyObject *obj = NULL; 400 int ok; 401 int is_attribute; 402 SubString name; 403 SubString first; 404 Py_ssize_t index; 405 FieldNameIterator rest; 406 407 if (!field_name_split(input->str, input->start, input->end, &first, 408 &index, &rest, auto_number)) { 409 goto error; 410 } 411 412 if (index == -1) { 413 /* look up in kwargs */ 414 PyObject *key = SubString_new_object(&first); 415 if (key == NULL) { 416 goto error; 417 } 418 if (kwargs == NULL) { 419 PyErr_SetObject(PyExc_KeyError, key); 420 Py_DECREF(key); 421 goto error; 422 } 423 /* Use PyObject_GetItem instead of PyDict_GetItem because this 424 code is no longer just used with kwargs. It might be passed 425 a non-dict when called through format_map. */ 426 obj = PyObject_GetItem(kwargs, key); 427 Py_DECREF(key); 428 if (obj == NULL) { 429 goto error; 430 } 431 } 432 else { 433 /* If args is NULL, we have a format string with a positional field 434 with only kwargs to retrieve it from. This can only happen when 435 used with format_map(), where positional arguments are not 436 allowed. */ 437 if (args == NULL) { 438 PyErr_SetString(PyExc_ValueError, "Format string contains " 439 "positional fields"); 440 goto error; 441 } 442 443 /* look up in args */ 444 obj = PySequence_GetItem(args, index); 445 if (obj == NULL) { 446 PyErr_Format(PyExc_IndexError, 447 "Replacement index %zd out of range for positional " 448 "args tuple", 449 index); 450 goto error; 451 } 452 } 453 454 /* iterate over the rest of the field_name */ 455 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, 456 &name)) == 2) { 457 PyObject *tmp; 458 459 if (is_attribute) 460 /* getattr lookup "." */ 461 tmp = getattr(obj, &name); 462 else 463 /* getitem lookup "[]" */ 464 if (index == -1) 465 tmp = getitem_str(obj, &name); 466 else 467 if (PySequence_Check(obj)) 468 tmp = getitem_sequence(obj, index); 469 else 470 /* not a sequence */ 471 tmp = getitem_idx(obj, index); 472 if (tmp == NULL) 473 goto error; 474 475 /* assign to obj */ 476 Py_DECREF(obj); 477 obj = tmp; 478 } 479 /* end of iterator, this is the non-error case */ 480 if (ok == 1) 481 return obj; 482error: 483 Py_XDECREF(obj); 484 return NULL; 485} 486 487/************************************************************************/ 488/***************** Field rendering functions **************************/ 489/************************************************************************/ 490 491/* 492 render_field() is the main function in this section. It takes the 493 field object and field specification string generated by 494 get_field_and_spec, and renders the field into the output string. 495 496 render_field calls fieldobj.__format__(format_spec) method, and 497 appends to the output. 498*/ 499static int 500render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) 501{ 502 int ok = 0; 503 PyObject *result = NULL; 504 PyObject *format_spec_object = NULL; 505 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; 506 int err; 507 508 /* If we know the type exactly, skip the lookup of __format__ and just 509 call the formatter directly. */ 510 if (PyUnicode_CheckExact(fieldobj)) 511 formatter = _PyUnicode_FormatAdvancedWriter; 512 else if (PyLong_CheckExact(fieldobj)) 513 formatter = _PyLong_FormatAdvancedWriter; 514 else if (PyFloat_CheckExact(fieldobj)) 515 formatter = _PyFloat_FormatAdvancedWriter; 516 else if (PyComplex_CheckExact(fieldobj)) 517 formatter = _PyComplex_FormatAdvancedWriter; 518 519 if (formatter) { 520 /* we know exactly which formatter will be called when __format__ is 521 looked up, so call it directly, instead. */ 522 err = formatter(writer, fieldobj, format_spec->str, 523 format_spec->start, format_spec->end); 524 return (err == 0); 525 } 526 else { 527 /* We need to create an object out of the pointers we have, because 528 __format__ takes a string/unicode object for format_spec. */ 529 if (format_spec->str) 530 format_spec_object = PyUnicode_Substring(format_spec->str, 531 format_spec->start, 532 format_spec->end); 533 else 534 format_spec_object = PyUnicode_New(0, 0); 535 if (format_spec_object == NULL) 536 goto done; 537 538 result = PyObject_Format(fieldobj, format_spec_object); 539 } 540 if (result == NULL) 541 goto done; 542 543 if (_PyUnicodeWriter_WriteStr(writer, result) == -1) 544 goto done; 545 ok = 1; 546 547done: 548 Py_XDECREF(format_spec_object); 549 Py_XDECREF(result); 550 return ok; 551} 552 553static int 554parse_field(SubString *str, SubString *field_name, SubString *format_spec, 555 int *format_spec_needs_expanding, Py_UCS4 *conversion) 556{ 557 /* Note this function works if the field name is zero length, 558 which is good. Zero length field names are handled later, in 559 field_name_split. */ 560 561 Py_UCS4 c = 0; 562 563 /* initialize these, as they may be empty */ 564 *conversion = '\0'; 565 SubString_init(format_spec, NULL, 0, 0); 566 567 /* Search for the field name. it's terminated by the end of 568 the string, or a ':' or '!' */ 569 field_name->str = str->str; 570 field_name->start = str->start; 571 while (str->start < str->end) { 572 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { 573 case '{': 574 PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); 575 return 0; 576 case '[': 577 for (; str->start < str->end; str->start++) 578 if (PyUnicode_READ_CHAR(str->str, str->start) == ']') 579 break; 580 continue; 581 case '}': 582 case ':': 583 case '!': 584 break; 585 default: 586 continue; 587 } 588 break; 589 } 590 591 field_name->end = str->start - 1; 592 if (c == '!' || c == ':') { 593 Py_ssize_t count; 594 /* we have a format specifier and/or a conversion */ 595 /* don't include the last character */ 596 597 /* see if there's a conversion specifier */ 598 if (c == '!') { 599 /* there must be another character present */ 600 if (str->start >= str->end) { 601 PyErr_SetString(PyExc_ValueError, 602 "end of string while looking for conversion " 603 "specifier"); 604 return 0; 605 } 606 *conversion = PyUnicode_READ_CHAR(str->str, str->start++); 607 608 if (str->start < str->end) { 609 c = PyUnicode_READ_CHAR(str->str, str->start++); 610 if (c == '}') 611 return 1; 612 if (c != ':') { 613 PyErr_SetString(PyExc_ValueError, 614 "expected ':' after conversion specifier"); 615 return 0; 616 } 617 } 618 } 619 format_spec->str = str->str; 620 format_spec->start = str->start; 621 count = 1; 622 while (str->start < str->end) { 623 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { 624 case '{': 625 *format_spec_needs_expanding = 1; 626 count++; 627 break; 628 case '}': 629 count--; 630 if (count == 0) { 631 format_spec->end = str->start - 1; 632 return 1; 633 } 634 break; 635 default: 636 break; 637 } 638 } 639 640 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); 641 return 0; 642 } 643 else if (c != '}') { 644 PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); 645 return 0; 646 } 647 648 return 1; 649} 650 651/************************************************************************/ 652/******* Output string allocation and escape-to-markup processing ******/ 653/************************************************************************/ 654 655/* MarkupIterator breaks the string into pieces of either literal 656 text, or things inside {} that need to be marked up. it is 657 designed to make it easy to wrap a Python iterator around it, for 658 use with the Formatter class */ 659 660typedef struct { 661 SubString str; 662} MarkupIterator; 663 664static int 665MarkupIterator_init(MarkupIterator *self, PyObject *str, 666 Py_ssize_t start, Py_ssize_t end) 667{ 668 SubString_init(&self->str, str, start, end); 669 return 1; 670} 671 672/* returns 0 on error, 1 on non-error termination, and 2 if it got a 673 string (or something to be expanded) */ 674static int 675MarkupIterator_next(MarkupIterator *self, SubString *literal, 676 int *field_present, SubString *field_name, 677 SubString *format_spec, Py_UCS4 *conversion, 678 int *format_spec_needs_expanding) 679{ 680 int at_end; 681 Py_UCS4 c = 0; 682 Py_ssize_t start; 683 Py_ssize_t len; 684 int markup_follows = 0; 685 686 /* initialize all of the output variables */ 687 SubString_init(literal, NULL, 0, 0); 688 SubString_init(field_name, NULL, 0, 0); 689 SubString_init(format_spec, NULL, 0, 0); 690 *conversion = '\0'; 691 *format_spec_needs_expanding = 0; 692 *field_present = 0; 693 694 /* No more input, end of iterator. This is the normal exit 695 path. */ 696 if (self->str.start >= self->str.end) 697 return 1; 698 699 start = self->str.start; 700 701 /* First read any literal text. Read until the end of string, an 702 escaped '{' or '}', or an unescaped '{'. In order to never 703 allocate memory and so I can just pass pointers around, if 704 there's an escaped '{' or '}' then we'll return the literal 705 including the brace, but no format object. The next time 706 through, we'll return the rest of the literal, skipping past 707 the second consecutive brace. */ 708 while (self->str.start < self->str.end) { 709 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { 710 case '{': 711 case '}': 712 markup_follows = 1; 713 break; 714 default: 715 continue; 716 } 717 break; 718 } 719 720 at_end = self->str.start >= self->str.end; 721 len = self->str.start - start; 722 723 if ((c == '}') && (at_end || 724 (c != PyUnicode_READ_CHAR(self->str.str, 725 self->str.start)))) { 726 PyErr_SetString(PyExc_ValueError, "Single '}' encountered " 727 "in format string"); 728 return 0; 729 } 730 if (at_end && c == '{') { 731 PyErr_SetString(PyExc_ValueError, "Single '{' encountered " 732 "in format string"); 733 return 0; 734 } 735 if (!at_end) { 736 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) { 737 /* escaped } or {, skip it in the input. there is no 738 markup object following us, just this literal text */ 739 self->str.start++; 740 markup_follows = 0; 741 } 742 else 743 len--; 744 } 745 746 /* record the literal text */ 747 literal->str = self->str.str; 748 literal->start = start; 749 literal->end = start + len; 750 751 if (!markup_follows) 752 return 2; 753 754 /* this is markup; parse the field */ 755 *field_present = 1; 756 if (!parse_field(&self->str, field_name, format_spec, 757 format_spec_needs_expanding, conversion)) 758 return 0; 759 return 2; 760} 761 762 763/* do the !r or !s conversion on obj */ 764static PyObject * 765do_conversion(PyObject *obj, Py_UCS4 conversion) 766{ 767 /* XXX in pre-3.0, do we need to convert this to unicode, since it 768 might have returned a string? */ 769 switch (conversion) { 770 case 'r': 771 return PyObject_Repr(obj); 772 case 's': 773 return PyObject_Str(obj); 774 case 'a': 775 return PyObject_ASCII(obj); 776 default: 777 if (conversion > 32 && conversion < 127) { 778 /* It's the ASCII subrange; casting to char is safe 779 (assuming the execution character set is an ASCII 780 superset). */ 781 PyErr_Format(PyExc_ValueError, 782 "Unknown conversion specifier %c", 783 (char)conversion); 784 } else 785 PyErr_Format(PyExc_ValueError, 786 "Unknown conversion specifier \\x%x", 787 (unsigned int)conversion); 788 return NULL; 789 } 790} 791 792/* given: 793 794 {field_name!conversion:format_spec} 795 796 compute the result and write it to output. 797 format_spec_needs_expanding is an optimization. if it's false, 798 just output the string directly, otherwise recursively expand the 799 format_spec string. 800 801 field_name is allowed to be zero length, in which case we 802 are doing auto field numbering. 803*/ 804 805static int 806output_markup(SubString *field_name, SubString *format_spec, 807 int format_spec_needs_expanding, Py_UCS4 conversion, 808 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, 809 int recursion_depth, AutoNumber *auto_number) 810{ 811 PyObject *tmp = NULL; 812 PyObject *fieldobj = NULL; 813 SubString expanded_format_spec; 814 SubString *actual_format_spec; 815 int result = 0; 816 817 /* convert field_name to an object */ 818 fieldobj = get_field_object(field_name, args, kwargs, auto_number); 819 if (fieldobj == NULL) 820 goto done; 821 822 if (conversion != '\0') { 823 tmp = do_conversion(fieldobj, conversion); 824 if (tmp == NULL || PyUnicode_READY(tmp) == -1) 825 goto done; 826 827 /* do the assignment, transferring ownership: fieldobj = tmp */ 828 Py_DECREF(fieldobj); 829 fieldobj = tmp; 830 tmp = NULL; 831 } 832 833 /* if needed, recursively compute the format_spec */ 834 if (format_spec_needs_expanding) { 835 tmp = build_string(format_spec, args, kwargs, recursion_depth-1, 836 auto_number); 837 if (tmp == NULL || PyUnicode_READY(tmp) == -1) 838 goto done; 839 840 /* note that in the case we're expanding the format string, 841 tmp must be kept around until after the call to 842 render_field. */ 843 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp)); 844 actual_format_spec = &expanded_format_spec; 845 } 846 else 847 actual_format_spec = format_spec; 848 849 if (render_field(fieldobj, actual_format_spec, writer) == 0) 850 goto done; 851 852 result = 1; 853 854done: 855 Py_XDECREF(fieldobj); 856 Py_XDECREF(tmp); 857 858 return result; 859} 860 861/* 862 do_markup is the top-level loop for the format() method. It 863 searches through the format string for escapes to markup codes, and 864 calls other functions to move non-markup text to the output, 865 and to perform the markup to the output. 866*/ 867static int 868do_markup(SubString *input, PyObject *args, PyObject *kwargs, 869 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) 870{ 871 MarkupIterator iter; 872 int format_spec_needs_expanding; 873 int result; 874 int field_present; 875 SubString literal; 876 SubString field_name; 877 SubString format_spec; 878 Py_UCS4 conversion; 879 880 MarkupIterator_init(&iter, input->str, input->start, input->end); 881 while ((result = MarkupIterator_next(&iter, &literal, &field_present, 882 &field_name, &format_spec, 883 &conversion, 884 &format_spec_needs_expanding)) == 2) { 885 if (literal.end != literal.start) { 886 if (!field_present && iter.str.start == iter.str.end) 887 writer->overallocate = 0; 888 if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, 889 literal.start, literal.end) < 0) 890 return 0; 891 } 892 893 if (field_present) { 894 if (iter.str.start == iter.str.end) 895 writer->overallocate = 0; 896 if (!output_markup(&field_name, &format_spec, 897 format_spec_needs_expanding, conversion, writer, 898 args, kwargs, recursion_depth, auto_number)) 899 return 0; 900 } 901 } 902 return result; 903} 904 905 906/* 907 build_string allocates the output string and then 908 calls do_markup to do the heavy lifting. 909*/ 910static PyObject * 911build_string(SubString *input, PyObject *args, PyObject *kwargs, 912 int recursion_depth, AutoNumber *auto_number) 913{ 914 _PyUnicodeWriter writer; 915 916 /* check the recursion level */ 917 if (recursion_depth <= 0) { 918 PyErr_SetString(PyExc_ValueError, 919 "Max string recursion exceeded"); 920 return NULL; 921 } 922 923 _PyUnicodeWriter_Init(&writer); 924 writer.overallocate = 1; 925 writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; 926 927 if (!do_markup(input, args, kwargs, &writer, recursion_depth, 928 auto_number)) { 929 _PyUnicodeWriter_Dealloc(&writer); 930 return NULL; 931 } 932 933 return _PyUnicodeWriter_Finish(&writer); 934} 935 936/************************************************************************/ 937/*********** main routine ***********************************************/ 938/************************************************************************/ 939 940/* this is the main entry point */ 941static PyObject * 942do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) 943{ 944 SubString input; 945 946 /* PEP 3101 says only 2 levels, so that 947 "{0:{1}}".format('abc', 's') # works 948 "{0:{1:{2}}}".format('abc', 's', '') # fails 949 */ 950 int recursion_depth = 2; 951 952 AutoNumber auto_number; 953 954 if (PyUnicode_READY(self) == -1) 955 return NULL; 956 957 AutoNumber_Init(&auto_number); 958 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self)); 959 return build_string(&input, args, kwargs, recursion_depth, &auto_number); 960} 961 962static PyObject * 963do_string_format_map(PyObject *self, PyObject *obj) 964{ 965 return do_string_format(self, NULL, obj); 966} 967 968 969/************************************************************************/ 970/*********** formatteriterator ******************************************/ 971/************************************************************************/ 972 973/* This is used to implement string.Formatter.vparse(). It exists so 974 Formatter can share code with the built in unicode.format() method. 975 It's really just a wrapper around MarkupIterator that is callable 976 from Python. */ 977 978typedef struct { 979 PyObject_HEAD 980 PyObject *str; 981 MarkupIterator it_markup; 982} formatteriterobject; 983 984static void 985formatteriter_dealloc(formatteriterobject *it) 986{ 987 Py_XDECREF(it->str); 988 PyObject_Free(it); 989} 990 991/* returns a tuple: 992 (literal, field_name, format_spec, conversion) 993 994 literal is any literal text to output. might be zero length 995 field_name is the string before the ':'. might be None 996 format_spec is the string after the ':'. mibht be None 997 conversion is either None, or the string after the '!' 998*/ 999static PyObject * 1000formatteriter_next(formatteriterobject *it) 1001{ 1002 SubString literal; 1003 SubString field_name; 1004 SubString format_spec; 1005 Py_UCS4 conversion; 1006 int format_spec_needs_expanding; 1007 int field_present; 1008 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, 1009 &field_name, &format_spec, &conversion, 1010 &format_spec_needs_expanding); 1011 1012 /* all of the SubString objects point into it->str, so no 1013 memory management needs to be done on them */ 1014 assert(0 <= result && result <= 2); 1015 if (result == 0 || result == 1) 1016 /* if 0, error has already been set, if 1, iterator is empty */ 1017 return NULL; 1018 else { 1019 PyObject *literal_str = NULL; 1020 PyObject *field_name_str = NULL; 1021 PyObject *format_spec_str = NULL; 1022 PyObject *conversion_str = NULL; 1023 PyObject *tuple = NULL; 1024 1025 literal_str = SubString_new_object(&literal); 1026 if (literal_str == NULL) 1027 goto done; 1028 1029 field_name_str = SubString_new_object(&field_name); 1030 if (field_name_str == NULL) 1031 goto done; 1032 1033 /* if field_name is non-zero length, return a string for 1034 format_spec (even if zero length), else return None */ 1035 format_spec_str = (field_present ? 1036 SubString_new_object_or_empty : 1037 SubString_new_object)(&format_spec); 1038 if (format_spec_str == NULL) 1039 goto done; 1040 1041 /* if the conversion is not specified, return a None, 1042 otherwise create a one length string with the conversion 1043 character */ 1044 if (conversion == '\0') { 1045 conversion_str = Py_None; 1046 Py_INCREF(conversion_str); 1047 } 1048 else 1049 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 1050 &conversion, 1); 1051 if (conversion_str == NULL) 1052 goto done; 1053 1054 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, 1055 conversion_str); 1056 done: 1057 Py_XDECREF(literal_str); 1058 Py_XDECREF(field_name_str); 1059 Py_XDECREF(format_spec_str); 1060 Py_XDECREF(conversion_str); 1061 return tuple; 1062 } 1063} 1064 1065static PyMethodDef formatteriter_methods[] = { 1066 {NULL, NULL} /* sentinel */ 1067}; 1068 1069static PyTypeObject PyFormatterIter_Type = { 1070 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1071 "formatteriterator", /* tp_name */ 1072 sizeof(formatteriterobject), /* tp_basicsize */ 1073 0, /* tp_itemsize */ 1074 /* methods */ 1075 (destructor)formatteriter_dealloc, /* tp_dealloc */ 1076 0, /* tp_vectorcall_offset */ 1077 0, /* tp_getattr */ 1078 0, /* tp_setattr */ 1079 0, /* tp_as_async */ 1080 0, /* tp_repr */ 1081 0, /* tp_as_number */ 1082 0, /* tp_as_sequence */ 1083 0, /* tp_as_mapping */ 1084 0, /* tp_hash */ 1085 0, /* tp_call */ 1086 0, /* tp_str */ 1087 PyObject_GenericGetAttr, /* tp_getattro */ 1088 0, /* tp_setattro */ 1089 0, /* tp_as_buffer */ 1090 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1091 0, /* tp_doc */ 1092 0, /* tp_traverse */ 1093 0, /* tp_clear */ 1094 0, /* tp_richcompare */ 1095 0, /* tp_weaklistoffset */ 1096 PyObject_SelfIter, /* tp_iter */ 1097 (iternextfunc)formatteriter_next, /* tp_iternext */ 1098 formatteriter_methods, /* tp_methods */ 1099 0, 1100}; 1101 1102/* unicode_formatter_parser is used to implement 1103 string.Formatter.vformat. it parses a string and returns tuples 1104 describing the parsed elements. It's a wrapper around 1105 stringlib/string_format.h's MarkupIterator */ 1106static PyObject * 1107formatter_parser(PyObject *ignored, PyObject *self) 1108{ 1109 formatteriterobject *it; 1110 1111 if (!PyUnicode_Check(self)) { 1112 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); 1113 return NULL; 1114 } 1115 1116 if (PyUnicode_READY(self) == -1) 1117 return NULL; 1118 1119 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); 1120 if (it == NULL) 1121 return NULL; 1122 1123 /* take ownership, give the object to the iterator */ 1124 Py_INCREF(self); 1125 it->str = self; 1126 1127 /* initialize the contained MarkupIterator */ 1128 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self)); 1129 return (PyObject *)it; 1130} 1131 1132 1133/************************************************************************/ 1134/*********** fieldnameiterator ******************************************/ 1135/************************************************************************/ 1136 1137 1138/* This is used to implement string.Formatter.vparse(). It parses the 1139 field name into attribute and item values. It's a Python-callable 1140 wrapper around FieldNameIterator */ 1141 1142typedef struct { 1143 PyObject_HEAD 1144 PyObject *str; 1145 FieldNameIterator it_field; 1146} fieldnameiterobject; 1147 1148static void 1149fieldnameiter_dealloc(fieldnameiterobject *it) 1150{ 1151 Py_XDECREF(it->str); 1152 PyObject_Free(it); 1153} 1154 1155/* returns a tuple: 1156 (is_attr, value) 1157 is_attr is true if we used attribute syntax (e.g., '.foo') 1158 false if we used index syntax (e.g., '[foo]') 1159 value is an integer or string 1160*/ 1161static PyObject * 1162fieldnameiter_next(fieldnameiterobject *it) 1163{ 1164 int result; 1165 int is_attr; 1166 Py_ssize_t idx; 1167 SubString name; 1168 1169 result = FieldNameIterator_next(&it->it_field, &is_attr, 1170 &idx, &name); 1171 if (result == 0 || result == 1) 1172 /* if 0, error has already been set, if 1, iterator is empty */ 1173 return NULL; 1174 else { 1175 PyObject* result = NULL; 1176 PyObject* is_attr_obj = NULL; 1177 PyObject* obj = NULL; 1178 1179 is_attr_obj = PyBool_FromLong(is_attr); 1180 if (is_attr_obj == NULL) 1181 goto done; 1182 1183 /* either an integer or a string */ 1184 if (idx != -1) 1185 obj = PyLong_FromSsize_t(idx); 1186 else 1187 obj = SubString_new_object(&name); 1188 if (obj == NULL) 1189 goto done; 1190 1191 /* return a tuple of values */ 1192 result = PyTuple_Pack(2, is_attr_obj, obj); 1193 1194 done: 1195 Py_XDECREF(is_attr_obj); 1196 Py_XDECREF(obj); 1197 return result; 1198 } 1199} 1200 1201static PyMethodDef fieldnameiter_methods[] = { 1202 {NULL, NULL} /* sentinel */ 1203}; 1204 1205static PyTypeObject PyFieldNameIter_Type = { 1206 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1207 "fieldnameiterator", /* tp_name */ 1208 sizeof(fieldnameiterobject), /* tp_basicsize */ 1209 0, /* tp_itemsize */ 1210 /* methods */ 1211 (destructor)fieldnameiter_dealloc, /* tp_dealloc */ 1212 0, /* tp_vectorcall_offset */ 1213 0, /* tp_getattr */ 1214 0, /* tp_setattr */ 1215 0, /* tp_as_async */ 1216 0, /* tp_repr */ 1217 0, /* tp_as_number */ 1218 0, /* tp_as_sequence */ 1219 0, /* tp_as_mapping */ 1220 0, /* tp_hash */ 1221 0, /* tp_call */ 1222 0, /* tp_str */ 1223 PyObject_GenericGetAttr, /* tp_getattro */ 1224 0, /* tp_setattro */ 1225 0, /* tp_as_buffer */ 1226 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1227 0, /* tp_doc */ 1228 0, /* tp_traverse */ 1229 0, /* tp_clear */ 1230 0, /* tp_richcompare */ 1231 0, /* tp_weaklistoffset */ 1232 PyObject_SelfIter, /* tp_iter */ 1233 (iternextfunc)fieldnameiter_next, /* tp_iternext */ 1234 fieldnameiter_methods, /* tp_methods */ 1235 0}; 1236 1237/* unicode_formatter_field_name_split is used to implement 1238 string.Formatter.vformat. it takes a PEP 3101 "field name", and 1239 returns a tuple of (first, rest): "first", the part before the 1240 first '.' or '['; and "rest", an iterator for the rest of the field 1241 name. it's a wrapper around stringlib/string_format.h's 1242 field_name_split. The iterator it returns is a 1243 FieldNameIterator */ 1244static PyObject * 1245formatter_field_name_split(PyObject *ignored, PyObject *self) 1246{ 1247 SubString first; 1248 Py_ssize_t first_idx; 1249 fieldnameiterobject *it; 1250 1251 PyObject *first_obj = NULL; 1252 PyObject *result = NULL; 1253 1254 if (!PyUnicode_Check(self)) { 1255 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); 1256 return NULL; 1257 } 1258 1259 if (PyUnicode_READY(self) == -1) 1260 return NULL; 1261 1262 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); 1263 if (it == NULL) 1264 return NULL; 1265 1266 /* take ownership, give the object to the iterator. this is 1267 just to keep the field_name alive */ 1268 Py_INCREF(self); 1269 it->str = self; 1270 1271 /* Pass in auto_number = NULL. We'll return an empty string for 1272 first_obj in that case. */ 1273 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self), 1274 &first, &first_idx, &it->it_field, NULL)) 1275 goto done; 1276 1277 /* first becomes an integer, if possible; else a string */ 1278 if (first_idx != -1) 1279 first_obj = PyLong_FromSsize_t(first_idx); 1280 else 1281 /* convert "first" into a string object */ 1282 first_obj = SubString_new_object(&first); 1283 if (first_obj == NULL) 1284 goto done; 1285 1286 /* return a tuple of values */ 1287 result = PyTuple_Pack(2, first_obj, it); 1288 1289done: 1290 Py_XDECREF(it); 1291 Py_XDECREF(first_obj); 1292 return result; 1293} 1294