1/* -*- Mode: C; c-file-style: "python" -*- */ 2 3#include <Python.h> 4#include "pycore_dtoa.h" // _Py_dg_strtod() 5#include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR 6#include <locale.h> 7 8/* Case-insensitive string match used for nan and inf detection; t should be 9 lower-case. Returns 1 for a successful match, 0 otherwise. */ 10 11static int 12case_insensitive_match(const char *s, const char *t) 13{ 14 while(*t && Py_TOLOWER(*s) == *t) { 15 s++; 16 t++; 17 } 18 return *t ? 0 : 1; 19} 20 21/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or 22 "infinity", with an optional leading sign of "+" or "-". On success, 23 return the NaN or Infinity as a double and set *endptr to point just beyond 24 the successfully parsed portion of the string. On failure, return -1.0 and 25 set *endptr to point to the start of the string. */ 26 27#if _PY_SHORT_FLOAT_REPR == 1 28 29double 30_Py_parse_inf_or_nan(const char *p, char **endptr) 31{ 32 double retval; 33 const char *s; 34 int negate = 0; 35 36 s = p; 37 if (*s == '-') { 38 negate = 1; 39 s++; 40 } 41 else if (*s == '+') { 42 s++; 43 } 44 if (case_insensitive_match(s, "inf")) { 45 s += 3; 46 if (case_insensitive_match(s, "inity")) 47 s += 5; 48 retval = _Py_dg_infinity(negate); 49 } 50 else if (case_insensitive_match(s, "nan")) { 51 s += 3; 52 retval = _Py_dg_stdnan(negate); 53 } 54 else { 55 s = p; 56 retval = -1.0; 57 } 58 *endptr = (char *)s; 59 return retval; 60} 61 62#else 63 64double 65_Py_parse_inf_or_nan(const char *p, char **endptr) 66{ 67 double retval; 68 const char *s; 69 int negate = 0; 70 71 s = p; 72 if (*s == '-') { 73 negate = 1; 74 s++; 75 } 76 else if (*s == '+') { 77 s++; 78 } 79 if (case_insensitive_match(s, "inf")) { 80 s += 3; 81 if (case_insensitive_match(s, "inity")) 82 s += 5; 83 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL; 84 } 85 else if (case_insensitive_match(s, "nan")) { 86 s += 3; 87 retval = negate ? -Py_NAN : Py_NAN; 88 } 89 else { 90 s = p; 91 retval = -1.0; 92 } 93 *endptr = (char *)s; 94 return retval; 95} 96 97#endif 98 99/** 100 * _PyOS_ascii_strtod: 101 * @nptr: the string to convert to a numeric value. 102 * @endptr: if non-%NULL, it returns the character after 103 * the last character used in the conversion. 104 * 105 * Converts a string to a #gdouble value. 106 * This function behaves like the standard strtod() function 107 * does in the C locale. It does this without actually 108 * changing the current locale, since that would not be 109 * thread-safe. 110 * 111 * This function is typically used when reading configuration 112 * files or other non-user input that should be locale independent. 113 * To handle input from the user you should normally use the 114 * locale-sensitive system strtod() function. 115 * 116 * If the correct value would cause overflow, plus or minus %HUGE_VAL 117 * is returned (according to the sign of the value), and %ERANGE is 118 * stored in %errno. If the correct value would cause underflow, 119 * zero is returned and %ERANGE is stored in %errno. 120 * If memory allocation fails, %ENOMEM is stored in %errno. 121 * 122 * This function resets %errno before calling strtod() so that 123 * you can reliably detect overflow and underflow. 124 * 125 * Return value: the #gdouble value. 126 **/ 127 128#if _PY_SHORT_FLOAT_REPR == 1 129 130static double 131_PyOS_ascii_strtod(const char *nptr, char **endptr) 132{ 133 double result; 134 _Py_SET_53BIT_PRECISION_HEADER; 135 136 assert(nptr != NULL); 137 /* Set errno to zero, so that we can distinguish zero results 138 and underflows */ 139 errno = 0; 140 141 _Py_SET_53BIT_PRECISION_START; 142 result = _Py_dg_strtod(nptr, endptr); 143 _Py_SET_53BIT_PRECISION_END; 144 145 if (*endptr == nptr) 146 /* string might represent an inf or nan */ 147 result = _Py_parse_inf_or_nan(nptr, endptr); 148 149 return result; 150 151} 152 153#else 154 155/* 156 Use system strtod; since strtod is locale aware, we may 157 have to first fix the decimal separator. 158 159 Note that unlike _Py_dg_strtod, the system strtod may not always give 160 correctly rounded results. 161*/ 162 163static double 164_PyOS_ascii_strtod(const char *nptr, char **endptr) 165{ 166 char *fail_pos; 167 double val; 168 struct lconv *locale_data; 169 const char *decimal_point; 170 size_t decimal_point_len; 171 const char *p, *decimal_point_pos; 172 const char *end = NULL; /* Silence gcc */ 173 const char *digits_pos = NULL; 174 int negate = 0; 175 176 assert(nptr != NULL); 177 178 fail_pos = NULL; 179 180 locale_data = localeconv(); 181 decimal_point = locale_data->decimal_point; 182 decimal_point_len = strlen(decimal_point); 183 184 assert(decimal_point_len != 0); 185 186 decimal_point_pos = NULL; 187 188 /* Parse infinities and nans */ 189 val = _Py_parse_inf_or_nan(nptr, endptr); 190 if (*endptr != nptr) 191 return val; 192 193 /* Set errno to zero, so that we can distinguish zero results 194 and underflows */ 195 errno = 0; 196 197 /* We process the optional sign manually, then pass the remainder to 198 the system strtod. This ensures that the result of an underflow 199 has the correct sign. (bug #1725) */ 200 p = nptr; 201 /* Process leading sign, if present */ 202 if (*p == '-') { 203 negate = 1; 204 p++; 205 } 206 else if (*p == '+') { 207 p++; 208 } 209 210 /* Some platform strtods accept hex floats; Python shouldn't (at the 211 moment), so we check explicitly for strings starting with '0x'. */ 212 if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) 213 goto invalid_string; 214 215 /* Check that what's left begins with a digit or decimal point */ 216 if (!Py_ISDIGIT(*p) && *p != '.') 217 goto invalid_string; 218 219 digits_pos = p; 220 if (decimal_point[0] != '.' || 221 decimal_point[1] != 0) 222 { 223 /* Look for a '.' in the input; if present, it'll need to be 224 swapped for the current locale's decimal point before we 225 call strtod. On the other hand, if we find the current 226 locale's decimal point then the input is invalid. */ 227 while (Py_ISDIGIT(*p)) 228 p++; 229 230 if (*p == '.') 231 { 232 decimal_point_pos = p++; 233 234 /* locate end of number */ 235 while (Py_ISDIGIT(*p)) 236 p++; 237 238 if (*p == 'e' || *p == 'E') 239 p++; 240 if (*p == '+' || *p == '-') 241 p++; 242 while (Py_ISDIGIT(*p)) 243 p++; 244 end = p; 245 } 246 else if (strncmp(p, decimal_point, decimal_point_len) == 0) 247 /* Python bug #1417699 */ 248 goto invalid_string; 249 /* For the other cases, we need not convert the decimal 250 point */ 251 } 252 253 if (decimal_point_pos) { 254 char *copy, *c; 255 /* Create a copy of the input, with the '.' converted to the 256 locale-specific decimal point */ 257 copy = (char *)PyMem_Malloc(end - digits_pos + 258 1 + decimal_point_len); 259 if (copy == NULL) { 260 *endptr = (char *)nptr; 261 errno = ENOMEM; 262 return val; 263 } 264 265 c = copy; 266 memcpy(c, digits_pos, decimal_point_pos - digits_pos); 267 c += decimal_point_pos - digits_pos; 268 memcpy(c, decimal_point, decimal_point_len); 269 c += decimal_point_len; 270 memcpy(c, decimal_point_pos + 1, 271 end - (decimal_point_pos + 1)); 272 c += end - (decimal_point_pos + 1); 273 *c = 0; 274 275 val = strtod(copy, &fail_pos); 276 277 if (fail_pos) 278 { 279 if (fail_pos > decimal_point_pos) 280 fail_pos = (char *)digits_pos + 281 (fail_pos - copy) - 282 (decimal_point_len - 1); 283 else 284 fail_pos = (char *)digits_pos + 285 (fail_pos - copy); 286 } 287 288 PyMem_Free(copy); 289 290 } 291 else { 292 val = strtod(digits_pos, &fail_pos); 293 } 294 295 if (fail_pos == digits_pos) 296 goto invalid_string; 297 298 if (negate && fail_pos != nptr) 299 val = -val; 300 *endptr = fail_pos; 301 302 return val; 303 304 invalid_string: 305 *endptr = (char*)nptr; 306 errno = EINVAL; 307 return -1.0; 308} 309 310#endif 311 312/* PyOS_string_to_double converts a null-terminated byte string s (interpreted 313 as a string of ASCII characters) to a float. The string should not have 314 leading or trailing whitespace. The conversion is independent of the 315 current locale. 316 317 If endptr is NULL, try to convert the whole string. Raise ValueError and 318 return -1.0 if the string is not a valid representation of a floating-point 319 number. 320 321 If endptr is non-NULL, try to convert as much of the string as possible. 322 If no initial segment of the string is the valid representation of a 323 floating-point number then *endptr is set to point to the beginning of the 324 string, -1.0 is returned and again ValueError is raised. 325 326 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine), 327 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python 328 exception is raised. Otherwise, overflow_exception should point to 329 a Python exception, this exception will be raised, -1.0 will be returned, 330 and *endptr will point just past the end of the converted value. 331 332 If any other failure occurs (for example lack of memory), -1.0 is returned 333 and the appropriate Python exception will have been set. 334*/ 335 336double 337PyOS_string_to_double(const char *s, 338 char **endptr, 339 PyObject *overflow_exception) 340{ 341 double x, result=-1.0; 342 char *fail_pos; 343 344 errno = 0; 345 x = _PyOS_ascii_strtod(s, &fail_pos); 346 347 if (errno == ENOMEM) { 348 PyErr_NoMemory(); 349 fail_pos = (char *)s; 350 } 351 else if (!endptr && (fail_pos == s || *fail_pos != '\0')) 352 PyErr_Format(PyExc_ValueError, 353 "could not convert string to float: " 354 "'%.200s'", s); 355 else if (fail_pos == s) 356 PyErr_Format(PyExc_ValueError, 357 "could not convert string to float: " 358 "'%.200s'", s); 359 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception) 360 PyErr_Format(overflow_exception, 361 "value too large to convert to float: " 362 "'%.200s'", s); 363 else 364 result = x; 365 366 if (endptr != NULL) 367 *endptr = fail_pos; 368 return result; 369} 370 371/* Remove underscores that follow the underscore placement rule from 372 the string and then call the `innerfunc` function on the result. 373 It should return a new object or NULL on exception. 374 375 `what` is used for the error message emitted when underscores are detected 376 that don't follow the rule. `arg` is an opaque pointer passed to the inner 377 function. 378 379 This is used to implement underscore-agnostic conversion for floats 380 and complex numbers. 381*/ 382PyObject * 383_Py_string_to_number_with_underscores( 384 const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg, 385 PyObject *(*innerfunc)(const char *, Py_ssize_t, void *)) 386{ 387 char prev; 388 const char *p, *last; 389 char *dup, *end; 390 PyObject *result; 391 392 assert(s[orig_len] == '\0'); 393 394 if (strchr(s, '_') == NULL) { 395 return innerfunc(s, orig_len, arg); 396 } 397 398 dup = PyMem_Malloc(orig_len + 1); 399 if (dup == NULL) { 400 return PyErr_NoMemory(); 401 } 402 end = dup; 403 prev = '\0'; 404 last = s + orig_len; 405 for (p = s; *p; p++) { 406 if (*p == '_') { 407 /* Underscores are only allowed after digits. */ 408 if (!(prev >= '0' && prev <= '9')) { 409 goto error; 410 } 411 } 412 else { 413 *end++ = *p; 414 /* Underscores are only allowed before digits. */ 415 if (prev == '_' && !(*p >= '0' && *p <= '9')) { 416 goto error; 417 } 418 } 419 prev = *p; 420 } 421 /* Underscores are not allowed at the end. */ 422 if (prev == '_') { 423 goto error; 424 } 425 /* No embedded NULs allowed. */ 426 if (p != last) { 427 goto error; 428 } 429 *end = '\0'; 430 result = innerfunc(dup, end - dup, arg); 431 PyMem_Free(dup); 432 return result; 433 434 error: 435 PyMem_Free(dup); 436 PyErr_Format(PyExc_ValueError, 437 "could not convert string to %s: " 438 "%R", what, obj); 439 return NULL; 440} 441 442#if _PY_SHORT_FLOAT_REPR == 0 443 444/* Given a string that may have a decimal point in the current 445 locale, change it back to a dot. Since the string cannot get 446 longer, no need for a maximum buffer size parameter. */ 447Py_LOCAL_INLINE(void) 448change_decimal_from_locale_to_dot(char* buffer) 449{ 450 struct lconv *locale_data = localeconv(); 451 const char *decimal_point = locale_data->decimal_point; 452 453 if (decimal_point[0] != '.' || decimal_point[1] != 0) { 454 size_t decimal_point_len = strlen(decimal_point); 455 456 if (*buffer == '+' || *buffer == '-') 457 buffer++; 458 while (Py_ISDIGIT(*buffer)) 459 buffer++; 460 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) { 461 *buffer = '.'; 462 buffer++; 463 if (decimal_point_len > 1) { 464 /* buffer needs to get smaller */ 465 size_t rest_len = strlen(buffer + 466 (decimal_point_len - 1)); 467 memmove(buffer, 468 buffer + (decimal_point_len - 1), 469 rest_len); 470 buffer[rest_len] = 0; 471 } 472 } 473 } 474} 475 476 477/* From the C99 standard, section 7.19.6: 478The exponent always contains at least two digits, and only as many more digits 479as necessary to represent the exponent. 480*/ 481#define MIN_EXPONENT_DIGITS 2 482 483/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS 484 in length. */ 485Py_LOCAL_INLINE(void) 486ensure_minimum_exponent_length(char* buffer, size_t buf_size) 487{ 488 char *p = strpbrk(buffer, "eE"); 489 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { 490 char *start = p + 2; 491 int exponent_digit_cnt = 0; 492 int leading_zero_cnt = 0; 493 int in_leading_zeros = 1; 494 int significant_digit_cnt; 495 496 /* Skip over the exponent and the sign. */ 497 p += 2; 498 499 /* Find the end of the exponent, keeping track of leading 500 zeros. */ 501 while (*p && Py_ISDIGIT(*p)) { 502 if (in_leading_zeros && *p == '0') 503 ++leading_zero_cnt; 504 if (*p != '0') 505 in_leading_zeros = 0; 506 ++p; 507 ++exponent_digit_cnt; 508 } 509 510 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; 511 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { 512 /* If there are 2 exactly digits, we're done, 513 regardless of what they contain */ 514 } 515 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { 516 int extra_zeros_cnt; 517 518 /* There are more than 2 digits in the exponent. See 519 if we can delete some of the leading zeros */ 520 if (significant_digit_cnt < MIN_EXPONENT_DIGITS) 521 significant_digit_cnt = MIN_EXPONENT_DIGITS; 522 extra_zeros_cnt = exponent_digit_cnt - 523 significant_digit_cnt; 524 525 /* Delete extra_zeros_cnt worth of characters from the 526 front of the exponent */ 527 assert(extra_zeros_cnt >= 0); 528 529 /* Add one to significant_digit_cnt to copy the 530 trailing 0 byte, thus setting the length */ 531 memmove(start, 532 start + extra_zeros_cnt, 533 significant_digit_cnt + 1); 534 } 535 else { 536 /* If there are fewer than 2 digits, add zeros 537 until there are 2, if there's enough room */ 538 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; 539 if (start + zeros + exponent_digit_cnt + 1 540 < buffer + buf_size) { 541 memmove(start + zeros, start, 542 exponent_digit_cnt + 1); 543 memset(start, '0', zeros); 544 } 545 } 546 } 547} 548 549/* Remove trailing zeros after the decimal point from a numeric string; also 550 remove the decimal point if all digits following it are zero. The numeric 551 string must end in '\0', and should not have any leading or trailing 552 whitespace. Assumes that the decimal point is '.'. */ 553Py_LOCAL_INLINE(void) 554remove_trailing_zeros(char *buffer) 555{ 556 char *old_fraction_end, *new_fraction_end, *end, *p; 557 558 p = buffer; 559 if (*p == '-' || *p == '+') 560 /* Skip leading sign, if present */ 561 ++p; 562 while (Py_ISDIGIT(*p)) 563 ++p; 564 565 /* if there's no decimal point there's nothing to do */ 566 if (*p++ != '.') 567 return; 568 569 /* scan any digits after the point */ 570 while (Py_ISDIGIT(*p)) 571 ++p; 572 old_fraction_end = p; 573 574 /* scan up to ending '\0' */ 575 while (*p != '\0') 576 p++; 577 /* +1 to make sure that we move the null byte as well */ 578 end = p+1; 579 580 /* scan back from fraction_end, looking for removable zeros */ 581 p = old_fraction_end; 582 while (*(p-1) == '0') 583 --p; 584 /* and remove point if we've got that far */ 585 if (*(p-1) == '.') 586 --p; 587 new_fraction_end = p; 588 589 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end); 590} 591 592/* Ensure that buffer has a decimal point in it. The decimal point will not 593 be in the current locale, it will always be '.'. Don't add a decimal point 594 if an exponent is present. Also, convert to exponential notation where 595 adding a '.0' would produce too many significant digits (see issue 5864). 596 597 Returns a pointer to the fixed buffer, or NULL on failure. 598*/ 599Py_LOCAL_INLINE(char *) 600ensure_decimal_point(char* buffer, size_t buf_size, int precision) 601{ 602 int digit_count, insert_count = 0, convert_to_exp = 0; 603 const char *chars_to_insert; 604 char *digits_start; 605 606 /* search for the first non-digit character */ 607 char *p = buffer; 608 if (*p == '-' || *p == '+') 609 /* Skip leading sign, if present. I think this could only 610 ever be '-', but it can't hurt to check for both. */ 611 ++p; 612 digits_start = p; 613 while (*p && Py_ISDIGIT(*p)) 614 ++p; 615 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int); 616 617 if (*p == '.') { 618 if (Py_ISDIGIT(*(p+1))) { 619 /* Nothing to do, we already have a decimal 620 point and a digit after it */ 621 } 622 else { 623 /* We have a decimal point, but no following 624 digit. Insert a zero after the decimal. */ 625 /* can't ever get here via PyOS_double_to_string */ 626 assert(precision == -1); 627 ++p; 628 chars_to_insert = "0"; 629 insert_count = 1; 630 } 631 } 632 else if (!(*p == 'e' || *p == 'E')) { 633 /* Don't add ".0" if we have an exponent. */ 634 if (digit_count == precision) { 635 /* issue 5864: don't add a trailing .0 in the case 636 where the '%g'-formatted result already has as many 637 significant digits as were requested. Switch to 638 exponential notation instead. */ 639 convert_to_exp = 1; 640 /* no exponent, no point, and we shouldn't land here 641 for infs and nans, so we must be at the end of the 642 string. */ 643 assert(*p == '\0'); 644 } 645 else { 646 assert(precision == -1 || digit_count < precision); 647 chars_to_insert = ".0"; 648 insert_count = 2; 649 } 650 } 651 if (insert_count) { 652 size_t buf_len = strlen(buffer); 653 if (buf_len + insert_count + 1 >= buf_size) { 654 /* If there is not enough room in the buffer 655 for the additional text, just skip it. It's 656 not worth generating an error over. */ 657 } 658 else { 659 memmove(p + insert_count, p, 660 buffer + strlen(buffer) - p + 1); 661 memcpy(p, chars_to_insert, insert_count); 662 } 663 } 664 if (convert_to_exp) { 665 int written; 666 size_t buf_avail; 667 p = digits_start; 668 /* insert decimal point */ 669 assert(digit_count >= 1); 670 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */ 671 p[1] = '.'; 672 p += digit_count+1; 673 assert(p <= buf_size+buffer); 674 buf_avail = buf_size+buffer-p; 675 if (buf_avail == 0) 676 return NULL; 677 /* Add exponent. It's okay to use lower case 'e': we only 678 arrive here as a result of using the empty format code or 679 repr/str builtins and those never want an upper case 'E' */ 680 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1); 681 if (!(0 <= written && 682 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int))) 683 /* output truncated, or something else bad happened */ 684 return NULL; 685 remove_trailing_zeros(buffer); 686 } 687 return buffer; 688} 689 690/* see FORMATBUFLEN in unicodeobject.c */ 691#define FLOAT_FORMATBUFLEN 120 692 693/** 694 * _PyOS_ascii_formatd: 695 * @buffer: A buffer to place the resulting string in 696 * @buf_size: The length of the buffer. 697 * @format: The printf()-style format to use for the 698 * code to use for converting. 699 * @d: The #gdouble to convert 700 * @precision: The precision to use when formatting. 701 * 702 * Converts a #gdouble to a string, using the '.' as 703 * decimal point. To format the number you pass in 704 * a printf()-style format string. Allowed conversion 705 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'. 706 * 707 * 'Z' is the same as 'g', except it always has a decimal and 708 * at least one digit after the decimal. 709 * 710 * Return value: The pointer to the buffer with the converted string. 711 * On failure returns NULL but does not set any Python exception. 712 **/ 713static char * 714_PyOS_ascii_formatd(char *buffer, 715 size_t buf_size, 716 const char *format, 717 double d, 718 int precision) 719{ 720 char format_char; 721 size_t format_len = strlen(format); 722 723 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but 724 also with at least one character past the decimal. */ 725 char tmp_format[FLOAT_FORMATBUFLEN]; 726 727 /* The last character in the format string must be the format char */ 728 format_char = format[format_len - 1]; 729 730 if (format[0] != '%') 731 return NULL; 732 733 /* I'm not sure why this test is here. It's ensuring that the format 734 string after the first character doesn't have a single quote, a 735 lowercase l, or a percent. This is the reverse of the commented-out 736 test about 10 lines ago. */ 737 if (strpbrk(format + 1, "'l%")) 738 return NULL; 739 740 /* Also curious about this function is that it accepts format strings 741 like "%xg", which are invalid for floats. In general, the 742 interface to this function is not very good, but changing it is 743 difficult because it's a public API. */ 744 745 if (!(format_char == 'e' || format_char == 'E' || 746 format_char == 'f' || format_char == 'F' || 747 format_char == 'g' || format_char == 'G' || 748 format_char == 'Z')) 749 return NULL; 750 751 /* Map 'Z' format_char to 'g', by copying the format string and 752 replacing the final char with a 'g' */ 753 if (format_char == 'Z') { 754 if (format_len + 1 >= sizeof(tmp_format)) { 755 /* The format won't fit in our copy. Error out. In 756 practice, this will never happen and will be 757 detected by returning NULL */ 758 return NULL; 759 } 760 strcpy(tmp_format, format); 761 tmp_format[format_len - 1] = 'g'; 762 format = tmp_format; 763 } 764 765 766 /* Have PyOS_snprintf do the hard work */ 767 PyOS_snprintf(buffer, buf_size, format, d); 768 769 /* Do various fixups on the return string */ 770 771 /* Get the current locale, and find the decimal point string. 772 Convert that string back to a dot. */ 773 change_decimal_from_locale_to_dot(buffer); 774 775 /* If an exponent exists, ensure that the exponent is at least 776 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough 777 for the extra zeros. Also, if there are more than 778 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get 779 back to MIN_EXPONENT_DIGITS */ 780 ensure_minimum_exponent_length(buffer, buf_size); 781 782 /* If format_char is 'Z', make sure we have at least one character 783 after the decimal point (and make sure we have a decimal point); 784 also switch to exponential notation in some edge cases where the 785 extra character would produce more significant digits that we 786 really want. */ 787 if (format_char == 'Z') 788 buffer = ensure_decimal_point(buffer, buf_size, precision); 789 790 return buffer; 791} 792 793/* The fallback code to use if _Py_dg_dtoa is not available. */ 794 795char * PyOS_double_to_string(double val, 796 char format_code, 797 int precision, 798 int flags, 799 int *type) 800{ 801 char format[32]; 802 Py_ssize_t bufsize; 803 char *buf; 804 int t, exp; 805 int upper = 0; 806 807 /* Validate format_code, and map upper and lower case */ 808 switch (format_code) { 809 case 'e': /* exponent */ 810 case 'f': /* fixed */ 811 case 'g': /* general */ 812 break; 813 case 'E': 814 upper = 1; 815 format_code = 'e'; 816 break; 817 case 'F': 818 upper = 1; 819 format_code = 'f'; 820 break; 821 case 'G': 822 upper = 1; 823 format_code = 'g'; 824 break; 825 case 'r': /* repr format */ 826 /* Supplied precision is unused, must be 0. */ 827 if (precision != 0) { 828 PyErr_BadInternalCall(); 829 return NULL; 830 } 831 /* The repr() precision (17 significant decimal digits) is the 832 minimal number that is guaranteed to have enough precision 833 so that if the number is read back in the exact same binary 834 value is recreated. This is true for IEEE floating point 835 by design, and also happens to work for all other modern 836 hardware. */ 837 precision = 17; 838 format_code = 'g'; 839 break; 840 default: 841 PyErr_BadInternalCall(); 842 return NULL; 843 } 844 845 /* Here's a quick-and-dirty calculation to figure out how big a buffer 846 we need. In general, for a finite float we need: 847 848 1 byte for each digit of the decimal significand, and 849 850 1 for a possible sign 851 1 for a possible decimal point 852 2 for a possible [eE][+-] 853 1 for each digit of the exponent; if we allow 19 digits 854 total then we're safe up to exponents of 2**63. 855 1 for the trailing nul byte 856 857 This gives a total of 24 + the number of digits in the significand, 858 and the number of digits in the significand is: 859 860 for 'g' format: at most precision, except possibly 861 when precision == 0, when it's 1. 862 for 'e' format: precision+1 863 for 'f' format: precision digits after the point, at least 1 864 before. To figure out how many digits appear before the point 865 we have to examine the size of the number. If fabs(val) < 1.0 866 then there will be only one digit before the point. If 867 fabs(val) >= 1.0, then there are at most 868 869 1+floor(log10(ceiling(fabs(val)))) 870 871 digits before the point (where the 'ceiling' allows for the 872 possibility that the rounding rounds the integer part of val 873 up). A safe upper bound for the above quantity is 874 1+floor(exp/3), where exp is the unique integer such that 0.5 875 <= fabs(val)/2**exp < 1.0. This exp can be obtained from 876 frexp. 877 878 So we allow room for precision+1 digits for all formats, plus an 879 extra floor(exp/3) digits for 'f' format. 880 881 */ 882 883 if (Py_IS_NAN(val) || Py_IS_INFINITY(val)) 884 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */ 885 bufsize = 5; 886 else { 887 bufsize = 25 + precision; 888 if (format_code == 'f' && fabs(val) >= 1.0) { 889 frexp(val, &exp); 890 bufsize += exp/3; 891 } 892 } 893 894 buf = PyMem_Malloc(bufsize); 895 if (buf == NULL) { 896 PyErr_NoMemory(); 897 return NULL; 898 } 899 900 /* Handle nan and inf. */ 901 if (Py_IS_NAN(val)) { 902 strcpy(buf, "nan"); 903 t = Py_DTST_NAN; 904 } else if (Py_IS_INFINITY(val)) { 905 if (copysign(1., val) == 1.) 906 strcpy(buf, "inf"); 907 else 908 strcpy(buf, "-inf"); 909 t = Py_DTST_INFINITE; 910 } else { 911 t = Py_DTST_FINITE; 912 if (flags & Py_DTSF_ADD_DOT_0) 913 format_code = 'Z'; 914 915 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c", 916 (flags & Py_DTSF_ALT ? "#" : ""), precision, 917 format_code); 918 _PyOS_ascii_formatd(buf, bufsize, format, val, precision); 919 920 if (flags & Py_DTSF_NO_NEG_0 && buf[0] == '-') { 921 char *buf2 = buf + 1; 922 while (*buf2 == '0' || *buf2 == '.') { 923 ++buf2; 924 } 925 if (*buf2 == 0 || *buf2 == 'e') { 926 size_t len = buf2 - buf + strlen(buf2); 927 assert(buf[len] == 0); 928 memmove(buf, buf+1, len); 929 } 930 } 931 } 932 933 /* Add sign when requested. It's convenient (esp. when formatting 934 complex numbers) to include a sign even for inf and nan. */ 935 if (flags & Py_DTSF_SIGN && buf[0] != '-') { 936 size_t len = strlen(buf); 937 /* the bufsize calculations above should ensure that we've got 938 space to add a sign */ 939 assert((size_t)bufsize >= len+2); 940 memmove(buf+1, buf, len+1); 941 buf[0] = '+'; 942 } 943 if (upper) { 944 /* Convert to upper case. */ 945 char *p1; 946 for (p1 = buf; *p1; p1++) 947 *p1 = Py_TOUPPER(*p1); 948 } 949 950 if (type) 951 *type = t; 952 return buf; 953} 954 955#else // _PY_SHORT_FLOAT_REPR == 1 956 957/* _Py_dg_dtoa is available. */ 958 959/* I'm using a lookup table here so that I don't have to invent a non-locale 960 specific way to convert to uppercase */ 961#define OFS_INF 0 962#define OFS_NAN 1 963#define OFS_E 2 964 965/* The lengths of these are known to the code below, so don't change them */ 966static const char * const lc_float_strings[] = { 967 "inf", 968 "nan", 969 "e", 970}; 971static const char * const uc_float_strings[] = { 972 "INF", 973 "NAN", 974 "E", 975}; 976 977 978/* Convert a double d to a string, and return a PyMem_Malloc'd block of 979 memory contain the resulting string. 980 981 Arguments: 982 d is the double to be converted 983 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g' 984 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr. 985 mode is one of '0', '2' or '3', and is completely determined by 986 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0. 987 precision is the desired precision 988 always_add_sign is nonzero if a '+' sign should be included for positive 989 numbers 990 add_dot_0_if_integer is nonzero if integers in non-exponential form 991 should have ".0" added. Only applies to format codes 'r' and 'g'. 992 use_alt_formatting is nonzero if alternative formatting should be 993 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g', 994 at most one of use_alt_formatting and add_dot_0_if_integer should 995 be nonzero. 996 type, if non-NULL, will be set to one of these constants to identify 997 the type of the 'd' argument: 998 Py_DTST_FINITE 999 Py_DTST_INFINITE 1000 Py_DTST_NAN 1001 1002 Returns a PyMem_Malloc'd block of memory containing the resulting string, 1003 or NULL on error. If NULL is returned, the Python error has been set. 1004 */ 1005 1006static char * 1007format_float_short(double d, char format_code, 1008 int mode, int precision, 1009 int always_add_sign, int add_dot_0_if_integer, 1010 int use_alt_formatting, int no_negative_zero, 1011 const char * const *float_strings, int *type) 1012{ 1013 char *buf = NULL; 1014 char *p = NULL; 1015 Py_ssize_t bufsize = 0; 1016 char *digits, *digits_end; 1017 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0; 1018 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end; 1019 _Py_SET_53BIT_PRECISION_HEADER; 1020 1021 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent). 1022 Must be matched by a call to _Py_dg_freedtoa. */ 1023 _Py_SET_53BIT_PRECISION_START; 1024 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign, 1025 &digits_end); 1026 _Py_SET_53BIT_PRECISION_END; 1027 1028 decpt = (Py_ssize_t)decpt_as_int; 1029 if (digits == NULL) { 1030 /* The only failure mode is no memory. */ 1031 PyErr_NoMemory(); 1032 goto exit; 1033 } 1034 assert(digits_end != NULL && digits_end >= digits); 1035 digits_len = digits_end - digits; 1036 1037 if (no_negative_zero && sign == 1 && 1038 (digits_len == 0 || (digits_len == 1 && digits[0] == '0'))) { 1039 sign = 0; 1040 } 1041 1042 if (digits_len && !Py_ISDIGIT(digits[0])) { 1043 /* Infinities and nans here; adapt Gay's output, 1044 so convert Infinity to inf and NaN to nan, and 1045 ignore sign of nan. Then return. */ 1046 1047 /* ignore the actual sign of a nan */ 1048 if (digits[0] == 'n' || digits[0] == 'N') 1049 sign = 0; 1050 1051 /* We only need 5 bytes to hold the result "+inf\0" . */ 1052 bufsize = 5; /* Used later in an assert. */ 1053 buf = (char *)PyMem_Malloc(bufsize); 1054 if (buf == NULL) { 1055 PyErr_NoMemory(); 1056 goto exit; 1057 } 1058 p = buf; 1059 1060 if (sign == 1) { 1061 *p++ = '-'; 1062 } 1063 else if (always_add_sign) { 1064 *p++ = '+'; 1065 } 1066 if (digits[0] == 'i' || digits[0] == 'I') { 1067 strncpy(p, float_strings[OFS_INF], 3); 1068 p += 3; 1069 1070 if (type) 1071 *type = Py_DTST_INFINITE; 1072 } 1073 else if (digits[0] == 'n' || digits[0] == 'N') { 1074 strncpy(p, float_strings[OFS_NAN], 3); 1075 p += 3; 1076 1077 if (type) 1078 *type = Py_DTST_NAN; 1079 } 1080 else { 1081 /* shouldn't get here: Gay's code should always return 1082 something starting with a digit, an 'I', or 'N' */ 1083 Py_UNREACHABLE(); 1084 } 1085 goto exit; 1086 } 1087 1088 /* The result must be finite (not inf or nan). */ 1089 if (type) 1090 *type = Py_DTST_FINITE; 1091 1092 1093 /* We got digits back, format them. We may need to pad 'digits' 1094 either on the left or right (or both) with extra zeros, so in 1095 general the resulting string has the form 1096 1097 [<sign>]<zeros><digits><zeros>[<exponent>] 1098 1099 where either of the <zeros> pieces could be empty, and there's a 1100 decimal point that could appear either in <digits> or in the 1101 leading or trailing <zeros>. 1102 1103 Imagine an infinite 'virtual' string vdigits, consisting of the 1104 string 'digits' (starting at index 0) padded on both the left and 1105 right with infinite strings of zeros. We want to output a slice 1106 1107 vdigits[vdigits_start : vdigits_end] 1108 1109 of this virtual string. Thus if vdigits_start < 0 then we'll end 1110 up producing some leading zeros; if vdigits_end > digits_len there 1111 will be trailing zeros in the output. The next section of code 1112 determines whether to use an exponent or not, figures out the 1113 position 'decpt' of the decimal point, and computes 'vdigits_start' 1114 and 'vdigits_end'. */ 1115 vdigits_end = digits_len; 1116 switch (format_code) { 1117 case 'e': 1118 use_exp = 1; 1119 vdigits_end = precision; 1120 break; 1121 case 'f': 1122 vdigits_end = decpt + precision; 1123 break; 1124 case 'g': 1125 if (decpt <= -4 || decpt > 1126 (add_dot_0_if_integer ? precision-1 : precision)) 1127 use_exp = 1; 1128 if (use_alt_formatting) 1129 vdigits_end = precision; 1130 break; 1131 case 'r': 1132 /* convert to exponential format at 1e16. We used to convert 1133 at 1e17, but that gives odd-looking results for some values 1134 when a 16-digit 'shortest' repr is padded with bogus zeros. 1135 For example, repr(2e16+8) would give 20000000000000010.0; 1136 the true value is 20000000000000008.0. */ 1137 if (decpt <= -4 || decpt > 16) 1138 use_exp = 1; 1139 break; 1140 default: 1141 PyErr_BadInternalCall(); 1142 goto exit; 1143 } 1144 1145 /* if using an exponent, reset decimal point position to 1 and adjust 1146 exponent accordingly.*/ 1147 if (use_exp) { 1148 exp = (int)decpt - 1; 1149 decpt = 1; 1150 } 1151 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start < 1152 decpt < vdigits_end if add_dot_0_if_integer and no exponent */ 1153 vdigits_start = decpt <= 0 ? decpt-1 : 0; 1154 if (!use_exp && add_dot_0_if_integer) 1155 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1; 1156 else 1157 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt; 1158 1159 /* double check inequalities */ 1160 assert(vdigits_start <= 0 && 1161 0 <= digits_len && 1162 digits_len <= vdigits_end); 1163 /* decimal point should be in (vdigits_start, vdigits_end] */ 1164 assert(vdigits_start < decpt && decpt <= vdigits_end); 1165 1166 /* Compute an upper bound how much memory we need. This might be a few 1167 chars too long, but no big deal. */ 1168 bufsize = 1169 /* sign, decimal point and trailing 0 byte */ 1170 3 + 1171 1172 /* total digit count (including zero padding on both sides) */ 1173 (vdigits_end - vdigits_start) + 1174 1175 /* exponent "e+100", max 3 numerical digits */ 1176 (use_exp ? 5 : 0); 1177 1178 /* Now allocate the memory and initialize p to point to the start of 1179 it. */ 1180 buf = (char *)PyMem_Malloc(bufsize); 1181 if (buf == NULL) { 1182 PyErr_NoMemory(); 1183 goto exit; 1184 } 1185 p = buf; 1186 1187 /* Add a negative sign if negative, and a plus sign if non-negative 1188 and always_add_sign is true. */ 1189 if (sign == 1) 1190 *p++ = '-'; 1191 else if (always_add_sign) 1192 *p++ = '+'; 1193 1194 /* note that exactly one of the three 'if' conditions is true, 1195 so we include exactly one decimal point */ 1196 /* Zero padding on left of digit string */ 1197 if (decpt <= 0) { 1198 memset(p, '0', decpt-vdigits_start); 1199 p += decpt - vdigits_start; 1200 *p++ = '.'; 1201 memset(p, '0', 0-decpt); 1202 p += 0-decpt; 1203 } 1204 else { 1205 memset(p, '0', 0-vdigits_start); 1206 p += 0 - vdigits_start; 1207 } 1208 1209 /* Digits, with included decimal point */ 1210 if (0 < decpt && decpt <= digits_len) { 1211 strncpy(p, digits, decpt-0); 1212 p += decpt-0; 1213 *p++ = '.'; 1214 strncpy(p, digits+decpt, digits_len-decpt); 1215 p += digits_len-decpt; 1216 } 1217 else { 1218 strncpy(p, digits, digits_len); 1219 p += digits_len; 1220 } 1221 1222 /* And zeros on the right */ 1223 if (digits_len < decpt) { 1224 memset(p, '0', decpt-digits_len); 1225 p += decpt-digits_len; 1226 *p++ = '.'; 1227 memset(p, '0', vdigits_end-decpt); 1228 p += vdigits_end-decpt; 1229 } 1230 else { 1231 memset(p, '0', vdigits_end-digits_len); 1232 p += vdigits_end-digits_len; 1233 } 1234 1235 /* Delete a trailing decimal pt unless using alternative formatting. */ 1236 if (p[-1] == '.' && !use_alt_formatting) 1237 p--; 1238 1239 /* Now that we've done zero padding, add an exponent if needed. */ 1240 if (use_exp) { 1241 *p++ = float_strings[OFS_E][0]; 1242 exp_len = sprintf(p, "%+.02d", exp); 1243 p += exp_len; 1244 } 1245 exit: 1246 if (buf) { 1247 *p = '\0'; 1248 /* It's too late if this fails, as we've already stepped on 1249 memory that isn't ours. But it's an okay debugging test. */ 1250 assert(p-buf < bufsize); 1251 } 1252 if (digits) 1253 _Py_dg_freedtoa(digits); 1254 1255 return buf; 1256} 1257 1258 1259char * PyOS_double_to_string(double val, 1260 char format_code, 1261 int precision, 1262 int flags, 1263 int *type) 1264{ 1265 const char * const *float_strings = lc_float_strings; 1266 int mode; 1267 1268 /* Validate format_code, and map upper and lower case. Compute the 1269 mode and make any adjustments as needed. */ 1270 switch (format_code) { 1271 /* exponent */ 1272 case 'E': 1273 float_strings = uc_float_strings; 1274 format_code = 'e'; 1275 /* Fall through. */ 1276 case 'e': 1277 mode = 2; 1278 precision++; 1279 break; 1280 1281 /* fixed */ 1282 case 'F': 1283 float_strings = uc_float_strings; 1284 format_code = 'f'; 1285 /* Fall through. */ 1286 case 'f': 1287 mode = 3; 1288 break; 1289 1290 /* general */ 1291 case 'G': 1292 float_strings = uc_float_strings; 1293 format_code = 'g'; 1294 /* Fall through. */ 1295 case 'g': 1296 mode = 2; 1297 /* precision 0 makes no sense for 'g' format; interpret as 1 */ 1298 if (precision == 0) 1299 precision = 1; 1300 break; 1301 1302 /* repr format */ 1303 case 'r': 1304 mode = 0; 1305 /* Supplied precision is unused, must be 0. */ 1306 if (precision != 0) { 1307 PyErr_BadInternalCall(); 1308 return NULL; 1309 } 1310 break; 1311 1312 default: 1313 PyErr_BadInternalCall(); 1314 return NULL; 1315 } 1316 1317 return format_float_short(val, format_code, mode, precision, 1318 flags & Py_DTSF_SIGN, 1319 flags & Py_DTSF_ADD_DOT_0, 1320 flags & Py_DTSF_ALT, 1321 flags & Py_DTSF_NO_NEG_0, 1322 float_strings, type); 1323} 1324#endif // _PY_SHORT_FLOAT_REPR == 1 1325