xref: /third_party/python/Python/pystrtod.c (revision 7db96d56)
1/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include "pycore_dtoa.h"          // _Py_dg_strtod()
5#include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
6#include <locale.h>
7
8/* Case-insensitive string match used for nan and inf detection; t should be
9   lower-case.  Returns 1 for a successful match, 0 otherwise. */
10
static int
case_insensitive_match(const char *s, const char *t)
{
    /* Compare s against the (already lower-case) pattern t, one character
       at a time.  Only as many characters of s as t has are examined, so
       s may continue past the matched prefix.  A NUL in s can never equal
       a pattern character, so the scan cannot run off the end of s. */
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            /* mismatch before the pattern was exhausted */
            return 0;
        }
    }
    /* every character of the pattern was matched */
    return 1;
}
20
21/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
22   "infinity", with an optional leading sign of "+" or "-".  On success,
23   return the NaN or Infinity as a double and set *endptr to point just beyond
24   the successfully parsed portion of the string.  On failure, return -1.0 and
25   set *endptr to point to the start of the string. */
26
27#if _PY_SHORT_FLOAT_REPR == 1
28
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    /* Recognise [+-](inf|infinity|nan), case-insensitively.  On success
       *endptr is set just past the recognised text; otherwise it is set
       to p and -1.0 is returned. */
    const char *cur = p;
    int is_negative = 0;
    double value;

    /* Consume at most one leading sign character. */
    switch (*cur) {
    case '-':
        is_negative = 1;
        cur++;
        break;
    case '+':
        cur++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cur, "inf")) {
        cur += 3;
        /* Accept the long spelling "infinity" too. */
        if (case_insensitive_match(cur, "inity")) {
            cur += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cur, "nan")) {
        cur += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: rewind so that a lone sign is not consumed. */
        cur = p;
        value = -1.0;
    }

    *endptr = (char *)cur;
    return value;
}
61
62#else
63
64double
65_Py_parse_inf_or_nan(const char *p, char **endptr)
66{
67    double retval;
68    const char *s;
69    int negate = 0;
70
71    s = p;
72    if (*s == '-') {
73        negate = 1;
74        s++;
75    }
76    else if (*s == '+') {
77        s++;
78    }
79    if (case_insensitive_match(s, "inf")) {
80        s += 3;
81        if (case_insensitive_match(s, "inity"))
82            s += 5;
83        retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
84    }
85    else if (case_insensitive_match(s, "nan")) {
86        s += 3;
87        retval = negate ? -Py_NAN : Py_NAN;
88    }
89    else {
90        s = p;
91        retval = -1.0;
92    }
93    *endptr = (char *)s;
94    return retval;
95}
96
97#endif
98
99/**
100 * _PyOS_ascii_strtod:
101 * @nptr:    the string to convert to a numeric value.
102 * @endptr:  if non-%NULL, it returns the character after
103 *           the last character used in the conversion.
104 *
 * Converts a string to a double value.
106 * This function behaves like the standard strtod() function
107 * does in the C locale. It does this without actually
108 * changing the current locale, since that would not be
109 * thread-safe.
110 *
111 * This function is typically used when reading configuration
112 * files or other non-user input that should be locale independent.
113 * To handle input from the user you should normally use the
114 * locale-sensitive system strtod() function.
115 *
116 * If the correct value would cause overflow, plus or minus %HUGE_VAL
117 * is returned (according to the sign of the value), and %ERANGE is
118 * stored in %errno. If the correct value would cause underflow,
119 * zero is returned and %ERANGE is stored in %errno.
120 * If memory allocation fails, %ENOMEM is stored in %errno.
121 *
122 * This function resets %errno before calling strtod() so that
123 * you can reliably detect overflow and underflow.
124 *
 * Return value: the converted double value.
126 **/
127
128#if _PY_SHORT_FLOAT_REPR == 1
129
130static double
131_PyOS_ascii_strtod(const char *nptr, char **endptr)
132{
133    double result;
134    _Py_SET_53BIT_PRECISION_HEADER;
135
136    assert(nptr != NULL);
137    /* Set errno to zero, so that we can distinguish zero results
138       and underflows */
139    errno = 0;
140
141    _Py_SET_53BIT_PRECISION_START;
142    result = _Py_dg_strtod(nptr, endptr);
143    _Py_SET_53BIT_PRECISION_END;
144
145    if (*endptr == nptr)
146        /* string might represent an inf or nan */
147        result = _Py_parse_inf_or_nan(nptr, endptr);
148
149    return result;
150
151}
152
153#else
154
155/*
156   Use system strtod;  since strtod is locale aware, we may
157   have to first fix the decimal separator.
158
159   Note that unlike _Py_dg_strtod, the system strtod may not always give
160   correctly rounded results.
161*/
162
163static double
164_PyOS_ascii_strtod(const char *nptr, char **endptr)
165{
166    char *fail_pos;
167    double val;
168    struct lconv *locale_data;
169    const char *decimal_point;
170    size_t decimal_point_len;
171    const char *p, *decimal_point_pos;
172    const char *end = NULL; /* Silence gcc */
173    const char *digits_pos = NULL;
174    int negate = 0;
175
176    assert(nptr != NULL);
177
178    fail_pos = NULL;
179
180    locale_data = localeconv();
181    decimal_point = locale_data->decimal_point;
182    decimal_point_len = strlen(decimal_point);
183
184    assert(decimal_point_len != 0);
185
186    decimal_point_pos = NULL;
187
188    /* Parse infinities and nans */
189    val = _Py_parse_inf_or_nan(nptr, endptr);
190    if (*endptr != nptr)
191        return val;
192
193    /* Set errno to zero, so that we can distinguish zero results
194       and underflows */
195    errno = 0;
196
197    /* We process the optional sign manually, then pass the remainder to
198       the system strtod.  This ensures that the result of an underflow
199       has the correct sign. (bug #1725)  */
200    p = nptr;
201    /* Process leading sign, if present */
202    if (*p == '-') {
203        negate = 1;
204        p++;
205    }
206    else if (*p == '+') {
207        p++;
208    }
209
210    /* Some platform strtods accept hex floats; Python shouldn't (at the
211       moment), so we check explicitly for strings starting with '0x'. */
212    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
213        goto invalid_string;
214
215    /* Check that what's left begins with a digit or decimal point */
216    if (!Py_ISDIGIT(*p) && *p != '.')
217        goto invalid_string;
218
219    digits_pos = p;
220    if (decimal_point[0] != '.' ||
221        decimal_point[1] != 0)
222    {
223        /* Look for a '.' in the input; if present, it'll need to be
224           swapped for the current locale's decimal point before we
225           call strtod.  On the other hand, if we find the current
226           locale's decimal point then the input is invalid. */
227        while (Py_ISDIGIT(*p))
228            p++;
229
230        if (*p == '.')
231        {
232            decimal_point_pos = p++;
233
234            /* locate end of number */
235            while (Py_ISDIGIT(*p))
236                p++;
237
238            if (*p == 'e' || *p == 'E')
239                p++;
240            if (*p == '+' || *p == '-')
241                p++;
242            while (Py_ISDIGIT(*p))
243                p++;
244            end = p;
245        }
246        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
247            /* Python bug #1417699 */
248            goto invalid_string;
249        /* For the other cases, we need not convert the decimal
250           point */
251    }
252
253    if (decimal_point_pos) {
254        char *copy, *c;
255        /* Create a copy of the input, with the '.' converted to the
256           locale-specific decimal point */
257        copy = (char *)PyMem_Malloc(end - digits_pos +
258                                    1 + decimal_point_len);
259        if (copy == NULL) {
260            *endptr = (char *)nptr;
261            errno = ENOMEM;
262            return val;
263        }
264
265        c = copy;
266        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
267        c += decimal_point_pos - digits_pos;
268        memcpy(c, decimal_point, decimal_point_len);
269        c += decimal_point_len;
270        memcpy(c, decimal_point_pos + 1,
271               end - (decimal_point_pos + 1));
272        c += end - (decimal_point_pos + 1);
273        *c = 0;
274
275        val = strtod(copy, &fail_pos);
276
277        if (fail_pos)
278        {
279            if (fail_pos > decimal_point_pos)
280                fail_pos = (char *)digits_pos +
281                    (fail_pos - copy) -
282                    (decimal_point_len - 1);
283            else
284                fail_pos = (char *)digits_pos +
285                    (fail_pos - copy);
286        }
287
288        PyMem_Free(copy);
289
290    }
291    else {
292        val = strtod(digits_pos, &fail_pos);
293    }
294
295    if (fail_pos == digits_pos)
296        goto invalid_string;
297
298    if (negate && fail_pos != nptr)
299        val = -val;
300    *endptr = fail_pos;
301
302    return val;
303
304  invalid_string:
305    *endptr = (char*)nptr;
306    errno = EINVAL;
307    return -1.0;
308}
309
310#endif
311
312/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313   as a string of ASCII characters) to a float.  The string should not have
314   leading or trailing whitespace.  The conversion is independent of the
315   current locale.
316
317   If endptr is NULL, try to convert the whole string.  Raise ValueError and
318   return -1.0 if the string is not a valid representation of a floating-point
319   number.
320
321   If endptr is non-NULL, try to convert as much of the string as possible.
322   If no initial segment of the string is the valid representation of a
323   floating-point number then *endptr is set to point to the beginning of the
324   string, -1.0 is returned and again ValueError is raised.
325
326   On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327   if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328   exception is raised.  Otherwise, overflow_exception should point to
329   a Python exception, this exception will be raised, -1.0 will be returned,
330   and *endptr will point just past the end of the converted value.
331
332   If any other failure occurs (for example lack of memory), -1.0 is returned
333   and the appropriate Python exception will have been set.
334*/
335
336double
337PyOS_string_to_double(const char *s,
338                      char **endptr,
339                      PyObject *overflow_exception)
340{
341    double x, result=-1.0;
342    char *fail_pos;
343
344    errno = 0;
345    x = _PyOS_ascii_strtod(s, &fail_pos);
346
347    if (errno == ENOMEM) {
348        PyErr_NoMemory();
349        fail_pos = (char *)s;
350    }
351    else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
352        PyErr_Format(PyExc_ValueError,
353                      "could not convert string to float: "
354                      "'%.200s'", s);
355    else if (fail_pos == s)
356        PyErr_Format(PyExc_ValueError,
357                      "could not convert string to float: "
358                      "'%.200s'", s);
359    else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
360        PyErr_Format(overflow_exception,
361                      "value too large to convert to float: "
362                      "'%.200s'", s);
363    else
364        result = x;
365
366    if (endptr != NULL)
367        *endptr = fail_pos;
368    return result;
369}
370
371/* Remove underscores that follow the underscore placement rule from
372   the string and then call the `innerfunc` function on the result.
373   It should return a new object or NULL on exception.
374
375   `what` is used for the error message emitted when underscores are detected
376   that don't follow the rule. `arg` is an opaque pointer passed to the inner
377   function.
378
379   This is used to implement underscore-agnostic conversion for floats
380   and complex numbers.
381*/
382PyObject *
383_Py_string_to_number_with_underscores(
384    const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
385    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
386{
387    char prev;
388    const char *p, *last;
389    char *dup, *end;
390    PyObject *result;
391
392    assert(s[orig_len] == '\0');
393
394    if (strchr(s, '_') == NULL) {
395        return innerfunc(s, orig_len, arg);
396    }
397
398    dup = PyMem_Malloc(orig_len + 1);
399    if (dup == NULL) {
400        return PyErr_NoMemory();
401    }
402    end = dup;
403    prev = '\0';
404    last = s + orig_len;
405    for (p = s; *p; p++) {
406        if (*p == '_') {
407            /* Underscores are only allowed after digits. */
408            if (!(prev >= '0' && prev <= '9')) {
409                goto error;
410            }
411        }
412        else {
413            *end++ = *p;
414            /* Underscores are only allowed before digits. */
415            if (prev == '_' && !(*p >= '0' && *p <= '9')) {
416                goto error;
417            }
418        }
419        prev = *p;
420    }
421    /* Underscores are not allowed at the end. */
422    if (prev == '_') {
423        goto error;
424    }
425    /* No embedded NULs allowed. */
426    if (p != last) {
427        goto error;
428    }
429    *end = '\0';
430    result = innerfunc(dup, end - dup, arg);
431    PyMem_Free(dup);
432    return result;
433
434  error:
435    PyMem_Free(dup);
436    PyErr_Format(PyExc_ValueError,
437                 "could not convert string to %s: "
438                 "%R", what, obj);
439    return NULL;
440}
441
442#if _PY_SHORT_FLOAT_REPR == 0
443
444/* Given a string that may have a decimal point in the current
445   locale, change it back to a dot.  Since the string cannot get
446   longer, no need for a maximum buffer size parameter. */
447Py_LOCAL_INLINE(void)
448change_decimal_from_locale_to_dot(char* buffer)
449{
450    struct lconv *locale_data = localeconv();
451    const char *decimal_point = locale_data->decimal_point;
452
453    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
454        size_t decimal_point_len = strlen(decimal_point);
455
456        if (*buffer == '+' || *buffer == '-')
457            buffer++;
458        while (Py_ISDIGIT(*buffer))
459            buffer++;
460        if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
461            *buffer = '.';
462            buffer++;
463            if (decimal_point_len > 1) {
464                /* buffer needs to get smaller */
465                size_t rest_len = strlen(buffer +
466                                     (decimal_point_len - 1));
467                memmove(buffer,
468                    buffer + (decimal_point_len - 1),
469                    rest_len);
470                buffer[rest_len] = 0;
471            }
472        }
473    }
474}
475
476
477/* From the C99 standard, section 7.19.6:
478The exponent always contains at least two digits, and only as many more digits
479as necessary to represent the exponent.
480*/
481#define MIN_EXPONENT_DIGITS 2
482
483/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
484   in length. */
485Py_LOCAL_INLINE(void)
486ensure_minimum_exponent_length(char* buffer, size_t buf_size)
487{
488    char *p = strpbrk(buffer, "eE");
489    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
490        char *start = p + 2;
491        int exponent_digit_cnt = 0;
492        int leading_zero_cnt = 0;
493        int in_leading_zeros = 1;
494        int significant_digit_cnt;
495
496        /* Skip over the exponent and the sign. */
497        p += 2;
498
499        /* Find the end of the exponent, keeping track of leading
500           zeros. */
501        while (*p && Py_ISDIGIT(*p)) {
502            if (in_leading_zeros && *p == '0')
503                ++leading_zero_cnt;
504            if (*p != '0')
505                in_leading_zeros = 0;
506            ++p;
507            ++exponent_digit_cnt;
508        }
509
510        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
511        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
512            /* If there are 2 exactly digits, we're done,
513               regardless of what they contain */
514        }
515        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
516            int extra_zeros_cnt;
517
518            /* There are more than 2 digits in the exponent.  See
519               if we can delete some of the leading zeros */
520            if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
521                significant_digit_cnt = MIN_EXPONENT_DIGITS;
522            extra_zeros_cnt = exponent_digit_cnt -
523                significant_digit_cnt;
524
525            /* Delete extra_zeros_cnt worth of characters from the
526               front of the exponent */
527            assert(extra_zeros_cnt >= 0);
528
529            /* Add one to significant_digit_cnt to copy the
530               trailing 0 byte, thus setting the length */
531            memmove(start,
532                start + extra_zeros_cnt,
533                significant_digit_cnt + 1);
534        }
535        else {
536            /* If there are fewer than 2 digits, add zeros
537               until there are 2, if there's enough room */
538            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
539            if (start + zeros + exponent_digit_cnt + 1
540                  < buffer + buf_size) {
541                memmove(start + zeros, start,
542                    exponent_digit_cnt + 1);
543                memset(start, '0', zeros);
544            }
545        }
546    }
547}
548
549/* Remove trailing zeros after the decimal point from a numeric string; also
550   remove the decimal point if all digits following it are zero.  The numeric
551   string must end in '\0', and should not have any leading or trailing
552   whitespace.  Assumes that the decimal point is '.'. */
553Py_LOCAL_INLINE(void)
554remove_trailing_zeros(char *buffer)
555{
556    char *old_fraction_end, *new_fraction_end, *end, *p;
557
558    p = buffer;
559    if (*p == '-' || *p == '+')
560        /* Skip leading sign, if present */
561        ++p;
562    while (Py_ISDIGIT(*p))
563        ++p;
564
565    /* if there's no decimal point there's nothing to do */
566    if (*p++ != '.')
567        return;
568
569    /* scan any digits after the point */
570    while (Py_ISDIGIT(*p))
571        ++p;
572    old_fraction_end = p;
573
574    /* scan up to ending '\0' */
575    while (*p != '\0')
576        p++;
577    /* +1 to make sure that we move the null byte as well */
578    end = p+1;
579
580    /* scan back from fraction_end, looking for removable zeros */
581    p = old_fraction_end;
582    while (*(p-1) == '0')
583        --p;
584    /* and remove point if we've got that far */
585    if (*(p-1) == '.')
586        --p;
587    new_fraction_end = p;
588
589    memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
590}
591
592/* Ensure that buffer has a decimal point in it.  The decimal point will not
593   be in the current locale, it will always be '.'. Don't add a decimal point
594   if an exponent is present.  Also, convert to exponential notation where
595   adding a '.0' would produce too many significant digits (see issue 5864).
596
597   Returns a pointer to the fixed buffer, or NULL on failure.
598*/
599Py_LOCAL_INLINE(char *)
600ensure_decimal_point(char* buffer, size_t buf_size, int precision)
601{
602    int digit_count, insert_count = 0, convert_to_exp = 0;
603    const char *chars_to_insert;
604    char *digits_start;
605
606    /* search for the first non-digit character */
607    char *p = buffer;
608    if (*p == '-' || *p == '+')
609        /* Skip leading sign, if present.  I think this could only
610           ever be '-', but it can't hurt to check for both. */
611        ++p;
612    digits_start = p;
613    while (*p && Py_ISDIGIT(*p))
614        ++p;
615    digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
616
617    if (*p == '.') {
618        if (Py_ISDIGIT(*(p+1))) {
619            /* Nothing to do, we already have a decimal
620               point and a digit after it */
621        }
622        else {
623            /* We have a decimal point, but no following
624               digit.  Insert a zero after the decimal. */
625            /* can't ever get here via PyOS_double_to_string */
626            assert(precision == -1);
627            ++p;
628            chars_to_insert = "0";
629            insert_count = 1;
630        }
631    }
632    else if (!(*p == 'e' || *p == 'E')) {
633        /* Don't add ".0" if we have an exponent. */
634        if (digit_count == precision) {
635            /* issue 5864: don't add a trailing .0 in the case
636               where the '%g'-formatted result already has as many
637               significant digits as were requested.  Switch to
638               exponential notation instead. */
639            convert_to_exp = 1;
640            /* no exponent, no point, and we shouldn't land here
641               for infs and nans, so we must be at the end of the
642               string. */
643            assert(*p == '\0');
644        }
645        else {
646            assert(precision == -1 || digit_count < precision);
647            chars_to_insert = ".0";
648            insert_count = 2;
649        }
650    }
651    if (insert_count) {
652        size_t buf_len = strlen(buffer);
653        if (buf_len + insert_count + 1 >= buf_size) {
654            /* If there is not enough room in the buffer
655               for the additional text, just skip it.  It's
656               not worth generating an error over. */
657        }
658        else {
659            memmove(p + insert_count, p,
660                buffer + strlen(buffer) - p + 1);
661            memcpy(p, chars_to_insert, insert_count);
662        }
663    }
664    if (convert_to_exp) {
665        int written;
666        size_t buf_avail;
667        p = digits_start;
668        /* insert decimal point */
669        assert(digit_count >= 1);
670        memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
671        p[1] = '.';
672        p += digit_count+1;
673        assert(p <= buf_size+buffer);
674        buf_avail = buf_size+buffer-p;
675        if (buf_avail == 0)
676            return NULL;
677        /* Add exponent.  It's okay to use lower case 'e': we only
678           arrive here as a result of using the empty format code or
679           repr/str builtins and those never want an upper case 'E' */
680        written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
681        if (!(0 <= written &&
682              written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
683            /* output truncated, or something else bad happened */
684            return NULL;
685        remove_trailing_zeros(buffer);
686    }
687    return buffer;
688}
689
690/* see FORMATBUFLEN in unicodeobject.c */
691#define FLOAT_FORMATBUFLEN 120
692
693/**
694 * _PyOS_ascii_formatd:
695 * @buffer: A buffer to place the resulting string in
696 * @buf_size: The length of the buffer.
 * @format: The printf()-style format string to use for the
 *          conversion.
 * @d: The double to convert
700 * @precision: The precision to use when formatting.
701 *
 * Converts a double to a string, using the '.' as
703 * decimal point. To format the number you pass in
704 * a printf()-style format string. Allowed conversion
705 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
706 *
707 * 'Z' is the same as 'g', except it always has a decimal and
708 *     at least one digit after the decimal.
709 *
710 * Return value: The pointer to the buffer with the converted string.
711 * On failure returns NULL but does not set any Python exception.
712 **/
713static char *
714_PyOS_ascii_formatd(char       *buffer,
715                   size_t      buf_size,
716                   const char *format,
717                   double      d,
718                   int         precision)
719{
720    char format_char;
721    size_t format_len = strlen(format);
722
723    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
724       also with at least one character past the decimal. */
725    char tmp_format[FLOAT_FORMATBUFLEN];
726
727    /* The last character in the format string must be the format char */
728    format_char = format[format_len - 1];
729
730    if (format[0] != '%')
731        return NULL;
732
733    /* I'm not sure why this test is here.  It's ensuring that the format
734       string after the first character doesn't have a single quote, a
735       lowercase l, or a percent. This is the reverse of the commented-out
736       test about 10 lines ago. */
737    if (strpbrk(format + 1, "'l%"))
738        return NULL;
739
740    /* Also curious about this function is that it accepts format strings
741       like "%xg", which are invalid for floats.  In general, the
742       interface to this function is not very good, but changing it is
743       difficult because it's a public API. */
744
745    if (!(format_char == 'e' || format_char == 'E' ||
746          format_char == 'f' || format_char == 'F' ||
747          format_char == 'g' || format_char == 'G' ||
748          format_char == 'Z'))
749        return NULL;
750
751    /* Map 'Z' format_char to 'g', by copying the format string and
752       replacing the final char with a 'g' */
753    if (format_char == 'Z') {
754        if (format_len + 1 >= sizeof(tmp_format)) {
755            /* The format won't fit in our copy.  Error out.  In
756               practice, this will never happen and will be
757               detected by returning NULL */
758            return NULL;
759        }
760        strcpy(tmp_format, format);
761        tmp_format[format_len - 1] = 'g';
762        format = tmp_format;
763    }
764
765
766    /* Have PyOS_snprintf do the hard work */
767    PyOS_snprintf(buffer, buf_size, format, d);
768
769    /* Do various fixups on the return string */
770
771    /* Get the current locale, and find the decimal point string.
772       Convert that string back to a dot. */
773    change_decimal_from_locale_to_dot(buffer);
774
775    /* If an exponent exists, ensure that the exponent is at least
776       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
777       for the extra zeros.  Also, if there are more than
778       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
779       back to MIN_EXPONENT_DIGITS */
780    ensure_minimum_exponent_length(buffer, buf_size);
781
782    /* If format_char is 'Z', make sure we have at least one character
783       after the decimal point (and make sure we have a decimal point);
784       also switch to exponential notation in some edge cases where the
785       extra character would produce more significant digits that we
786       really want. */
787    if (format_char == 'Z')
788        buffer = ensure_decimal_point(buffer, buf_size, precision);
789
790    return buffer;
791}
792
793/* The fallback code to use if _Py_dg_dtoa is not available. */
794
795char * PyOS_double_to_string(double val,
796                                         char format_code,
797                                         int precision,
798                                         int flags,
799                                         int *type)
800{
801    char format[32];
802    Py_ssize_t bufsize;
803    char *buf;
804    int t, exp;
805    int upper = 0;
806
807    /* Validate format_code, and map upper and lower case */
808    switch (format_code) {
809    case 'e':          /* exponent */
810    case 'f':          /* fixed */
811    case 'g':          /* general */
812        break;
813    case 'E':
814        upper = 1;
815        format_code = 'e';
816        break;
817    case 'F':
818        upper = 1;
819        format_code = 'f';
820        break;
821    case 'G':
822        upper = 1;
823        format_code = 'g';
824        break;
825    case 'r':          /* repr format */
826        /* Supplied precision is unused, must be 0. */
827        if (precision != 0) {
828            PyErr_BadInternalCall();
829            return NULL;
830        }
831        /* The repr() precision (17 significant decimal digits) is the
832           minimal number that is guaranteed to have enough precision
833           so that if the number is read back in the exact same binary
834           value is recreated.  This is true for IEEE floating point
835           by design, and also happens to work for all other modern
836           hardware. */
837        precision = 17;
838        format_code = 'g';
839        break;
840    default:
841        PyErr_BadInternalCall();
842        return NULL;
843    }
844
845    /* Here's a quick-and-dirty calculation to figure out how big a buffer
846       we need.  In general, for a finite float we need:
847
848         1 byte for each digit of the decimal significand, and
849
850         1 for a possible sign
851         1 for a possible decimal point
852         2 for a possible [eE][+-]
853         1 for each digit of the exponent;  if we allow 19 digits
854           total then we're safe up to exponents of 2**63.
855         1 for the trailing nul byte
856
857       This gives a total of 24 + the number of digits in the significand,
858       and the number of digits in the significand is:
859
860         for 'g' format: at most precision, except possibly
861           when precision == 0, when it's 1.
862         for 'e' format: precision+1
863         for 'f' format: precision digits after the point, at least 1
864           before.  To figure out how many digits appear before the point
865           we have to examine the size of the number.  If fabs(val) < 1.0
866           then there will be only one digit before the point.  If
867           fabs(val) >= 1.0, then there are at most
868
869         1+floor(log10(ceiling(fabs(val))))
870
871           digits before the point (where the 'ceiling' allows for the
872           possibility that the rounding rounds the integer part of val
873           up).  A safe upper bound for the above quantity is
874           1+floor(exp/3), where exp is the unique integer such that 0.5
875           <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
876           frexp.
877
878       So we allow room for precision+1 digits for all formats, plus an
879       extra floor(exp/3) digits for 'f' format.
880
881    */
882
883    if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
884        /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
885        bufsize = 5;
886    else {
887        bufsize = 25 + precision;
888        if (format_code == 'f' && fabs(val) >= 1.0) {
889            frexp(val, &exp);
890            bufsize += exp/3;
891        }
892    }
893
894    buf = PyMem_Malloc(bufsize);
895    if (buf == NULL) {
896        PyErr_NoMemory();
897        return NULL;
898    }
899
900    /* Handle nan and inf. */
901    if (Py_IS_NAN(val)) {
902        strcpy(buf, "nan");
903        t = Py_DTST_NAN;
904    } else if (Py_IS_INFINITY(val)) {
905        if (copysign(1., val) == 1.)
906            strcpy(buf, "inf");
907        else
908            strcpy(buf, "-inf");
909        t = Py_DTST_INFINITE;
910    } else {
911        t = Py_DTST_FINITE;
912        if (flags & Py_DTSF_ADD_DOT_0)
913            format_code = 'Z';
914
915        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
916                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
917                      format_code);
918        _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
919
920        if (flags & Py_DTSF_NO_NEG_0 && buf[0] == '-') {
921            char *buf2 = buf + 1;
922            while (*buf2 == '0' || *buf2 == '.') {
923                ++buf2;
924            }
925            if (*buf2 == 0 || *buf2 == 'e') {
926                size_t len = buf2 - buf + strlen(buf2);
927                assert(buf[len] == 0);
928                memmove(buf, buf+1, len);
929            }
930        }
931    }
932
933    /* Add sign when requested.  It's convenient (esp. when formatting
934     complex numbers) to include a sign even for inf and nan. */
935    if (flags & Py_DTSF_SIGN && buf[0] != '-') {
936        size_t len = strlen(buf);
937        /* the bufsize calculations above should ensure that we've got
938           space to add a sign */
939        assert((size_t)bufsize >= len+2);
940        memmove(buf+1, buf, len+1);
941        buf[0] = '+';
942    }
943    if (upper) {
944        /* Convert to upper case. */
945        char *p1;
946        for (p1 = buf; *p1; p1++)
947            *p1 = Py_TOUPPER(*p1);
948    }
949
950    if (type)
951        *type = t;
952    return buf;
953}
954
955#else  // _PY_SHORT_FLOAT_REPR == 1
956
957/* _Py_dg_dtoa is available. */
958
/* Spellings of "inf", "nan" and the exponent marker are taken from lookup
   tables rather than being upper-cased at run time, which avoids needing a
   locale-independent case conversion.  The OFS_* macros are the indices
   into either table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The formatting code relies on the lengths of these strings (three
   characters for inf/nan, one for the exponent marker); do not change
   them. */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E]   = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E]   = "E",
};
976
977
978/* Convert a double d to a string, and return a PyMem_Malloc'd block of
979   memory contain the resulting string.
980
981   Arguments:
982     d is the double to be converted
983     format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
984       correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
985     mode is one of '0', '2' or '3', and is completely determined by
986       format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
987     precision is the desired precision
988     always_add_sign is nonzero if a '+' sign should be included for positive
989       numbers
990     add_dot_0_if_integer is nonzero if integers in non-exponential form
991       should have ".0" added.  Only applies to format codes 'r' and 'g'.
992     use_alt_formatting is nonzero if alternative formatting should be
993       used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
994       at most one of use_alt_formatting and add_dot_0_if_integer should
995       be nonzero.
996     type, if non-NULL, will be set to one of these constants to identify
997       the type of the 'd' argument:
998     Py_DTST_FINITE
999     Py_DTST_INFINITE
1000     Py_DTST_NAN
1001
1002   Returns a PyMem_Malloc'd block of memory containing the resulting string,
1003    or NULL on error. If NULL is returned, the Python error has been set.
1004 */
1005
1006static char *
1007format_float_short(double d, char format_code,
1008                   int mode, int precision,
1009                   int always_add_sign, int add_dot_0_if_integer,
1010                   int use_alt_formatting, int no_negative_zero,
1011                   const char * const *float_strings, int *type)
1012{
1013    char *buf = NULL;
1014    char *p = NULL;
1015    Py_ssize_t bufsize = 0;
1016    char *digits, *digits_end;
1017    int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1018    Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1019    _Py_SET_53BIT_PRECISION_HEADER;
1020
1021    /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1022       Must be matched by a call to _Py_dg_freedtoa. */
1023    _Py_SET_53BIT_PRECISION_START;
1024    digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1025                         &digits_end);
1026    _Py_SET_53BIT_PRECISION_END;
1027
1028    decpt = (Py_ssize_t)decpt_as_int;
1029    if (digits == NULL) {
1030        /* The only failure mode is no memory. */
1031        PyErr_NoMemory();
1032        goto exit;
1033    }
1034    assert(digits_end != NULL && digits_end >= digits);
1035    digits_len = digits_end - digits;
1036
1037    if (no_negative_zero && sign == 1 &&
1038            (digits_len == 0 || (digits_len == 1 && digits[0] == '0'))) {
1039        sign = 0;
1040    }
1041
1042    if (digits_len && !Py_ISDIGIT(digits[0])) {
1043        /* Infinities and nans here; adapt Gay's output,
1044           so convert Infinity to inf and NaN to nan, and
1045           ignore sign of nan. Then return. */
1046
1047        /* ignore the actual sign of a nan */
1048        if (digits[0] == 'n' || digits[0] == 'N')
1049            sign = 0;
1050
1051        /* We only need 5 bytes to hold the result "+inf\0" . */
1052        bufsize = 5; /* Used later in an assert. */
1053        buf = (char *)PyMem_Malloc(bufsize);
1054        if (buf == NULL) {
1055            PyErr_NoMemory();
1056            goto exit;
1057        }
1058        p = buf;
1059
1060        if (sign == 1) {
1061            *p++ = '-';
1062        }
1063        else if (always_add_sign) {
1064            *p++ = '+';
1065        }
1066        if (digits[0] == 'i' || digits[0] == 'I') {
1067            strncpy(p, float_strings[OFS_INF], 3);
1068            p += 3;
1069
1070            if (type)
1071                *type = Py_DTST_INFINITE;
1072        }
1073        else if (digits[0] == 'n' || digits[0] == 'N') {
1074            strncpy(p, float_strings[OFS_NAN], 3);
1075            p += 3;
1076
1077            if (type)
1078                *type = Py_DTST_NAN;
1079        }
1080        else {
1081            /* shouldn't get here: Gay's code should always return
1082               something starting with a digit, an 'I',  or 'N' */
1083            Py_UNREACHABLE();
1084        }
1085        goto exit;
1086    }
1087
1088    /* The result must be finite (not inf or nan). */
1089    if (type)
1090        *type = Py_DTST_FINITE;
1091
1092
1093    /* We got digits back, format them.  We may need to pad 'digits'
1094       either on the left or right (or both) with extra zeros, so in
1095       general the resulting string has the form
1096
1097         [<sign>]<zeros><digits><zeros>[<exponent>]
1098
1099       where either of the <zeros> pieces could be empty, and there's a
1100       decimal point that could appear either in <digits> or in the
1101       leading or trailing <zeros>.
1102
1103       Imagine an infinite 'virtual' string vdigits, consisting of the
1104       string 'digits' (starting at index 0) padded on both the left and
1105       right with infinite strings of zeros.  We want to output a slice
1106
1107         vdigits[vdigits_start : vdigits_end]
1108
1109       of this virtual string.  Thus if vdigits_start < 0 then we'll end
1110       up producing some leading zeros; if vdigits_end > digits_len there
1111       will be trailing zeros in the output.  The next section of code
1112       determines whether to use an exponent or not, figures out the
1113       position 'decpt' of the decimal point, and computes 'vdigits_start'
1114       and 'vdigits_end'. */
1115    vdigits_end = digits_len;
1116    switch (format_code) {
1117    case 'e':
1118        use_exp = 1;
1119        vdigits_end = precision;
1120        break;
1121    case 'f':
1122        vdigits_end = decpt + precision;
1123        break;
1124    case 'g':
1125        if (decpt <= -4 || decpt >
1126            (add_dot_0_if_integer ? precision-1 : precision))
1127            use_exp = 1;
1128        if (use_alt_formatting)
1129            vdigits_end = precision;
1130        break;
1131    case 'r':
1132        /* convert to exponential format at 1e16.  We used to convert
1133           at 1e17, but that gives odd-looking results for some values
1134           when a 16-digit 'shortest' repr is padded with bogus zeros.
1135           For example, repr(2e16+8) would give 20000000000000010.0;
1136           the true value is 20000000000000008.0. */
1137        if (decpt <= -4 || decpt > 16)
1138            use_exp = 1;
1139        break;
1140    default:
1141        PyErr_BadInternalCall();
1142        goto exit;
1143    }
1144
1145    /* if using an exponent, reset decimal point position to 1 and adjust
1146       exponent accordingly.*/
1147    if (use_exp) {
1148        exp = (int)decpt - 1;
1149        decpt = 1;
1150    }
1151    /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1152       decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1153    vdigits_start = decpt <= 0 ? decpt-1 : 0;
1154    if (!use_exp && add_dot_0_if_integer)
1155        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1156    else
1157        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1158
1159    /* double check inequalities */
1160    assert(vdigits_start <= 0 &&
1161           0 <= digits_len &&
1162           digits_len <= vdigits_end);
1163    /* decimal point should be in (vdigits_start, vdigits_end] */
1164    assert(vdigits_start < decpt && decpt <= vdigits_end);
1165
1166    /* Compute an upper bound how much memory we need. This might be a few
1167       chars too long, but no big deal. */
1168    bufsize =
1169        /* sign, decimal point and trailing 0 byte */
1170        3 +
1171
1172        /* total digit count (including zero padding on both sides) */
1173        (vdigits_end - vdigits_start) +
1174
1175        /* exponent "e+100", max 3 numerical digits */
1176        (use_exp ? 5 : 0);
1177
1178    /* Now allocate the memory and initialize p to point to the start of
1179       it. */
1180    buf = (char *)PyMem_Malloc(bufsize);
1181    if (buf == NULL) {
1182        PyErr_NoMemory();
1183        goto exit;
1184    }
1185    p = buf;
1186
1187    /* Add a negative sign if negative, and a plus sign if non-negative
1188       and always_add_sign is true. */
1189    if (sign == 1)
1190        *p++ = '-';
1191    else if (always_add_sign)
1192        *p++ = '+';
1193
1194    /* note that exactly one of the three 'if' conditions is true,
1195       so we include exactly one decimal point */
1196    /* Zero padding on left of digit string */
1197    if (decpt <= 0) {
1198        memset(p, '0', decpt-vdigits_start);
1199        p += decpt - vdigits_start;
1200        *p++ = '.';
1201        memset(p, '0', 0-decpt);
1202        p += 0-decpt;
1203    }
1204    else {
1205        memset(p, '0', 0-vdigits_start);
1206        p += 0 - vdigits_start;
1207    }
1208
1209    /* Digits, with included decimal point */
1210    if (0 < decpt && decpt <= digits_len) {
1211        strncpy(p, digits, decpt-0);
1212        p += decpt-0;
1213        *p++ = '.';
1214        strncpy(p, digits+decpt, digits_len-decpt);
1215        p += digits_len-decpt;
1216    }
1217    else {
1218        strncpy(p, digits, digits_len);
1219        p += digits_len;
1220    }
1221
1222    /* And zeros on the right */
1223    if (digits_len < decpt) {
1224        memset(p, '0', decpt-digits_len);
1225        p += decpt-digits_len;
1226        *p++ = '.';
1227        memset(p, '0', vdigits_end-decpt);
1228        p += vdigits_end-decpt;
1229    }
1230    else {
1231        memset(p, '0', vdigits_end-digits_len);
1232        p += vdigits_end-digits_len;
1233    }
1234
1235    /* Delete a trailing decimal pt unless using alternative formatting. */
1236    if (p[-1] == '.' && !use_alt_formatting)
1237        p--;
1238
1239    /* Now that we've done zero padding, add an exponent if needed. */
1240    if (use_exp) {
1241        *p++ = float_strings[OFS_E][0];
1242        exp_len = sprintf(p, "%+.02d", exp);
1243        p += exp_len;
1244    }
1245  exit:
1246    if (buf) {
1247        *p = '\0';
1248        /* It's too late if this fails, as we've already stepped on
1249           memory that isn't ours. But it's an okay debugging test. */
1250        assert(p-buf < bufsize);
1251    }
1252    if (digits)
1253        _Py_dg_freedtoa(digits);
1254
1255    return buf;
1256}
1257
1258
1259char * PyOS_double_to_string(double val,
1260                                         char format_code,
1261                                         int precision,
1262                                         int flags,
1263                                         int *type)
1264{
1265    const char * const *float_strings = lc_float_strings;
1266    int mode;
1267
1268    /* Validate format_code, and map upper and lower case. Compute the
1269       mode and make any adjustments as needed. */
1270    switch (format_code) {
1271    /* exponent */
1272    case 'E':
1273        float_strings = uc_float_strings;
1274        format_code = 'e';
1275        /* Fall through. */
1276    case 'e':
1277        mode = 2;
1278        precision++;
1279        break;
1280
1281    /* fixed */
1282    case 'F':
1283        float_strings = uc_float_strings;
1284        format_code = 'f';
1285        /* Fall through. */
1286    case 'f':
1287        mode = 3;
1288        break;
1289
1290    /* general */
1291    case 'G':
1292        float_strings = uc_float_strings;
1293        format_code = 'g';
1294        /* Fall through. */
1295    case 'g':
1296        mode = 2;
1297        /* precision 0 makes no sense for 'g' format; interpret as 1 */
1298        if (precision == 0)
1299            precision = 1;
1300        break;
1301
1302    /* repr format */
1303    case 'r':
1304        mode = 0;
1305        /* Supplied precision is unused, must be 0. */
1306        if (precision != 0) {
1307            PyErr_BadInternalCall();
1308            return NULL;
1309        }
1310        break;
1311
1312    default:
1313        PyErr_BadInternalCall();
1314        return NULL;
1315    }
1316
1317    return format_float_short(val, format_code, mode, precision,
1318                              flags & Py_DTSF_SIGN,
1319                              flags & Py_DTSF_ADD_DOT_0,
1320                              flags & Py_DTSF_ALT,
1321                              flags & Py_DTSF_NO_NEG_0,
1322                              float_strings, type);
1323}
1324#endif  // _PY_SHORT_FLOAT_REPR == 1
1325