Trim leading zeros from a floating point exponent, per C99. See issue 1600. As far as I know, this only affects Windows. Add float type 'n' to PyOS_ascii_formatd (see PEP 3101 for 'n' description).

2025-11-26 13:22:51 +00:00 · 2008-02-20 23:34:22 +00:00 · 2008-02-20 23:34:22 +00:00 · 7ef40bf9c7
commit 7ef40bf9c7
parent e1b8e9c666
3 changed files with 165 additions and 34 deletions
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@ -89,6 +89,29 @@ class TypesTests(unittest.TestCase):
        if float(1) == 1.0 and float(-1) == -1.0 and float(0) == 0.0: pass
        else: self.fail('float() does not work properly')
    def test_float_to_string(self):
        # Verify the exponent rendering of 'e'-formatted floats: the exponent
        # is expected to have two digits when its magnitude is below 100 and
        # three digits otherwise, for both float.__format__('e') and '%e'.
        def test(f, result):
            # Both formatting paths must produce the same expected string.
            self.assertEqual(f.__format__('e'), result)
            self.assertEqual('%e' % f, result)
        # test all 2 digit exponents, both with __format__ and with
        #  '%' formatting
        for i in range(-99, 100):
            # '{0:+03d}' renders i with an explicit sign, zero-padded to a
            # total width of 3 (sign plus two digits), e.g. 5 -> '+05'.
            test(float('1.5e'+str(i)), '1.500000e{0:+03d}'.format(i))
        # test some 3 digit exponents
        # (positive and negative, at and just past the 100 boundary)
        self.assertEqual(1.5e100.__format__('e'), '1.500000e+100')
        self.assertEqual('%e' % 1.5e100, '1.500000e+100')
        self.assertEqual(1.5e101.__format__('e'), '1.500000e+101')
        self.assertEqual('%e' % 1.5e101, '1.500000e+101')
        self.assertEqual(1.5e-100.__format__('e'), '1.500000e-100')
        self.assertEqual('%e' % 1.5e-100, '1.500000e-100')
        self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
        self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
    def test_normal_integers(self):
        # Ensure the first 256 integers are shared
        a = 256
@ -486,16 +509,17 @@ class TypesTests(unittest.TestCase):
        test(-1.0, ' f', '-1.000000')
        test( 1.0, '+f', '+1.000000')
        test(-1.0, '+f', '-1.000000')
        test(1.1234e90, 'f', '1.1234e+90')
        test(1.1234e90, 'F', '1.1234e+90')
        test(1.1234e200, 'f', '1.1234e+200')
        test(1.1234e200, 'F', '1.1234e+200')
-        # temporarily removed.  see issue 1600
+        test( 1.0, 'e', '1.000000e+00')
- #       test( 1.0, 'e', '1.000000e+00')
+        test(-1.0, 'e', '-1.000000e+00')
- #       test(-1.0, 'e', '-1.000000e+00')
+        test( 1.0, 'E', '1.000000E+00')
- #       test( 1.0, 'E', '1.000000E+00')
+        test(-1.0, 'E', '-1.000000E+00')
- #       test(-1.0, 'E', '-1.000000E+00')
+        test(1.1234e20, 'e', '1.123400e+20')
- #       test(1.1234e20, 'e', '1.123400e+20')
+        test(1.1234e20, 'E', '1.123400E+20')
 #       test(1.1234e20, 'E', '1.123400E+20')
        # % formatting
        test(-1.0, '%', '-100.000000%')
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -12,6 +12,13 @@ What's New in Python 2.6 alpha 1?
 Core and builtins
 -----------------
 - Issue #1600: Modified PyOS_ascii_formatd to use at most 2 digit
  exponents for exponents with absolute value < 100.  Follows C99
  standard.  This is a change on Windows, which would use 3 digits.
  Also, added 'n' to the formats that PyOS_ascii_formatd understands,
  so that any alterations it does to the resulting string will be
  available in stringlib/formatter.h (for float.__format__).
 - Implemented PEP 3101, Advanced String Formatting.  This adds a new
  builtin format(); a format() method for str and unicode; a
  __format__() method to object, str, unicode, int, long, float, and
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@ -186,6 +186,15 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 }
 /* From the C99 standard, section 7.19.6:
 The exponent always contains at least two digits, and only as many more digits
 as necessary to represent the exponent.
 */
 #define MIN_EXPONENT_DIGITS 2
 /* see FORMATBUFLEN in unicodeobject.c */
 #define FLOAT_FORMATBUFLEN 120
 /**
 * PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
@ -197,8 +206,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 * Converts a #gdouble to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
- * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'. 
+ * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
 * 
 * 'n' is the same as 'g', except it uses the current locale.
 *
 * Return value: The pointer to the buffer with the converted string.
 **/
 char *
@ -207,17 +218,23 @@ PyOS_ascii_formatd(char       *buffer,
 		   const char *format, 
 		   double      d)
 {
 	struct lconv *locale_data;
 	const char *decimal_point;
 	size_t decimal_point_len, rest_len;
 	char *p;
 	char format_char;
 	size_t format_len = strlen(format);
 	/* For type 'n', we need to make a copy of the format string, because
 	   we're going to modify 'n' -> 'g', and format is const char*, so we
 	   can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
 	   we ever need this to be.  There's an upcoming check to ensure it's
 	   big enough. */
 	char tmp_format[FLOAT_FORMATBUFLEN];
 /* 	g_return_val_if_fail (buffer != NULL, NULL); */
 /* 	g_return_val_if_fail (format[0] == '%', NULL); */
 /* 	g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL); */
-	format_char = format[strlen(format) - 1];
+	/* The last character in the format string must be the format char */
 	format_char = format[format_len - 1];
 /* 	g_return_val_if_fail (format_char == 'e' || format_char == 'E' || */
 /* 			      format_char == 'f' || format_char == 'F' || */
@ -227,43 +244,126 @@ PyOS_ascii_formatd(char       *buffer,
 	if (format[0] != '%')
 		return NULL;
 	/* I'm not sure why this test is here.  It's ensuring that the format
 	   string after the first character doesn't have a single quote, a
 	   lowercase l, or a percent. This is the reverse of the commented-out
 	   test about 10 lines ago. */
 	if (strpbrk(format + 1, "'l%"))
 		return NULL;
 	if (!(format_char == 'e' || format_char == 'E' || 
 	      format_char == 'f' || format_char == 'F' || 
-	      format_char == 'g' || format_char == 'G'))
+	      format_char == 'g' || format_char == 'G' ||
 	      format_char == 'n'))
 		return NULL;
 	/* Map 'n' format_char to 'g', by copying the format string and
 	   replacing the final 'n' with a 'g' */
 	if (format_char == 'n') {
 		if (format_len + 1 >= sizeof(tmp_format)) {
 			/* The format won't fit in our copy.  Error out.  In
 			   practice this should never happen; if it does, the
 			   caller detects it from the NULL return value */
 			return NULL;
 		}
 		strcpy(tmp_format, format);
 		tmp_format[format_len - 1] = 'g';
 		format = tmp_format;
 	}
 	/* Have PyOS_snprintf do the hard work */
 	PyOS_snprintf(buffer, buf_len, format, d);
-	locale_data = localeconv();
+	/* Get the current locale, and find the decimal point character (or
-	decimal_point = locale_data->decimal_point;
+	   string?).  Convert that string back to a dot.  Do not do this if
-	decimal_point_len = strlen(decimal_point);
+	   using the 'n' (number) format code. */
 	if (format_char != 'n') {
 		struct lconv *locale_data = localeconv();
 		const char *decimal_point = locale_data->decimal_point;
 		size_t decimal_point_len = strlen(decimal_point);
 		size_t rest_len;
-	assert(decimal_point_len != 0);
+		assert(decimal_point_len != 0);
-	if (decimal_point[0] != '.' || 
+		if (decimal_point[0] != '.' || decimal_point[1] != 0) {
-	    decimal_point[1] != 0)
+			p = buffer;
 	{
 		p = buffer;
-		if (*p == '+' || *p == '-')
+			if (*p == '+' || *p == '-')
-			p++;
+				p++;
-		while (isdigit((unsigned char)*p))
+			while (isdigit(Py_CHARMASK(*p)))
-			p++;
+				p++;
-		if (strncmp(p, decimal_point, decimal_point_len) == 0)
+			if (strncmp(p, decimal_point, decimal_point_len) == 0) {
-		{
+				*p = '.';
-			*p = '.';
+				p++;
-			p++;
+				if (decimal_point_len > 1) {
-			if (decimal_point_len > 1) {
+					rest_len = strlen(p +
-				rest_len = strlen(p + (decimal_point_len - 1));
+						      (decimal_point_len - 1));
-				memmove(p, p + (decimal_point_len - 1), 
+					memmove(p, p + (decimal_point_len - 1),
-					rest_len);
+						rest_len);
-				p[rest_len] = 0;
+					p[rest_len] = 0;
 				}
 			}
 		}
 	}
 	/* If an exponent exists, ensure that the exponent is at least
 	   MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
 	   for the extra zeros.  Also, if there are more than
 	   MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
 	   back to MIN_EXPONENT_DIGITS */
 	p = strpbrk(buffer, "eE");
 	if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
 		char *start = p + 2;
 		int exponent_digit_cnt = 0;
 		int leading_zero_cnt = 0;
 		int in_leading_zeros = 1;
 		int significant_digit_cnt;
 		p += 2;
 		while (*p && isdigit(Py_CHARMASK(*p))) {
 			if (in_leading_zeros && *p == '0')
 				++leading_zero_cnt;
 			if (*p != '0')
 				in_leading_zeros = 0;
 			++p;
 			++exponent_digit_cnt;
 		}
 		significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
 		if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
 			/* If there are exactly 2 digits, we're done,
 			   regardless of what they contain */
 		}
 		else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
 			int extra_zeros_cnt;
 			/* There are more than 2 digits in the exponent.  See
 			   if we can delete some of the leading zeros */
 			if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
 				significant_digit_cnt = MIN_EXPONENT_DIGITS;
 			extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt;
 			/* Delete extra_zeros_cnt worth of characters from the
 			   front of the exponent */
 			assert(extra_zeros_cnt >= 0);
 			/* Add one to significant_digit_cnt to copy the
 			   trailing 0 byte, thus setting the length */
 			memmove(start,
 				start + extra_zeros_cnt,
 				significant_digit_cnt + 1);
 		}
 		else {
 			/* If there are fewer than 2 digits, add zeros
 			   until there are 2, if there's enough room */
 			int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
 			if (start + zeros + exponent_digit_cnt + 1
 			      < buffer + buf_len) {
 				memmove(start + zeros, start,
 					exponent_digit_cnt + 1);
 				memset(start, '0', zeros);
 			}
 		}
 	}