Refactor to remove duplicated nan/inf parsing code in pystrtod.c, floatobject.c and dtoa.c.
2025-11-20 02:50:14 +00:00 · 2009-05-20 22:05:25 +00:00 · 2009-05-20 22:05:25 +00:00 · bd16edd305
commit bd16edd305
parent 4db6ff683d
5 changed files with 85 additions and 125 deletions
--- a/Include/pystrtod.h
+++ b/Include/pystrtod.h
@ -21,6 +21,8 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         int flags,
                                         int *type);
 PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr);
 /* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
 #define Py_DTSF_SIGN      0x01 /* always add the sign */
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@ -532,6 +532,11 @@ class InfNanTest(unittest.TestCase):
        self.assertRaises(ValueError, float, "-INFI")
        self.assertRaises(ValueError, float, "infinitys")
        self.assertRaises(ValueError, float, "++Inf")
        self.assertRaises(ValueError, float, "-+inf")
        self.assertRaises(ValueError, float, "+-infinity")
        self.assertRaises(ValueError, float, "--Infinity")
    def test_inf_as_str(self):
        self.assertEqual(repr(1e300 * 1e300), "inf")
        self.assertEqual(repr(-1e300 * 1e300), "-inf")
@ -563,6 +568,11 @@ class InfNanTest(unittest.TestCase):
        self.assertRaises(ValueError, float, "+na")
        self.assertRaises(ValueError, float, "-na")
        self.assertRaises(ValueError, float, "++nan")
        self.assertRaises(ValueError, float, "-+NAN")
        self.assertRaises(ValueError, float, "+-NaN")
        self.assertRaises(ValueError, float, "--nAn")
    def test_nan_as_str(self):
        self.assertEqual(repr(1e300 * 1e300 * 0), "nan")
        self.assertEqual(repr(-1e300 * 1e300 * 0), "nan")
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@ -1157,20 +1157,6 @@ Return a hexadecimal representation of a floating-point number.\n\
 >>> 3.14159.hex()\n\
 '0x1.921f9f01b866ep+1'");
 /* Locale-independent, case-insensitive prefix test used when detecting
    "nan" and "inf".  t must be lower-case and null-terminated.  The result
    is nonzero when the first strlen(t) characters of s equal t once s is
    lower-cased, and 0 otherwise. */
 static int
 case_insensitive_match(const char *s, const char *t)
 {
 	/* Walk both strings in lock step; any mismatch ends the search. */
 	for (; *t != '\0'; s++, t++) {
 		if (Py_TOLOWER(*s) != *t)
 			return 0;
 	}
 	/* All of t was consumed, so the prefix matched. */
 	return 1;
 }
 /* Convert a hexadecimal string to a float. */
 static PyObject *
@ -1180,7 +1166,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
 	double x;
 	long exp, top_exp, lsb, key_digit;
 	char *s, *coeff_start, *s_store, *coeff_end, *exp_start, *s_end;
-	int half_eps, digit, round_up, sign=1;
+	int half_eps, digit, round_up, negate=0;
 	Py_ssize_t length, ndigits, fdigits, i;
 	/*
@ -1237,33 +1223,24 @@ float_fromhex(PyObject *cls, PyObject *arg)
 	 * Parse the string *
 	 ********************/
-	/* leading whitespace and optional sign */
+	/* leading whitespace */
 	while (Py_ISSPACE(*s))
 		s++;
 	if (*s == '-') {
 		s++;
 		sign = -1;
 	}
 	else if (*s == '+')
 		s++;
 	/* infinities and nans */
-	if (*s == 'i' || *s == 'I') {
+	x = _Py_parse_inf_or_nan(s, &coeff_end);
-		if (!case_insensitive_match(s+1, "nf"))
+	if (coeff_end != s) {
-			goto parse_error;
+		s = coeff_end;
 		s += 3;
 		x = Py_HUGE_VAL;
 		if (case_insensitive_match(s, "inity"))
 			s += 5;
 		goto finished;
 	}
-	if (*s == 'n' || *s == 'N') {
+
-		if (!case_insensitive_match(s+1, "an"))
+	/* optional sign */
-			goto parse_error;
+	if (*s == '-') {
-		s += 3;
+		s++;
-		x = Py_NAN;
+		negate = 1;
 		goto finished;
 	}
 	else if (*s == '+')
 		s++;
 	/* [0x] */
 	s_store = s;
@ -1400,7 +1377,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
 		s++;
 	if (s != s_end)
 		goto parse_error;
-	result_as_float = Py_BuildValue("(d)", sign * x);
+	result_as_float = Py_BuildValue("(d)", negate ? -x : x);
 	if (result_as_float == NULL)
 		return NULL;
 	result = PyObject_CallObject(cls, result_as_float);
--- a/Python/dtoa.c
+++ b/Python/dtoa.c
@ -264,15 +264,6 @@ extern int strtod_diglim;
 #define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
 #define Big1 0xffffffff
 #ifndef NAN_WORD0
 #define NAN_WORD0 0x7ff80000
 #endif
 #ifndef NAN_WORD1
 #define NAN_WORD1 0
 #endif
 /* struct BCinfo is used to pass information from _Py_dg_strtod to bigcomp */
 typedef struct BCinfo BCinfo;
@ -1026,25 +1017,6 @@ static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
 #define Scale_Bit 0x10
 #define n_bigtens 5
 /* Case-insensitive string match, used to recognise 'inf[inity]' and 'nan'.
    Comparison starts at the character AFTER *sp: each following character,
    with 'A'..'Z' folded to lower case, must equal the corresponding
    character of t.  On a full match, *sp is advanced to one past the last
    matched character and 1 is returned; otherwise *sp is left unchanged
    and 0 is returned. */
 static int
 match(const char **sp, char *t)
 {
     const char *cursor = *sp;
     int want, got;

     for (; (want = *t) != 0; t++) {
         /* Pre-increment: the character at *sp itself is never compared. */
         cursor++;
         got = *cursor;
         if (got >= 'A' && got <= 'Z')
             got += 'a' - 'A';
         if (got != want)
             return 0;
     }
     *sp = cursor + 1;
     return 1;
 }
 #define ULbits 32
 #define kshift 5
 #define kmask 31
@ -1459,28 +1431,6 @@ _Py_dg_strtod(const char *s00, char **se)
    }
    if (!nd) {
        if (!nz && !nz0) {
            /* Check for Nan and Infinity */
            if (!bc.dplen)
                switch(c) {
                case 'i':
                case 'I':
                    if (match(&s,"nf")) {
                        --s;
                        if (!match(&s,"inity"))
                            ++s;
                        word0(&rv) = 0x7ff00000;
                        word1(&rv) = 0;
                        goto ret;
                    }
                    break;
                case 'n':
                case 'N':
                    if (match(&s, "an")) {
                        word0(&rv) = NAN_WORD0;
                        word1(&rv) = NAN_WORD1;
                        goto ret;
                    }
                }
          ret0:
            s = s00;
            sign = 0;
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@ -3,6 +3,57 @@
 #include <Python.h>
 #include <locale.h>
 /* Case-insensitive prefix test used by _Py_parse_inf_or_nan.  t must be
    lower-case and null-terminated.  Return 1 if the first strlen(t)
    characters of s match t after lower-casing s, and 0 otherwise. */
 static int
 case_insensitive_match(const char *s, const char *t)
 {
 	for (; *t != '\0'; s++, t++) {
 		if (Py_TOLOWER(*s) != *t)
 			return 0;
 	}
 	return 1;
 }

 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf"
    or "infinity" (in any letter case), with an optional leading sign of "+"
    or "-".  On success, return the NaN or Infinity as a double and set
    *endptr to point just beyond the successfully parsed portion of the
    string.  On failure, return -1.0 and set *endptr to point to the start
    of the string.  "nan" is only recognised when Py_NAN is defined. */
 double
 _Py_parse_inf_or_nan(const char *p, char **endptr)
 {
 	const char *cur = p;
 	int negate;

 	/* Optional sign: consume a single '+' or '-'. */
 	negate = (*cur == '-');
 	if (negate || *cur == '+')
 		cur++;

 	if (case_insensitive_match(cur, "inf")) {
 		/* Accept the long spelling "infinity" when it is present. */
 		cur += case_insensitive_match(cur + 3, "inity") ? 8 : 3;
 		*endptr = (char *)cur;
 		return negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
 	}
 #ifdef Py_NAN
 	if (case_insensitive_match(cur, "nan")) {
 		*endptr = (char *)(cur + 3);
 		return negate ? -Py_NAN : Py_NAN;
 	}
 #endif
 	/* Nothing recognised: report failure without consuming any input. */
 	*endptr = (char *)p;
 	return -1.0;
 }
 /**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
@ -49,6 +100,10 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
 	result = _Py_dg_strtod(nptr, endptr);
 	_Py_SET_53BIT_PRECISION_END;
 	if (*endptr == nptr)
 		/* string might represent an inf or nan */
 		result = _Py_parse_inf_or_nan(nptr, endptr);
 	return result;
 }
@ -63,19 +118,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
   correctly rounded results.
 */
 /* Case-insensitive string match used for nan and inf detection.  t should
    be lower-case (and is scanned up to its terminating null).  Returns 1
    when every character of t equals the lower-cased character at the same
    position in s, and 0 otherwise. */
 static int
 case_insensitive_match(const char *s, const char *t)
 {
 	while (*t != '\0') {
 		if (Py_TOLOWER(*s) != *t)
 			break;
 		s++;
 		t++;
 	}
 	/* Success only if the whole of t was consumed. */
 	return *t == '\0';
 }
 double
 _PyOS_ascii_strtod(const char *nptr, char **endptr)
 {
@ -101,6 +143,11 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
 	decimal_point_pos = NULL;
 	/* Parse infinities and nans */
 	val = _Py_parse_inf_or_nan(nptr, endptr);
 	if (*endptr != nptr)
 		return val;
 	/* Set errno to zero, so that we can distinguish zero results
 	   and underflows */
 	errno = 0;
@ -118,31 +165,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
 		p++;
 	}
 	/* Parse infinities and nans */
 	if (*p == 'i' || *p == 'I') {
 		if (case_insensitive_match(p+1, "nf")) {
 			val = Py_HUGE_VAL;
 			if (case_insensitive_match(p+3, "inity"))
 				fail_pos = (char *)p+8;
 			else
 				fail_pos = (char *)p+3;
 			goto got_val;
 		}
 		else
 			goto invalid_string;
 	}
 #ifdef Py_NAN
 	if (*p == 'n' || *p == 'N') {
 		if (case_insensitive_match(p+1, "an")) {
 			val = Py_NAN;
 			fail_pos = (char *)p+3;
 			goto got_val;
 		}
 		else
 			goto invalid_string;
 	}
 #endif
 	/* Some platform strtods accept hex floats; Python shouldn't (at the
 	   moment), so we check explicitly for strings starting with '0x'. */
 	if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
@ -231,7 +253,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
 	if (fail_pos == digits_pos)
 		goto invalid_string;
  got_val:
 	if (negate && fail_pos != nptr)
 		val = -val;
 	*endptr = fail_pos;