Bug #1334662 / patch #1335972: int(string, base) wrong answers.

In rare cases of strings specifying true values near sys.maxint,
and oddball bases (not decimal or a power of 2), int(string, base)
could deliver insane answers.  This repairs all such problems, and
also speeds string->int significantly.  On my box, here are %
speedups for decimal strings of various lengths:

length speedup
------ -------
 1       12.4%
 2       15.7%
 3       20.6%
 4       28.1%
 5       33.2%
 6       37.5%
 7       41.9%
 8       46.3%
 9       51.2%
10       19.5%
11       19.9%
12       23.9%
13       23.7%
14       23.3%
15       24.9%
16       25.3%
17       28.3%
18       27.9%
19       35.7%

Note that the difference between 9 and 10 is the difference between
short and long Python ints on a 32-bit box.  The patch doesn't
actually do anything to speed conversion to long:  the speedup is
due to detecting "unsigned long" overflow more quickly.

This is a bugfix candidate, but it's a non-trivial patch and it
would be painful to separate the "bug fix" from the "speed up" parts.
This commit is contained in:
Tim Peters 2006-05-23 18:45:30 +00:00
parent b63588c188
commit b713ec2531
4 changed files with 262 additions and 88 deletions

View file

@ -15,6 +15,94 @@
/* strtol and strtoul, renamed to avoid conflicts */
#include <ctype.h>
#include <limits.h>
#ifndef DONT_HAVE_ERRNO_H
#include <errno.h>
#endif
/* Multiplication overflow guard for bases 2 through 36.
 *
 * smallmax[base] is the largest unsigned long i for which i * base
 * still fits in an unsigned long, i.e. ULONG_MAX / base.  A running
 * value greater than smallmax[base] cannot take another digit.
 *
 * Indices 0 and 1 are not valid bases; they hold 0 as placeholders so
 * the table can be indexed directly by the base.
 *
 * The table is read-only lookup data, hence const.
 */
static const unsigned long smallmax[] = {
    0, /* bases 0 and 1 are invalid */
    0,
    ULONG_MAX / 2,
    ULONG_MAX / 3,
    ULONG_MAX / 4,
    ULONG_MAX / 5,
    ULONG_MAX / 6,
    ULONG_MAX / 7,
    ULONG_MAX / 8,
    ULONG_MAX / 9,
    ULONG_MAX / 10,
    ULONG_MAX / 11,
    ULONG_MAX / 12,
    ULONG_MAX / 13,
    ULONG_MAX / 14,
    ULONG_MAX / 15,
    ULONG_MAX / 16,
    ULONG_MAX / 17,
    ULONG_MAX / 18,
    ULONG_MAX / 19,
    ULONG_MAX / 20,
    ULONG_MAX / 21,
    ULONG_MAX / 22,
    ULONG_MAX / 23,
    ULONG_MAX / 24,
    ULONG_MAX / 25,
    ULONG_MAX / 26,
    ULONG_MAX / 27,
    ULONG_MAX / 28,
    ULONG_MAX / 29,
    ULONG_MAX / 30,
    ULONG_MAX / 31,
    ULONG_MAX / 32,
    ULONG_MAX / 33,
    ULONG_MAX / 34,
    ULONG_MAX / 35,
    ULONG_MAX / 36,
};
/* digitlimit[base] is the largest number of base-`base` digits that can
 * never overflow an unsigned long, for bases 2 through 36:
 *
 *     [int(math.floor(math.log(2**N, i))) for i in range(2, 37)]
 *
 * where N is the width of unsigned long in bits.  Indices 0 and 1 are
 * not valid bases and hold 0 as placeholders.
 *
 * The table must be exact for the platform: a conversion loop that
 * treats "more than digitlimit[base] + 1 significant digits" as certain
 * overflow gives wrong answers if the 32-bit limits are used with a
 * wider unsigned long.  A table is therefore selected by ULONG_MAX.
 */
#if ULONG_MAX == 0xFFFFFFFFUL
/* N == 32 */
static const int digitlimit[] = {
    0,  0,  32, 20, 16, 13, 12, 11, 10, 10,  /*  0 -  9 */
    9,  9,  8,  8,  8,  8,  8,  7,  7,  7,   /* 10 - 19 */
    7,  7,  7,  7,  6,  6,  6,  6,  6,  6,   /* 20 - 29 */
    6,  6,  6,  6,  6,  6,  6};              /* 30 - 36 */
#elif (ULONG_MAX >> 32) == 0xFFFFFFFFUL
/* N == 64 */
static const int digitlimit[] = {
    0,  0,  64, 40, 32, 27, 24, 22, 21, 20,  /*  0 -  9 */
    19, 18, 17, 17, 16, 16, 16, 15, 15, 15,  /* 10 - 19 */
    14, 14, 14, 14, 13, 13, 13, 13, 13, 13,  /* 20 - 29 */
    13, 12, 12, 12, 12, 12, 12};             /* 30 - 36 */
#else
#error "digitlimit: unsupported unsigned long width (need 32 or 64 bits)"
#endif
/* Character-code to digit-value map for bases 2-36.
 *
 * '0'-'9' map to 0-9; 'A'-'Z' and 'a'-'z' both map to 10-35.  Every
 * other slot holds 37, which is larger than any digit of any supported
 * base, so a "value < base" comparison rejects it.
 *
 * Each row below covers 16 consecutive character codes; the trailing
 * comment gives the code of the first entry in the row.
 *
 * NOTE(review): the table has 384 entries.  If it is only ever indexed
 * with values in 0-255, the last 128 entries are unreachable -- confirm
 * against the callers before trimming.
 */
static int digitlookup[] = {
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x000 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x010 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x020 */
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 37, 37, 37, 37, 37, 37,  /* 0x030: '0'-'9' */
    37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  /* 0x040: 'A'-'O' */
    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,  /* 0x050: 'P'-'Z' */
    37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  /* 0x060: 'a'-'o' */
    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,  /* 0x070: 'p'-'z' */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x080 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x090 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x0A0 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x0B0 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x0C0 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x0D0 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x0E0 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x0F0 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x100 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x110 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x120 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x130 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x140 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x150 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,  /* 0x160 */
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37   /* 0x170 */
};
/*
** strtoul
** This is a general purpose routine for converting
@ -28,98 +116,100 @@
** Errors due to bad pointers will probably result in
** exceptions - we don't check for them.
*/
#include <ctype.h>
#ifndef DONT_HAVE_ERRNO_H
#include <errno.h>
#endif
/*
 * PyOS_strtoul -- convert the leading digits of str to an unsigned long.
 *
 * str   NUL-terminated input.  Leading white space is skipped.
 * ptr   if not NULL, *ptr receives the address of the first character
 *       that was not consumed.
 * base  2..36, or 0 to auto-detect: a leading "0x"/"0X" selects 16, any
 *       other leading '0' selects 8, anything else selects 10.  With
 *       base 16 an optional leading "0x"/"0X" is skipped.
 *
 * Returns the converted value.  If the base is invalid, returns 0 with
 * *ptr set to the position after any leading white space.  If the value
 * does not fit in an unsigned long, sets errno to ERANGE, returns
 * (unsigned long)-1 and sets *ptr past ALL remaining digit characters.
 * errno is left untouched on success.
 */
unsigned long
PyOS_strtoul(register char *str, char **ptr, int base)
{
    register unsigned long result = 0;  /* return value of the function */
    register int c;                     /* value of the current digit */
    register int ovlimit;               /* digits left before checks start */
    /* digitlimit may underestimate the safe digit count when unsigned
     * long is wider than 32 bits.  Running past it therefore proves
     * overflow only when unsigned long is exactly 32 bits; otherwise
     * every further digit has to be checked individually.
     */
    const int digitlimit_is_tight = ((unsigned long)-1 == 0xFFFFFFFFUL);

    /* skip leading white space */
    while (*str && isspace(Py_CHARMASK(*str)))
        ++str;

    /* check for leading 0 or 0x for auto-base or base 16 */
    switch (base) {
    case 0:     /* look for leading 0, 0x or 0X */
        if (*str == '0') {
            ++str;
            if (*str == 'x' || *str == 'X') {
                ++str;
                base = 16;
            }
            else
                base = 8;
        }
        else
            base = 10;
        break;

    case 16:    /* skip leading 0x or 0X */
        if (*str == '0') {
            ++str;
            if (*str == 'x' || *str == 'X')
                ++str;
        }
        break;
    }

    /* catch silly bases */
    if (base < 2 || base > 36) {
        if (ptr)
            *ptr = str;
        return 0;
    }

    /* skip leading zeroes so that only significant digits are counted
     * against the overflow limit
     */
    while (*str == '0')
        ++str;

    /* base is guaranteed to be in [2, 36] at this point */
    ovlimit = digitlimit[base];

    /* Convert until a non-digit is met.  Non-digits (including the
     * terminating NUL) look up as 37, which is >= every valid base.
     */
    while ((c = digitlookup[Py_CHARMASK(*str)]) < base) {
        if (ovlimit > 0) {
            /* still within the digit count that cannot overflow */
            result = result * base + c;
        }
        else {
            register unsigned long temp_result;

            /* past the first checked digit: certain overflow, but
             * only if the limit table is exact for this platform
             */
            if (ovlimit < 0 && digitlimit_is_tight)
                goto overflowed;

            /* would multiplying by the base overflow? */
            if (result > smallmax[base])
                goto overflowed;
            result *= base;

            /* would adding the digit wrap around? */
            temp_result = result + c;
            if (temp_result < result)
                goto overflowed;
            result = temp_result;
        }

        ++str;
        --ovlimit;
    }

    /* set pointer to point to the last character scanned */
    if (ptr)
        *ptr = str;

    return result;

overflowed:
    if (ptr) {
        /* spool through remaining digit characters */
        while (digitlookup[Py_CHARMASK(*str)] < base)
            ++str;
        *ptr = str;
    }
    errno = ERANGE;
    return (unsigned long)-1;
}
/*
 * PyOS_strtol -- signed counterpart of PyOS_strtoul.
 *
 * Skips leading white space, accepts one optional '+' or '-' sign, and
 * hands the rest of the string to PyOS_strtoul with the same ptr and
 * base arguments.
 *
 * Returns the signed value.  If the magnitude does not fit in a long,
 * sets errno to ERANGE and returns LONG_MAX, or -LONG_MAX when the
 * input carried a '-' sign.  The most negative long is accepted without
 * error.
 *
 * All range checks are done on the unsigned magnitude, so no signed
 * overflow can occur and the result does not depend on the width of
 * long.
 */
long
PyOS_strtol(char *str, char **ptr, int base)
{
    unsigned long magnitude;
    /* |LONG_MIN|, computed in unsigned arithmetic to avoid overflow */
    const unsigned long min_magnitude = 0UL - (unsigned long)LONG_MIN;
    char sign;

    while (*str && isspace(Py_CHARMASK(*str)))
        str++;

    sign = *str;
    if (sign == '+' || sign == '-')
        str++;

    magnitude = PyOS_strtoul(str, ptr, base);

    if (magnitude <= (unsigned long)LONG_MAX) {
        long result = (long)magnitude;
        return (sign == '-') ? -result : result;
    }

    /* the only out-of-range magnitude that is still representable */
    if (sign == '-' && magnitude == min_magnitude)
        return LONG_MIN;

    /* Out of range.  This also covers PyOS_strtoul's own overflow
     * return of (unsigned long)-1.
     */
    errno = ERANGE;
    return (sign == '-') ? -LONG_MAX : LONG_MAX;
}