Optimize bytes.fromhex() and bytearray.fromhex()

Issue #25401: Optimize bytes.fromhex() and bytearray.fromhex(): they are now
between 2x and 3.5x faster. Changes:

* Use a fast path working on a char* string for ASCII strings
* Use a slow path for non-ASCII strings
* Replace the slow hex_digit_to_int() function with an O(1) lookup in
  the precomputed _PyLong_DigitValue table
* Use _PyBytesWriter API to handle the buffer
* Add unit tests to check the error position in error messages
This commit is contained in:
Victor Stinner 2015-10-14 11:25:33 +02:00
parent ebcf9edc05
commit 2bf8993db9
7 changed files with 101 additions and 95 deletions

View file

@ -2823,48 +2823,7 @@ static PyObject *
bytearray_fromhex_impl(PyObject*cls, PyObject *string)
/*[clinic end generated code: output=df3da60129b3700c input=907bbd2d34d9367a]*/
{
    /* Implementation of bytearray.fromhex().
     *
     * cls    -- the receiving class; not used by this function.
     * string -- the hexadecimal text to decode.
     *
     * All parsing is delegated to _PyBytes_FromHex() so that no
     * hand-rolled per-character decoding loop is duplicated here.
     *
     * NOTE(review): the second argument (1) presumably selects a
     * bytearray result rather than bytes, and the helper presumably
     * returns NULL with an exception set on invalid input -- confirm
     * both against the definition of _PyBytes_FromHex().
     */
    return _PyBytes_FromHex(string, 1);
}
PyDoc_STRVAR(hex__doc__,