mirror of
https://github.com/python/cpython.git
synced 2025-09-01 22:47:59 +00:00
gh-129349: Accept bytes in bytes.fromhex()/bytearray.fromhex() (#129844)
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
405a2d74cb
commit
e0637cebe5
8 changed files with 90 additions and 69 deletions
|
@ -2744,6 +2744,10 @@ data and are closely related to string objects in a variety of other ways.
|
||||||
:meth:`bytes.fromhex` now skips all ASCII whitespace in the string,
|
:meth:`bytes.fromhex` now skips all ASCII whitespace in the string,
|
||||||
not just spaces.
|
not just spaces.
|
||||||
|
|
||||||
|
.. versionchanged:: next
|
||||||
|
:meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and
|
||||||
|
:term:`bytes-like objects <bytes-like object>` as input.
|
||||||
|
|
||||||
A reverse conversion function exists to transform a bytes object into its
|
A reverse conversion function exists to transform a bytes object into its
|
||||||
hexadecimal representation.
|
hexadecimal representation.
|
||||||
|
|
||||||
|
@ -2829,6 +2833,10 @@ objects.
|
||||||
:meth:`bytearray.fromhex` now skips all ASCII whitespace in the string,
|
:meth:`bytearray.fromhex` now skips all ASCII whitespace in the string,
|
||||||
not just spaces.
|
not just spaces.
|
||||||
|
|
||||||
|
.. versionchanged:: next
|
||||||
|
:meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` and
|
||||||
|
:term:`bytes-like objects <bytes-like object>` as input.
|
||||||
|
|
||||||
A reverse conversion function exists to transform a bytearray object into its
|
A reverse conversion function exists to transform a bytearray object into its
|
||||||
hexadecimal representation.
|
hexadecimal representation.
|
||||||
|
|
||||||
|
|
|
@ -354,6 +354,10 @@ Other language changes
|
||||||
(with :func:`format` or :ref:`f-strings`).
|
(with :func:`format` or :ref:`f-strings`).
|
||||||
(Contributed by Sergey B Kirpichev in :gh:`87790`.)
|
(Contributed by Sergey B Kirpichev in :gh:`87790`.)
|
||||||
|
|
||||||
|
* The :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` methods now accept
|
||||||
|
ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`.
|
||||||
|
(Contributed by Daniel Pope in :gh:`129349`.)
|
||||||
|
|
||||||
* ``\B`` in :mod:`regular expression <re>` now matches empty input string.
|
* ``\B`` in :mod:`regular expression <re>` now matches empty input string.
|
||||||
Now it is always the opposite of ``\b``.
|
Now it is always the opposite of ``\b``.
|
||||||
(Contributed by Serhiy Storchaka in :gh:`124130`.)
|
(Contributed by Serhiy Storchaka in :gh:`124130`.)
|
||||||
|
|
|
@ -450,13 +450,34 @@ class BaseBytesTest:
|
||||||
|
|
||||||
# check that ASCII whitespace is ignored
|
# check that ASCII whitespace is ignored
|
||||||
self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b)
|
self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b)
|
||||||
|
self.assertEqual(self.type2test.fromhex(b' 1A\n2B\t30\v'), b)
|
||||||
for c in "\x09\x0A\x0B\x0C\x0D\x20":
|
for c in "\x09\x0A\x0B\x0C\x0D\x20":
|
||||||
self.assertEqual(self.type2test.fromhex(c), self.type2test())
|
self.assertEqual(self.type2test.fromhex(c), self.type2test())
|
||||||
for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028":
|
for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028":
|
||||||
self.assertRaises(ValueError, self.type2test.fromhex, c)
|
self.assertRaises(ValueError, self.type2test.fromhex, c)
|
||||||
|
|
||||||
|
# Check that we can parse bytes and bytearray
|
||||||
|
tests = [
|
||||||
|
("bytes", bytes),
|
||||||
|
("bytearray", bytearray),
|
||||||
|
("memoryview", memoryview),
|
||||||
|
("array.array", lambda bs: array.array('B', bs)),
|
||||||
|
]
|
||||||
|
for name, factory in tests:
|
||||||
|
with self.subTest(name=name):
|
||||||
|
self.assertEqual(self.type2test.fromhex(factory(b' 1A 2B 30 ')), b)
|
||||||
|
|
||||||
|
# Invalid bytes are rejected
|
||||||
|
for u8 in b"\0\x1C\x1D\x1E\x1F\x85\xa0":
|
||||||
|
b = bytes([30, 31, u8])
|
||||||
|
self.assertRaises(ValueError, self.type2test.fromhex, b)
|
||||||
|
|
||||||
self.assertEqual(self.type2test.fromhex('0000'), b'\0\0')
|
self.assertEqual(self.type2test.fromhex('0000'), b'\0\0')
|
||||||
self.assertRaises(TypeError, self.type2test.fromhex, b'1B')
|
with self.assertRaisesRegex(
|
||||||
|
TypeError,
|
||||||
|
r'fromhex\(\) argument must be str or bytes-like, not tuple',
|
||||||
|
):
|
||||||
|
self.type2test.fromhex(())
|
||||||
self.assertRaises(ValueError, self.type2test.fromhex, 'a')
|
self.assertRaises(ValueError, self.type2test.fromhex, 'a')
|
||||||
self.assertRaises(ValueError, self.type2test.fromhex, 'rt')
|
self.assertRaises(ValueError, self.type2test.fromhex, 'rt')
|
||||||
self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd')
|
self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd')
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
:meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accept ASCII
|
||||||
|
:class:`bytes` and :term:`bytes-like objects <bytes-like object>`.
|
|
@ -2533,7 +2533,7 @@ bytearray_splitlines_impl(PyByteArrayObject *self, int keepends)
|
||||||
@classmethod
|
@classmethod
|
||||||
bytearray.fromhex
|
bytearray.fromhex
|
||||||
|
|
||||||
string: unicode
|
string: object
|
||||||
/
|
/
|
||||||
|
|
||||||
Create a bytearray object from a string of hexadecimal numbers.
|
Create a bytearray object from a string of hexadecimal numbers.
|
||||||
|
@ -2543,8 +2543,8 @@ Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef')
|
||||||
[clinic start generated code]*/
|
[clinic start generated code]*/
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytearray_fromhex_impl(PyTypeObject *type, PyObject *string)
|
bytearray_fromhex(PyTypeObject *type, PyObject *string)
|
||||||
/*[clinic end generated code: output=8f0f0b6d30fb3ba0 input=f033a16d1fb21f48]*/
|
/*[clinic end generated code: output=da84dc708e9c4b36 input=7e314e5b2d7ab484]*/
|
||||||
{
|
{
|
||||||
PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type);
|
PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type);
|
||||||
if (type != &PyByteArray_Type && result != NULL) {
|
if (type != &PyByteArray_Type && result != NULL) {
|
||||||
|
|
|
@ -2484,7 +2484,7 @@ bytes_splitlines_impl(PyBytesObject *self, int keepends)
|
||||||
@classmethod
|
@classmethod
|
||||||
bytes.fromhex
|
bytes.fromhex
|
||||||
|
|
||||||
string: unicode
|
string: object
|
||||||
/
|
/
|
||||||
|
|
||||||
Create a bytes object from a string of hexadecimal numbers.
|
Create a bytes object from a string of hexadecimal numbers.
|
||||||
|
@ -2494,8 +2494,8 @@ Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
|
||||||
[clinic start generated code]*/
|
[clinic start generated code]*/
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
|
bytes_fromhex(PyTypeObject *type, PyObject *string)
|
||||||
/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
|
/*[clinic end generated code: output=d458ec88195da6b3 input=f37d98ed51088a21]*/
|
||||||
{
|
{
|
||||||
PyObject *result = _PyBytes_FromHex(string, 0);
|
PyObject *result = _PyBytes_FromHex(string, 0);
|
||||||
if (type != &PyBytes_Type && result != NULL) {
|
if (type != &PyBytes_Type && result != NULL) {
|
||||||
|
@ -2510,37 +2510,55 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
|
||||||
char *buf;
|
char *buf;
|
||||||
Py_ssize_t hexlen, invalid_char;
|
Py_ssize_t hexlen, invalid_char;
|
||||||
unsigned int top, bot;
|
unsigned int top, bot;
|
||||||
const Py_UCS1 *str, *end;
|
const Py_UCS1 *str, *start, *end;
|
||||||
_PyBytesWriter writer;
|
_PyBytesWriter writer;
|
||||||
|
Py_buffer view;
|
||||||
|
view.obj = NULL;
|
||||||
|
|
||||||
_PyBytesWriter_Init(&writer);
|
_PyBytesWriter_Init(&writer);
|
||||||
writer.use_bytearray = use_bytearray;
|
writer.use_bytearray = use_bytearray;
|
||||||
|
|
||||||
assert(PyUnicode_Check(string));
|
if (PyUnicode_Check(string)) {
|
||||||
hexlen = PyUnicode_GET_LENGTH(string);
|
hexlen = PyUnicode_GET_LENGTH(string);
|
||||||
|
|
||||||
if (!PyUnicode_IS_ASCII(string)) {
|
if (!PyUnicode_IS_ASCII(string)) {
|
||||||
const void *data = PyUnicode_DATA(string);
|
const void *data = PyUnicode_DATA(string);
|
||||||
int kind = PyUnicode_KIND(string);
|
int kind = PyUnicode_KIND(string);
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
|
|
||||||
/* search for the first non-ASCII character */
|
/* search for the first non-ASCII character */
|
||||||
for (i = 0; i < hexlen; i++) {
|
for (i = 0; i < hexlen; i++) {
|
||||||
if (PyUnicode_READ(kind, data, i) >= 128)
|
if (PyUnicode_READ(kind, data, i) >= 128)
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
invalid_char = i;
|
||||||
|
goto error;
|
||||||
}
|
}
|
||||||
invalid_char = i;
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
|
assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
|
||||||
str = PyUnicode_1BYTE_DATA(string);
|
str = PyUnicode_1BYTE_DATA(string);
|
||||||
|
}
|
||||||
|
else if (PyObject_CheckBuffer(string)) {
|
||||||
|
if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
hexlen = view.len;
|
||||||
|
str = view.buf;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyErr_Format(PyExc_TypeError,
|
||||||
|
"fromhex() argument must be str or bytes-like, not %T",
|
||||||
|
string);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* This overestimates if there are spaces */
|
/* This overestimates if there are spaces */
|
||||||
buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
|
buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
|
||||||
if (buf == NULL)
|
if (buf == NULL) {
|
||||||
return NULL;
|
goto release_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
start = str;
|
||||||
end = str + hexlen;
|
end = str + hexlen;
|
||||||
while (str < end) {
|
while (str < end) {
|
||||||
/* skip over spaces in the input */
|
/* skip over spaces in the input */
|
||||||
|
@ -2554,7 +2572,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
|
||||||
|
|
||||||
top = _PyLong_DigitValue[*str];
|
top = _PyLong_DigitValue[*str];
|
||||||
if (top >= 16) {
|
if (top >= 16) {
|
||||||
invalid_char = str - PyUnicode_1BYTE_DATA(string);
|
invalid_char = str - start;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
str++;
|
str++;
|
||||||
|
@ -2565,7 +2583,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
|
||||||
if (str >= end){
|
if (str >= end){
|
||||||
invalid_char = -1;
|
invalid_char = -1;
|
||||||
} else {
|
} else {
|
||||||
invalid_char = str - PyUnicode_1BYTE_DATA(string);
|
invalid_char = str - start;
|
||||||
}
|
}
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
@ -2574,6 +2592,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
|
||||||
*buf++ = (unsigned char)((top << 4) + bot);
|
*buf++ = (unsigned char)((top << 4) + bot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (view.obj != NULL) {
|
||||||
|
PyBuffer_Release(&view);
|
||||||
|
}
|
||||||
return _PyBytesWriter_Finish(&writer, buf);
|
return _PyBytesWriter_Finish(&writer, buf);
|
||||||
|
|
||||||
error:
|
error:
|
||||||
|
@ -2586,6 +2607,11 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
|
||||||
"fromhex() arg at position %zd", invalid_char);
|
"fromhex() arg at position %zd", invalid_char);
|
||||||
}
|
}
|
||||||
_PyBytesWriter_Dealloc(&writer);
|
_PyBytesWriter_Dealloc(&writer);
|
||||||
|
|
||||||
|
release_buffer:
|
||||||
|
if (view.obj != NULL) {
|
||||||
|
PyBuffer_Release(&view);
|
||||||
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
22
Objects/clinic/bytearrayobject.c.h
generated
22
Objects/clinic/bytearrayobject.c.h
generated
|
@ -1601,26 +1601,6 @@ PyDoc_STRVAR(bytearray_fromhex__doc__,
|
||||||
#define BYTEARRAY_FROMHEX_METHODDEF \
|
#define BYTEARRAY_FROMHEX_METHODDEF \
|
||||||
{"fromhex", (PyCFunction)bytearray_fromhex, METH_O|METH_CLASS, bytearray_fromhex__doc__},
|
{"fromhex", (PyCFunction)bytearray_fromhex, METH_O|METH_CLASS, bytearray_fromhex__doc__},
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
bytearray_fromhex_impl(PyTypeObject *type, PyObject *string);
|
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
bytearray_fromhex(PyTypeObject *type, PyObject *arg)
|
|
||||||
{
|
|
||||||
PyObject *return_value = NULL;
|
|
||||||
PyObject *string;
|
|
||||||
|
|
||||||
if (!PyUnicode_Check(arg)) {
|
|
||||||
_PyArg_BadArgument("fromhex", "argument", "str", arg);
|
|
||||||
goto exit;
|
|
||||||
}
|
|
||||||
string = arg;
|
|
||||||
return_value = bytearray_fromhex_impl(type, string);
|
|
||||||
|
|
||||||
exit:
|
|
||||||
return return_value;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyDoc_STRVAR(bytearray_hex__doc__,
|
PyDoc_STRVAR(bytearray_hex__doc__,
|
||||||
"hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n"
|
"hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
|
@ -1789,4 +1769,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored))
|
||||||
{
|
{
|
||||||
return bytearray_sizeof_impl((PyByteArrayObject *)self);
|
return bytearray_sizeof_impl((PyByteArrayObject *)self);
|
||||||
}
|
}
|
||||||
/*[clinic end generated code: output=7c924a56e0a8bfe6 input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=13a4231325b7d3c1 input=a9049054013a1b77]*/
|
||||||
|
|
22
Objects/clinic/bytesobject.c.h
generated
22
Objects/clinic/bytesobject.c.h
generated
|
@ -1204,26 +1204,6 @@ PyDoc_STRVAR(bytes_fromhex__doc__,
|
||||||
#define BYTES_FROMHEX_METHODDEF \
|
#define BYTES_FROMHEX_METHODDEF \
|
||||||
{"fromhex", (PyCFunction)bytes_fromhex, METH_O|METH_CLASS, bytes_fromhex__doc__},
|
{"fromhex", (PyCFunction)bytes_fromhex, METH_O|METH_CLASS, bytes_fromhex__doc__},
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
|
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
bytes_fromhex(PyTypeObject *type, PyObject *arg)
|
|
||||||
{
|
|
||||||
PyObject *return_value = NULL;
|
|
||||||
PyObject *string;
|
|
||||||
|
|
||||||
if (!PyUnicode_Check(arg)) {
|
|
||||||
_PyArg_BadArgument("fromhex", "argument", "str", arg);
|
|
||||||
goto exit;
|
|
||||||
}
|
|
||||||
string = arg;
|
|
||||||
return_value = bytes_fromhex_impl(type, string);
|
|
||||||
|
|
||||||
exit:
|
|
||||||
return return_value;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyDoc_STRVAR(bytes_hex__doc__,
|
PyDoc_STRVAR(bytes_hex__doc__,
|
||||||
"hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n"
|
"hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
|
@ -1404,4 +1384,4 @@ skip_optional_pos:
|
||||||
exit:
|
exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
/*[clinic end generated code: output=61cb2cf6506df4c6 input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=967aae4b46423586 input=a9049054013a1b77]*/
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue