mirror of
https://github.com/python/cpython.git
synced 2025-07-07 11:25:30 +00:00
gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973)
Replace most PyUnicodeWriter_WriteUTF8() calls with PyUnicodeWriter_WriteASCII(). Unrelated change to please the linter: remove an unused import in test_ctypes. Co-authored-by: Peter Bierma <zintensitydev@gmail.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
4109a9c6b3
commit
f49a07b531
17 changed files with 103 additions and 31 deletions
|
@ -1802,9 +1802,24 @@ object.
|
|||
|
||||
See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
|
||||
|
||||
.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
|
||||
|
||||
Write the ASCII string *str* into *writer*.
|
||||
|
||||
*size* is the string length in bytes. If *size* is equal to ``-1``, call
|
||||
``strlen(str)`` to get the string length.
|
||||
|
||||
*str* must only contain ASCII characters. The behavior is undefined if
|
||||
*str* contains non-ASCII characters.
|
||||
|
||||
On success, return ``0``.
|
||||
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||
|
||||
.. versionadded:: next
|
||||
|
||||
.. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size)
|
||||
|
||||
Writer the wide string *str* into *writer*.
|
||||
Write the wide string *str* into *writer*.
|
||||
|
||||
*size* is a number of wide characters. If *size* is equal to ``-1``, call
|
||||
``wcslen(str)`` to get the string length.
|
||||
|
|
|
@ -226,6 +226,13 @@ New features
|
|||
functions as replacements for :c:func:`PySys_GetObject`.
|
||||
(Contributed by Serhiy Storchaka in :gh:`108512`.)
|
||||
|
||||
* Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
|
||||
into a :c:type:`PyUnicodeWriter`. The function is faster than
|
||||
:c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
|
||||
input string contains non-ASCII characters.
|
||||
(Contributed by Victor Stinner in :gh:`133968`.)
|
||||
|
||||
|
||||
Porting to Python 3.15
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
|
|||
PyUnicodeWriter *writer,
|
||||
const char *str,
|
||||
Py_ssize_t size);
|
||||
PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII(
|
||||
PyUnicodeWriter *writer,
|
||||
const char *str,
|
||||
Py_ssize_t size);
|
||||
PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
|
||||
PyUnicodeWriter *writer,
|
||||
const wchar_t *str,
|
||||
|
|
|
@ -1776,6 +1776,13 @@ class PyUnicodeWriterTest(unittest.TestCase):
|
|||
self.assertEqual(writer.finish(),
|
||||
"ascii-latin1=\xE9-euro=\u20AC.")
|
||||
|
||||
def test_ascii(self):
|
||||
writer = self.create_writer(0)
|
||||
writer.write_ascii(b"Hello ", -1)
|
||||
writer.write_ascii(b"", 0)
|
||||
writer.write_ascii(b"Python! <truncated>", 6)
|
||||
self.assertEqual(writer.finish(), "Hello Python")
|
||||
|
||||
def test_invalid_utf8(self):
|
||||
writer = self.create_writer(0)
|
||||
with self.assertRaises(UnicodeDecodeError):
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
import ctypes
|
||||
import unittest
|
||||
import warnings
|
||||
from ctypes import Structure, POINTER, pointer, c_char_p
|
||||
|
||||
# String-based "incomplete pointers" were implemented in ctypes 0.6.3 (2003, when
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
|
||||
into a :c:type:`PyUnicodeWriter`. The function is faster than
|
||||
:c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
|
||||
input string contains non-ASCII characters. Patch by Victor Stinner.
|
|
@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
int rv;
|
||||
|
||||
if (obj == Py_None) {
|
||||
return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
|
||||
return PyUnicodeWriter_WriteASCII(writer, "null", 4);
|
||||
}
|
||||
else if (obj == Py_True) {
|
||||
return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
|
||||
return PyUnicodeWriter_WriteASCII(writer, "true", 4);
|
||||
}
|
||||
else if (obj == Py_False) {
|
||||
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
|
||||
return PyUnicodeWriter_WriteASCII(writer, "false", 5);
|
||||
}
|
||||
else if (PyUnicode_Check(obj)) {
|
||||
PyObject *encoded = encoder_encode_string(s, obj);
|
||||
|
@ -1649,7 +1649,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
|
||||
if (PyDict_GET_SIZE(dct) == 0) {
|
||||
/* Fast path */
|
||||
return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
|
||||
return PyUnicodeWriter_WriteASCII(writer, "{}", 2);
|
||||
}
|
||||
|
||||
if (s->markers != Py_None) {
|
||||
|
@ -1753,7 +1753,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
return -1;
|
||||
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
|
||||
Py_DECREF(s_fast);
|
||||
return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
|
||||
return PyUnicodeWriter_WriteASCII(writer, "[]", 2);
|
||||
}
|
||||
|
||||
if (s->markers != Py_None) {
|
||||
|
|
|
@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state,
|
|||
goto fail;
|
||||
}
|
||||
}
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args)
|
|||
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
writer_write_ascii(PyObject *self_raw, PyObject *args)
|
||||
{
|
||||
WriterObject *self = (WriterObject *)self_raw;
|
||||
if (writer_check(self) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *str;
|
||||
Py_ssize_t size;
|
||||
if (!PyArg_ParseTuple(args, "yn", &str, &size)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
writer_write_widechar(PyObject *self_raw, PyObject *args)
|
||||
{
|
||||
|
@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args))
|
|||
static PyMethodDef writer_methods[] = {
|
||||
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
|
||||
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
|
||||
{"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS},
|
||||
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
|
||||
{"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
|
||||
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
|
||||
|
|
|
@ -65,7 +65,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p)
|
|||
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
if (i > 0) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -109,7 +109,7 @@ ga_repr(PyObject *self)
|
|||
}
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
if (i > 0) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
@ -126,7 +126,7 @@ ga_repr(PyObject *self)
|
|||
}
|
||||
if (len == 0) {
|
||||
// for something like tuple[()] we should print a "()"
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs)
|
|||
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) {
|
||||
PyObject *item = PyTuple_GET_ITEM(value, i);
|
||||
if (i > 0) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
|
||||
PyUnicodeWriter_Discard(writer);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p)
|
|||
}
|
||||
|
||||
if (p == (PyObject *)&_PyNone_Type) {
|
||||
return PyUnicodeWriter_WriteUTF8(writer, "None", 4);
|
||||
return PyUnicodeWriter_WriteASCII(writer, "None", 4);
|
||||
}
|
||||
|
||||
if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 &&
|
||||
|
|
|
@ -14083,6 +14083,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
|
||||
const char *str,
|
||||
Py_ssize_t size)
|
||||
{
|
||||
assert(writer != NULL);
|
||||
_Py_AssertHoldsTstate();
|
||||
|
||||
_PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
|
||||
return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
|
||||
const char *str,
|
||||
|
|
|
@ -290,7 +290,7 @@ union_repr(PyObject *self)
|
|||
}
|
||||
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) {
|
||||
if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) {
|
||||
goto error;
|
||||
}
|
||||
PyObject *p = PyTuple_GET_ITEM(alias->args, i);
|
||||
|
@ -300,12 +300,12 @@ union_repr(PyObject *self)
|
|||
}
|
||||
|
||||
#if 0
|
||||
PyUnicodeWriter_WriteUTF8(writer, "|args=", 6);
|
||||
PyUnicodeWriter_WriteASCII(writer, "|args=", 6);
|
||||
PyUnicodeWriter_WriteRepr(writer, alias->args);
|
||||
PyUnicodeWriter_WriteUTF8(writer, "|h=", 3);
|
||||
PyUnicodeWriter_WriteASCII(writer, "|h=", 3);
|
||||
PyUnicodeWriter_WriteRepr(writer, alias->hashable_args);
|
||||
if (alias->unhashable_args) {
|
||||
PyUnicodeWriter_WriteUTF8(writer, "|u=", 3);
|
||||
PyUnicodeWriter_WriteASCII(writer, "|u=", 3);
|
||||
PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1512,7 +1512,7 @@ ast_repr_list(PyObject *list, int depth)
|
|||
|
||||
for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
|
||||
if (i > 0) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
@ -1536,7 +1536,7 @@ ast_repr_list(PyObject *list, int depth)
|
|||
}
|
||||
|
||||
if (i == 0 && length > 2) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
@ -1640,7 +1640,7 @@ ast_repr_max_depth(AST_object *self, int depth)
|
|||
}
|
||||
|
||||
if (i > 0) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
|
||||
Py_DECREF(name);
|
||||
Py_DECREF(value_repr);
|
||||
goto error;
|
||||
|
|
6
Python/Python-ast.c
generated
6
Python/Python-ast.c
generated
|
@ -5796,7 +5796,7 @@ ast_repr_list(PyObject *list, int depth)
|
|||
|
||||
for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
|
||||
if (i > 0) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
@ -5820,7 +5820,7 @@ ast_repr_list(PyObject *list, int depth)
|
|||
}
|
||||
|
||||
if (i == 0 && length > 2) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
@ -5924,7 +5924,7 @@ ast_repr_max_depth(AST_object *self, int depth)
|
|||
}
|
||||
|
||||
if (i > 0) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
|
||||
Py_DECREF(name);
|
||||
Py_DECREF(value_repr);
|
||||
goto error;
|
||||
|
|
|
@ -979,7 +979,7 @@ contextvar_tp_repr(PyObject *op)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, "<ContextVar name=", 17) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, "<ContextVar name=", 17) < 0) {
|
||||
goto error;
|
||||
}
|
||||
if (PyUnicodeWriter_WriteRepr(writer, self->var_name) < 0) {
|
||||
|
@ -987,7 +987,7 @@ contextvar_tp_repr(PyObject *op)
|
|||
}
|
||||
|
||||
if (self->var_default != NULL) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, " default=", 9) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, " default=", 9) < 0) {
|
||||
goto error;
|
||||
}
|
||||
if (PyUnicodeWriter_WriteRepr(writer, self->var_default) < 0) {
|
||||
|
@ -1182,15 +1182,15 @@ token_tp_repr(PyObject *op)
|
|||
if (writer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, "<Token", 6) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, "<Token", 6) < 0) {
|
||||
goto error;
|
||||
}
|
||||
if (self->tok_used) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) {
|
||||
goto error;
|
||||
}
|
||||
if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) {
|
||||
|
|
|
@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
|
|||
}
|
||||
|
||||
if (key_or_null == NULL) {
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
|
|||
}
|
||||
}
|
||||
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node,
|
|||
goto error;
|
||||
}
|
||||
|
||||
if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
|
||||
if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue