mirror of
https://github.com/python/cpython.git
synced 2025-10-15 19:29:46 +00:00
gh-119182: Rewrite PyUnicodeWriter tests in Python (#120845)
This commit is contained in:
parent
4123226bbd
commit
913a956d85
2 changed files with 388 additions and 314 deletions
|
@ -16,6 +16,10 @@ try:
|
||||||
import _testinternalcapi
|
import _testinternalcapi
|
||||||
except ImportError:
|
except ImportError:
|
||||||
_testinternalcapi = None
|
_testinternalcapi = None
|
||||||
|
try:
|
||||||
|
import ctypes
|
||||||
|
except ImportError:
|
||||||
|
ctypes = None
|
||||||
|
|
||||||
|
|
||||||
NULL = None
|
NULL = None
|
||||||
|
@ -352,13 +356,13 @@ class CAPITest(unittest.TestCase):
|
||||||
self.assertRaises(TypeError, fromobject, [])
|
self.assertRaises(TypeError, fromobject, [])
|
||||||
# CRASHES fromobject(NULL)
|
# CRASHES fromobject(NULL)
|
||||||
|
|
||||||
|
@unittest.skipIf(ctypes is None, 'need ctypes')
|
||||||
def test_from_format(self):
|
def test_from_format(self):
|
||||||
"""Test PyUnicode_FromFormat()"""
|
"""Test PyUnicode_FromFormat()"""
|
||||||
# Length modifiers "j" and "t" are not tested here because ctypes does
|
# Length modifiers "j" and "t" are not tested here because ctypes does
|
||||||
# not expose types for intmax_t and ptrdiff_t.
|
# not expose types for intmax_t and ptrdiff_t.
|
||||||
# _testlimitedcapi.test_string_from_format() has a wider coverage of all
|
# _testlimitedcapi.test_string_from_format() has a wider coverage of all
|
||||||
# formats.
|
# formats.
|
||||||
import_helper.import_module('ctypes')
|
|
||||||
from ctypes import (
|
from ctypes import (
|
||||||
c_char_p,
|
c_char_p,
|
||||||
pythonapi, py_object, sizeof,
|
pythonapi, py_object, sizeof,
|
||||||
|
@ -1676,5 +1680,149 @@ class CAPITest(unittest.TestCase):
|
||||||
self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
|
self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
|
||||||
|
|
||||||
|
|
||||||
|
class PyUnicodeWriterTest(unittest.TestCase):
    """Exercise the PyUnicodeWriter C API through _testcapi.PyUnicodeWriter."""

    # Per-test notes are comments rather than docstrings so that unittest's
    # verbose output keeps reporting the test method names.

    def create_writer(self, size):
        # `size` is forwarded unchanged to the _testcapi wrapper type.
        return _testcapi.PyUnicodeWriter(size)

    def test_basic(self):
        writer = self.create_writer(100)

        # test PyUnicodeWriter_WriteUTF8()
        writer.write_utf8(b'var', -1)

        # test PyUnicodeWriter_WriteChar()
        writer.write_char('=')

        # test PyUnicodeWriter_WriteSubstring()
        writer.write_substring("[long]", 1, 5)

        # test PyUnicodeWriter_WriteStr()
        writer.write_str(" value ")

        # test PyUnicodeWriter_WriteRepr()
        writer.write_repr("repr")

        self.assertEqual(writer.finish(),
                         "var=long value 'repr'")

    def test_utf8(self):
        # ASCII, 2-byte and 3-byte UTF-8 sequences written back to back.
        writer = self.create_writer(0)
        writer.write_utf8(b"ascii", -1)
        writer.write_char('-')
        writer.write_utf8(b"latin1=\xC3\xA9", -1)
        writer.write_char('-')
        writer.write_utf8(b"euro=\xE2\x82\xAC", -1)
        writer.write_char('.')
        self.assertEqual(writer.finish(),
                         "ascii-latin1=\xE9-euro=\u20AC.")

    def test_invalid_utf8(self):
        writer = self.create_writer(0)
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid=\xFF", -1)

    def test_recover_utf8_error(self):
        # test recovering from PyUnicodeWriter_WriteUTF8() error
        writer = self.create_writer(0)
        writer.write_utf8(b"value=", -1)

        # write fails with an invalid string
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid\xFF", -1)

        # retry write with a valid string
        writer.write_utf8(b"valid", -1)

        # Nothing from the failed write shows up in the result.
        self.assertEqual(writer.finish(),
                         "value=valid")

    def test_decode_utf8(self):
        # test PyUnicodeWriter_DecodeUTF8Stateful()
        writer = self.create_writer(0)
        writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore")
        writer.write_char('-')
        writer.decodeutf8stateful(b"replace\xFF", -1, b"replace")
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence
        writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace")

        self.assertEqual(writer.finish(),
                         "ignore-replace\uFFFD-incomplete\uFFFD")

    def test_decode_utf8_consumed(self):
        # test PyUnicodeWriter_DecodeUTF8Stateful() with consumed
        writer = self.create_writer(0)

        # valid string
        consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True)
        self.assertEqual(consumed, 4)
        writer.write_char('-')

        # non-ASCII
        consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6, b"strict", True)
        self.assertEqual(consumed, 6)
        writer.write_char('-')

        # invalid UTF-8 (consumed is 0 on error)
        with self.assertRaises(UnicodeDecodeError):
            writer.decodeutf8stateful(b"invalid\xFF", -1, b"strict", True)

        # ignore error handler
        consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True)
        self.assertEqual(consumed, 5)
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence
        consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1, b"ignore", True)
        self.assertEqual(consumed, 10)

        self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete")

    def test_widechar(self):
        # test PyUnicodeWriter_WriteWideChar() via the write_widechar() wrapper
        writer = self.create_writer(0)
        writer.write_widechar("latin1=\xE9")
        writer.write_widechar("-")
        writer.write_widechar("euro=\u20AC")
        writer.write_char('.')
        self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
|
||||||
|
|
||||||
|
|
||||||
|
@unittest.skipIf(ctypes is None, 'need ctypes')
class PyUnicodeWriterFormatTest(unittest.TestCase):
    """Exercise PyUnicodeWriter_Format(), called through ctypes.pythonapi."""

    def create_writer(self, size):
        # `size` is forwarded unchanged to the _testcapi wrapper type.
        return _testcapi.PyUnicodeWriter(size)

    def writer_format(self, writer, *args):
        # Call PyUnicodeWriter_Format(writer, *args); raise ValueError when
        # the C function reports a negative status.
        from ctypes import c_char_p, pythonapi, c_int, c_void_p

        format_func = pythonapi.PyUnicodeWriter_Format
        # Only the two leading parameters are declared; the remaining
        # positional arguments are passed through as given by the caller.
        format_func.argtypes = (c_void_p, c_char_p)
        format_func.restype = c_int

        status = format_func(writer.get_pointer(), *args)
        if status < 0:
            raise ValueError("PyUnicodeWriter_Format failed")

    def test_format(self):
        from ctypes import c_int

        w = self.create_writer(0)
        self.writer_format(w, b'%s %i', b'abc', c_int(123))
        w.write_char('.')
        self.assertEqual(w.finish(), 'abc 123.')

    def test_recover_error(self):
        # test recovering from PyUnicodeWriter_Format() error
        w = self.create_writer(0)
        self.writer_format(w, b"%s ", b"Hello")

        # PyUnicodeWriter_Format() fails with an invalid format string
        with self.assertRaises(ValueError):
            self.writer_format(w, b"%s\xff", b"World")

        # Retry PyUnicodeWriter_Format() with a valid format string
        self.writer_format(w, b"%s.", b"World")

        self.assertEqual(w.finish(), 'Hello World.')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -221,370 +221,294 @@ unicode_copycharacters(PyObject *self, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// --- PyUnicodeWriter type -------------------------------------------------
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
PyUnicodeWriter *writer;
|
||||||
|
} WriterObject;
|
||||||
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
|
writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
PyUnicodeWriter *writer = PyUnicodeWriter_Create(100);
|
WriterObject *self = (WriterObject *)type->tp_alloc(type, 0);
|
||||||
if (writer == NULL) {
|
if (!self) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
self->writer = NULL;
|
||||||
// test PyUnicodeWriter_WriteUTF8()
|
return (PyObject*)self;
|
||||||
if (PyUnicodeWriter_WriteUTF8(writer, "var", -1) < 0) {
|
|
||||||
goto error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// test PyUnicodeWriter_WriteChar()
|
|
||||||
if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
|
static int
|
||||||
goto error;
|
writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs)
|
||||||
|
{
|
||||||
|
WriterObject *self = (WriterObject *)self_raw;
|
||||||
|
|
||||||
|
Py_ssize_t size;
|
||||||
|
if (!PyArg_ParseTuple(args, "n", &size)) {
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// test PyUnicodeWriter_WriteSubstring()
|
if (self->writer) {
|
||||||
PyObject *str = PyUnicode_FromString("[long]");
|
PyUnicodeWriter_Discard(self->writer);
|
||||||
if (str == NULL) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
int ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
|
|
||||||
Py_CLEAR(str);
|
|
||||||
if (ret < 0) {
|
|
||||||
goto error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// test PyUnicodeWriter_WriteStr()
|
self->writer = PyUnicodeWriter_Create(size);
|
||||||
str = PyUnicode_FromString(" value ");
|
if (self->writer == NULL) {
|
||||||
if (str == NULL) {
|
return -1;
|
||||||
goto error;
|
|
||||||
}
|
}
|
||||||
ret = PyUnicodeWriter_WriteStr(writer, str);
|
return 0;
|
||||||
Py_CLEAR(str);
|
|
||||||
if (ret < 0) {
|
|
||||||
goto error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// test PyUnicodeWriter_WriteRepr()
|
|
||||||
str = PyUnicode_FromString("repr");
|
static void
|
||||||
if (str == NULL) {
|
writer_dealloc(PyObject *self_raw)
|
||||||
goto error;
|
{
|
||||||
|
WriterObject *self = (WriterObject *)self_raw;
|
||||||
|
PyTypeObject *tp = Py_TYPE(self);
|
||||||
|
if (self->writer) {
|
||||||
|
PyUnicodeWriter_Discard(self->writer);
|
||||||
}
|
}
|
||||||
ret = PyUnicodeWriter_WriteRepr(writer, str);
|
tp->tp_free(self);
|
||||||
Py_CLEAR(str);
|
Py_DECREF(tp);
|
||||||
if (ret < 0) {
|
|
||||||
goto error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *result = PyUnicodeWriter_Finish(writer);
|
|
||||||
if (result == NULL) {
|
static inline int
|
||||||
return NULL;
|
writer_check(WriterObject *self)
|
||||||
|
{
|
||||||
|
if (self->writer == NULL) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, "operation on finished writer");
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
assert(PyUnicode_EqualToUTF8(result, "var=long value 'repr'"));
|
return 0;
|
||||||
Py_DECREF(result);
|
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
|
||||||
|
|
||||||
error:
|
|
||||||
PyUnicodeWriter_Discard(writer);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* write_char(str): wrapper for PyUnicodeWriter_WriteChar().

   `str` must be a str of length exactly 1; anything else raises ValueError.
   Returns None on success, NULL with an exception set on failure. */
static PyObject*
writer_write_char(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    PyObject *str;
    if (!PyArg_ParseTuple(args, "U", &str)) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(str) != 1) {
        PyErr_SetString(PyExc_ValueError, "expect a single character");
        /* Bail out here: without this return the code below would read
           index 0 of a possibly empty string and then return None while
           the ValueError is still pending. */
        return NULL;
    }
    Py_UCS4 ch = PyUnicode_READ_CHAR(str, 0);

    if (PyUnicodeWriter_WriteChar(self->writer, ch) < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
/* write_utf8(data, size): wrapper for PyUnicodeWriter_WriteUTF8().

   `data` is a bytes object and `size` is passed to the C function as-is.
   Returns None on success, NULL with an exception set on failure. */
static PyObject*
writer_write_utf8(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    char *utf8;
    Py_ssize_t length;
    if (!PyArg_ParseTuple(args, "yn", &utf8, &length)) {
        return NULL;
    }

    int status = PyUnicodeWriter_WriteUTF8(self->writer, utf8, length);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
/* write_widechar(str): wrapper for PyUnicodeWriter_WriteWideChar().

   The str argument is converted to a temporary wchar_t buffer which is
   freed again before returning.
   Returns None on success, NULL with an exception set on failure. */
static PyObject*
writer_write_widechar(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    PyObject *text;
    if (!PyArg_ParseTuple(args, "U", &text)) {
        return NULL;
    }

    Py_ssize_t length;
    wchar_t *buffer = PyUnicode_AsWideCharString(text, &length);
    if (buffer == NULL) {
        return NULL;
    }

    int status = PyUnicodeWriter_WriteWideChar(self->writer, buffer, length);
    /* Release the buffer on both the success and the failure path. */
    PyMem_Free(buffer);

    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
/* write_str(obj): wrapper for PyUnicodeWriter_WriteStr().

   `obj` is accepted as any object and handed to the C function unchecked.
   Returns None on success, NULL with an exception set on failure. */
static PyObject*
writer_write_str(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    PyObject *value;
    if (!PyArg_ParseTuple(args, "O", &value)) {
        return NULL;
    }

    int status = PyUnicodeWriter_WriteStr(self->writer, value);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
// consumed is 0 if write fails
|
|
||||||
consumed = 12345;
|
/* write_repr(obj): wrapper for PyUnicodeWriter_WriteRepr().

   `obj` is accepted as any object and handed to the C function unchecked.
   Returns None on success, NULL with an exception set on failure. */
static PyObject*
writer_write_repr(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    PyObject *value;
    if (!PyArg_ParseTuple(args, "O", &value)) {
        return NULL;
    }

    int status = PyUnicodeWriter_WriteRepr(self->writer, value);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* write_substring(str, start, end): wrapper for
   PyUnicodeWriter_WriteSubstring().

   `start` and `end` are forwarded to the C function without extra checks.
   Returns None on success, NULL with an exception set on failure. */
static PyObject*
writer_write_substring(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    PyObject *text;
    Py_ssize_t first;
    Py_ssize_t last;
    if (!PyArg_ParseTuple(args, "Unn", &text, &first, &last)) {
        return NULL;
    }

    int status = PyUnicodeWriter_WriteSubstring(self->writer, text, first, last);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* decodeutf8stateful(data, size, errors[, use_consumed]): wrapper for
   PyUnicodeWriter_DecodeUTF8Stateful().

   `data` and `errors` are bytes objects.  When `use_consumed` is true, a
   `consumed` out-parameter is passed to the C function and its value is
   returned as an int; otherwise NULL is passed and None is returned.
   Returns NULL with an exception set on failure. */
static PyObject*
writer_decodeutf8stateful(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    const char *data;
    Py_ssize_t size;
    const char *errors;
    int use_consumed = 0;
    if (!PyArg_ParseTuple(args, "yny|i", &data, &size, &errors, &use_consumed)) {
        return NULL;
    }

    /* Start from a recognisable non-zero value so that the assertion in the
       error branch can tell that the C function reset it to 0. */
    Py_ssize_t consumed = 12345;
    int status = PyUnicodeWriter_DecodeUTF8Stateful(
        self->writer, data, size, errors,
        use_consumed ? &consumed : NULL);
    if (status < 0) {
        if (use_consumed) {
            assert(consumed == 0);
        }
        return NULL;
    }

    if (!use_consumed) {
        Py_RETURN_NONE;
    }
    return PyLong_FromSsize_t(consumed);
}
|
||||||
|
|
||||||
|
|
||||||
/* get_pointer(): return the address of the wrapped PyUnicodeWriter as a
   Python int.  `args` is not inspected.
   Returns NULL with ValueError set once the writer has been finished. */
static PyObject*
writer_get_pointer(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    void *address = self->writer;
    return PyLong_FromVoidPtr(address);
}
|
||||||
|
|
||||||
PyObject *result = PyUnicodeWriter_Finish(writer);
|
|
||||||
if (result == NULL) {
|
/* finish(): wrapper for PyUnicodeWriter_Finish().

   Returns whatever PyUnicodeWriter_Finish() returns.  The stored handle is
   cleared afterwards, so later method calls fail in writer_check(). */
static PyObject*
writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args))
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    PyObject *result = PyUnicodeWriter_Finish(self->writer);
    /* Cleared regardless of whether Finish succeeded. */
    self->writer = NULL;
    return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyMethodDef writer_methods[] = {
|
||||||
|
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
|
||||||
|
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
|
||||||
|
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
|
||||||
|
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
|
||||||
|
{"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS},
|
||||||
|
{"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS},
|
||||||
|
{"decodeutf8stateful", _PyCFunction_CAST(writer_decodeutf8stateful), METH_VARARGS},
|
||||||
|
{"get_pointer", _PyCFunction_CAST(writer_get_pointer), METH_VARARGS},
|
||||||
|
{"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS},
|
||||||
|
{NULL, NULL} /* sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyType_Slot Writer_Type_slots[] = {
|
||||||
|
{Py_tp_new, writer_new},
|
||||||
|
{Py_tp_init, writer_init},
|
||||||
|
{Py_tp_dealloc, writer_dealloc},
|
||||||
|
{Py_tp_methods, writer_methods},
|
||||||
|
{0, 0}, /* sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyType_Spec Writer_spec = {
|
||||||
|
.name = "_testcapi.PyUnicodeWriter",
|
||||||
|
.basicsize = sizeof(WriterObject),
|
||||||
|
.flags = Py_TPFLAGS_DEFAULT,
|
||||||
|
.slots = Writer_Type_slots,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static PyMethodDef TestMethods[] = {
|
static PyMethodDef TestMethods[] = {
|
||||||
{"unicode_new", unicode_new, METH_VARARGS},
|
{"unicode_new", unicode_new, METH_VARARGS},
|
||||||
{"unicode_fill", unicode_fill, METH_VARARGS},
|
{"unicode_fill", unicode_fill, METH_VARARGS},
|
||||||
|
@ -593,15 +517,6 @@ static PyMethodDef TestMethods[] = {
|
||||||
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
|
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
|
||||||
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
|
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
|
||||||
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
|
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
|
||||||
{"test_unicodewriter", test_unicodewriter, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_decode_utf8_consumed", test_unicodewriter_decode_utf8_consumed, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
|
|
||||||
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS},
|
|
||||||
{NULL},
|
{NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -610,5 +525,16 @@ _PyTestCapi_Init_Unicode(PyObject *m) {
|
||||||
if (PyModule_AddFunctions(m, TestMethods) < 0) {
|
if (PyModule_AddFunctions(m, TestMethods) < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyTypeObject *writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec);
|
||||||
|
if (writer_type == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (PyModule_AddType(m, writer_type) < 0) {
|
||||||
|
Py_DECREF(writer_type);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
Py_DECREF(writer_type);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue