mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
Add _PyUnicodeWriter_WriteASCIIString() function
This commit is contained in:
parent
4d3f109ad3
commit
4a58707a34
4 changed files with 96 additions and 38 deletions
|
@ -962,12 +962,20 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
|
||||||
Py_ssize_t end
|
Py_ssize_t end
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/* Append an ASCII-encoded byte string.
|
||||||
|
Return 0 on success, raise an exception and return -1 on error. */
|
||||||
|
PyAPI_FUNC(int)
|
||||||
|
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
||||||
|
const char *str, /* ASCII-encoded byte string */
|
||||||
|
Py_ssize_t len /* number of bytes, or -1 if unknown */
|
||||||
|
);
|
||||||
|
|
||||||
/* Append a latin1-encoded byte string.
|
/* Append a latin1-encoded byte string.
|
||||||
Return 0 on success, raise an exception and return -1 on error. */
|
Return 0 on success, raise an exception and return -1 on error. */
|
||||||
PyAPI_FUNC(int)
|
PyAPI_FUNC(int)
|
||||||
_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer,
|
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
|
||||||
const char *str, /* latin1-encoded byte string */
|
const char *str, /* latin1-encoded byte string */
|
||||||
Py_ssize_t len /* length in bytes */
|
Py_ssize_t len /* length in bytes */
|
||||||
);
|
);
|
||||||
|
|
||||||
/* Get the value of the writer as a Unicode string. Clear the
|
/* Get the value of the writer as a Unicode string. Clear the
|
||||||
|
@ -979,6 +987,9 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
|
||||||
/* Deallocate memory of a writer (clear its internal buffer). */
|
/* Deallocate memory of a writer (clear its internal buffer). */
|
||||||
PyAPI_FUNC(void)
|
PyAPI_FUNC(void)
|
||||||
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
|
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
|
||||||
|
|
||||||
|
PyAPI_FUNC(int) _PyObject_ReprWriter(_PyUnicodeWriter *writer,
|
||||||
|
PyObject *v);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
|
|
|
@ -339,19 +339,12 @@ list_repr(PyListObject *v)
|
||||||
{
|
{
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
PyObject *s;
|
PyObject *s;
|
||||||
static PyObject *sep = NULL;
|
|
||||||
_PyUnicodeWriter writer;
|
_PyUnicodeWriter writer;
|
||||||
|
|
||||||
if (Py_SIZE(v) == 0) {
|
if (Py_SIZE(v) == 0) {
|
||||||
return PyUnicode_FromString("[]");
|
return PyUnicode_FromString("[]");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sep == NULL) {
|
|
||||||
sep = PyUnicode_FromString(", ");
|
|
||||||
if (sep == NULL)
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
i = Py_ReprEnter((PyObject*)v);
|
i = Py_ReprEnter((PyObject*)v);
|
||||||
if (i != 0) {
|
if (i != 0) {
|
||||||
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
|
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
|
||||||
|
@ -369,7 +362,7 @@ list_repr(PyListObject *v)
|
||||||
so must refetch the list size on each iteration. */
|
so must refetch the list size on each iteration. */
|
||||||
for (i = 0; i < Py_SIZE(v); ++i) {
|
for (i = 0; i < Py_SIZE(v); ++i) {
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
if (_PyUnicodeWriter_WriteStr(&writer, sep) < 0)
|
if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -140,9 +140,9 @@ extern "C" {
|
||||||
buffer where the result characters are written to. */
|
buffer where the result characters are written to. */
|
||||||
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
|
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
|
||||||
do { \
|
do { \
|
||||||
to_type *_to = (to_type *) to; \
|
to_type *_to = (to_type *)(to); \
|
||||||
const from_type *_iter = (begin); \
|
const from_type *_iter = (from_type *)(begin); \
|
||||||
const from_type *_end = (end); \
|
const from_type *_end = (from_type *)(end); \
|
||||||
Py_ssize_t n = (_end) - (_iter); \
|
Py_ssize_t n = (_end) - (_iter); \
|
||||||
const from_type *_unrolled_end = \
|
const from_type *_unrolled_end = \
|
||||||
_iter + _Py_SIZE_ROUND_DOWN(n, 4); \
|
_iter + _Py_SIZE_ROUND_DOWN(n, 4); \
|
||||||
|
@ -2562,7 +2562,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
||||||
precision = len;
|
precision = len;
|
||||||
|
|
||||||
arglen = Py_MAX(precision, width);
|
arglen = Py_MAX(precision, width);
|
||||||
assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127);
|
|
||||||
if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
|
if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -2581,8 +2580,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
||||||
writer->pos += fill;
|
writer->pos += fill;
|
||||||
}
|
}
|
||||||
|
|
||||||
unicode_write_cstr(writer->buffer, writer->pos, buffer, len);
|
if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0)
|
||||||
writer->pos += len;
|
return NULL;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2604,11 +2603,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
||||||
len += 2;
|
len += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127);
|
if (_PyUnicodeWriter_WriteASCIIString(writer, number, len) < 0)
|
||||||
if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
|
|
||||||
return NULL;
|
return NULL;
|
||||||
unicode_write_cstr(writer->buffer, writer->pos, number, len);
|
|
||||||
writer->pos += len;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2707,7 +2703,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
||||||
skip the code, since there's no way to know what's in the
|
skip the code, since there's no way to know what's in the
|
||||||
argument list) */
|
argument list) */
|
||||||
len = strlen(p);
|
len = strlen(p);
|
||||||
if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1)
|
if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
f = p+len;
|
f = p+len;
|
||||||
return f;
|
return f;
|
||||||
|
@ -2759,10 +2755,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
|
|
||||||
if (*p == '\0')
|
if (*p == '\0')
|
||||||
writer.overallocate = 0;
|
writer.overallocate = 0;
|
||||||
if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1)
|
|
||||||
|
if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
|
||||||
goto fail;
|
goto fail;
|
||||||
unicode_write_cstr(writer.buffer, writer.pos, f, len);
|
|
||||||
writer.pos += len;
|
|
||||||
|
|
||||||
f = p;
|
f = p;
|
||||||
}
|
}
|
||||||
|
@ -13461,7 +13456,68 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
|
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
||||||
|
const char *ascii, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
if (len == -1)
|
||||||
|
len = strlen(ascii);
|
||||||
|
|
||||||
|
assert(ucs1lib_find_max_char((Py_UCS1*)ascii, (Py_UCS1*)ascii + len) < 128);
|
||||||
|
|
||||||
|
if (writer->buffer == NULL && !writer->overallocate) {
|
||||||
|
PyObject *str;
|
||||||
|
|
||||||
|
str = _PyUnicode_FromASCII(ascii, len);
|
||||||
|
if (str == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
writer->readonly = 1;
|
||||||
|
writer->buffer = str;
|
||||||
|
_PyUnicodeWriter_Update(writer);
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
switch (writer->kind)
|
||||||
|
{
|
||||||
|
case PyUnicode_1BYTE_KIND:
|
||||||
|
{
|
||||||
|
const Py_UCS1 *str = (const Py_UCS1 *)ascii;
|
||||||
|
Py_UCS1 *data = writer->data;
|
||||||
|
|
||||||
|
Py_MEMCPY(data + writer->pos, str, len);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_2BYTE_KIND:
|
||||||
|
{
|
||||||
|
_PyUnicode_CONVERT_BYTES(
|
||||||
|
Py_UCS1, Py_UCS2,
|
||||||
|
ascii, ascii + len,
|
||||||
|
(Py_UCS2 *)writer->data + writer->pos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_4BYTE_KIND:
|
||||||
|
{
|
||||||
|
_PyUnicode_CONVERT_BYTES(
|
||||||
|
Py_UCS1, Py_UCS4,
|
||||||
|
ascii, ascii + len,
|
||||||
|
(Py_UCS4 *)writer->data + writer->pos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
|
||||||
|
const char *str, Py_ssize_t len)
|
||||||
{
|
{
|
||||||
Py_UCS4 maxchar;
|
Py_UCS4 maxchar;
|
||||||
|
|
||||||
|
@ -13828,12 +13884,10 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
|
||||||
return -1;
|
return -1;
|
||||||
len = strlen(p);
|
len = strlen(p);
|
||||||
if (writer) {
|
if (writer) {
|
||||||
if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) {
|
if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
|
||||||
PyMem_Free(p);
|
PyMem_Free(p);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
unicode_write_cstr(writer->buffer, writer->pos, p, len);
|
|
||||||
writer->pos += len;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
*p_output = _PyUnicode_FromASCII(p, len);
|
*p_output = _PyUnicode_FromASCII(p, len);
|
||||||
|
|
|
@ -1053,24 +1053,24 @@ format_float_internal(PyObject *value,
|
||||||
n_digits += 1;
|
n_digits += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Since there is no unicode version of PyOS_double_to_string,
|
|
||||||
just use the 8 bit version and then convert to unicode. */
|
|
||||||
unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
|
|
||||||
PyMem_Free(buf);
|
|
||||||
if (unicode_tmp == NULL)
|
|
||||||
goto done;
|
|
||||||
|
|
||||||
if (format->sign != '+' && format->sign != ' '
|
if (format->sign != '+' && format->sign != ' '
|
||||||
&& format->width == -1
|
&& format->width == -1
|
||||||
&& format->type != 'n'
|
&& format->type != 'n'
|
||||||
&& !format->thousands_separators)
|
&& !format->thousands_separators)
|
||||||
{
|
{
|
||||||
/* Fast path */
|
/* Fast path */
|
||||||
result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp);
|
result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
|
||||||
Py_DECREF(unicode_tmp);
|
PyMem_Free(buf);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Since there is no unicode version of PyOS_double_to_string,
|
||||||
|
just use the 8 bit version and then convert to unicode. */
|
||||||
|
unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
|
||||||
|
PyMem_Free(buf);
|
||||||
|
if (unicode_tmp == NULL)
|
||||||
|
goto done;
|
||||||
|
|
||||||
/* Is a sign character present in the output? If so, remember it
|
/* Is a sign character present in the output? If so, remember it
|
||||||
and skip it */
|
and skip it */
|
||||||
index = 0;
|
index = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue