mirror of
https://github.com/python/cpython.git
synced 2025-07-17 00:05:20 +00:00
Add 'U'/'U#' format characters to Py_BuildValue (and thus to PyObject_CallFunction()) that take a char * (plus a size in the case of 'U#') and create a unicode object from it. Add functions PyUnicode_FromFormat() and PyUnicode_FromFormatV(), which work similarly to PyString_FromFormat() but create a unicode object; a %U format character has also been added, which takes a PyObject * that must point to a unicode object. Change the encoding and reason attributes of UnicodeEncodeError, UnicodeDecodeError and UnicodeTranslateError to be unicode objects.
This commit is contained in:
parent
5550731d9c
commit
d2034310d6
6 changed files with 376 additions and 113 deletions
|
@ -393,15 +393,9 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
PyObject *PyUnicode_FromString(const char *u)
|
||||
PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
|
||||
{
|
||||
PyUnicodeObject *unicode;
|
||||
size_t size = strlen(u);
|
||||
if (size > PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError, "input too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If the Unicode data is known at construction time, we can apply
|
||||
some optimizations which share commonly used objects. */
|
||||
if (u != NULL) {
|
||||
|
@ -441,6 +435,17 @@ PyObject *PyUnicode_FromString(const char *u)
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
PyObject *PyUnicode_FromString(const char *u)
|
||||
{
|
||||
size_t size = strlen(u);
|
||||
if (size > PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError, "input too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyUnicode_FromStringAndSize(u, size);
|
||||
}
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
|
@ -473,6 +478,223 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||
{
|
||||
va_list count;
|
||||
Py_ssize_t n = 0;
|
||||
const char* f;
|
||||
Py_UNICODE *s;
|
||||
PyObject *string;
|
||||
/* used by sprintf */
|
||||
char buffer[21];
|
||||
const char *copy;
|
||||
|
||||
#ifdef VA_LIST_IS_ARRAY
|
||||
Py_MEMCPY(count, vargs, sizeof(va_list));
|
||||
#else
|
||||
#ifdef __va_copy
|
||||
__va_copy(count, vargs);
|
||||
#else
|
||||
count = vargs;
|
||||
#endif
|
||||
#endif
|
||||
/* step 1: figure out how large a buffer we need */
|
||||
for (f = format; *f; f++) {
|
||||
if (*f == '%') {
|
||||
const char* p = f;
|
||||
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
|
||||
;
|
||||
|
||||
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
|
||||
* they don't affect the amount of space we reserve.
|
||||
*/
|
||||
if ((*f == 'l' || *f == 'z') &&
|
||||
(f[1] == 'd' || f[1] == 'u'))
|
||||
++f;
|
||||
|
||||
switch (*f) {
|
||||
case 'c':
|
||||
(void)va_arg(count, int);
|
||||
/* fall through... */
|
||||
case '%':
|
||||
n++;
|
||||
break;
|
||||
case 'd': case 'u': case 'i': case 'x':
|
||||
(void) va_arg(count, int);
|
||||
/* 20 bytes is enough to hold a 64-bit
|
||||
integer. Decimal takes the most space.
|
||||
This isn't enough for octal. */
|
||||
n += 20;
|
||||
break;
|
||||
case 's':
|
||||
n += strlen(va_arg(count, char*));
|
||||
break;
|
||||
case 'U':
|
||||
{
|
||||
PyObject *obj = va_arg(count, PyObject *);
|
||||
assert(obj && PyUnicode_Check(obj));
|
||||
n += PyUnicode_GET_SIZE(obj);
|
||||
break;
|
||||
}
|
||||
case 'p':
|
||||
(void) va_arg(count, int);
|
||||
/* maximum 64-bit pointer representation:
|
||||
* 0xffffffffffffffff
|
||||
* so 19 characters is enough.
|
||||
* XXX I count 18 -- what's the extra for?
|
||||
*/
|
||||
n += 19;
|
||||
break;
|
||||
default:
|
||||
/* if we stumble upon an unknown
|
||||
formatting code, copy the rest of
|
||||
the format string to the output
|
||||
string. (we cannot just skip the
|
||||
code, since there's no way to know
|
||||
what's in the argument list) */
|
||||
n += strlen(p);
|
||||
goto expand;
|
||||
}
|
||||
} else
|
||||
n++;
|
||||
}
|
||||
expand:
|
||||
/* step 2: fill the buffer */
|
||||
/* Since we've analyzed how much space we need for the worst case,
|
||||
we don't have to resize the string. */
|
||||
string = PyUnicode_FromUnicode(NULL, n);
|
||||
if (!string)
|
||||
return NULL;
|
||||
|
||||
s = PyUnicode_AS_UNICODE(string);
|
||||
|
||||
for (f = format; *f; f++) {
|
||||
if (*f == '%') {
|
||||
const char* p = f++;
|
||||
int longflag = 0;
|
||||
int size_tflag = 0;
|
||||
/* parse the width.precision part (we're only
|
||||
interested in the precision value, if any) */
|
||||
n = 0;
|
||||
while (isdigit(Py_CHARMASK(*f)))
|
||||
n = (n*10) + *f++ - '0';
|
||||
if (*f == '.') {
|
||||
f++;
|
||||
n = 0;
|
||||
while (isdigit(Py_CHARMASK(*f)))
|
||||
n = (n*10) + *f++ - '0';
|
||||
}
|
||||
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
|
||||
f++;
|
||||
/* handle the long flag, but only for %ld and %lu.
|
||||
others can be added when necessary. */
|
||||
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
|
||||
longflag = 1;
|
||||
++f;
|
||||
}
|
||||
/* handle the size_t flag. */
|
||||
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
|
||||
size_tflag = 1;
|
||||
++f;
|
||||
}
|
||||
|
||||
switch (*f) {
|
||||
case 'c':
|
||||
*s++ = va_arg(vargs, int);
|
||||
break;
|
||||
case 'd':
|
||||
if (longflag)
|
||||
sprintf(buffer, "%ld", va_arg(vargs, long));
|
||||
else if (size_tflag)
|
||||
sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
|
||||
va_arg(vargs, Py_ssize_t));
|
||||
else
|
||||
sprintf(buffer, "%d", va_arg(vargs, int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 'u':
|
||||
if (longflag)
|
||||
sprintf(buffer, "%lu",
|
||||
va_arg(vargs, unsigned long));
|
||||
else if (size_tflag)
|
||||
sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
|
||||
va_arg(vargs, size_t));
|
||||
else
|
||||
sprintf(buffer, "%u",
|
||||
va_arg(vargs, unsigned int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 'i':
|
||||
sprintf(buffer, "%i", va_arg(vargs, int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 'x':
|
||||
sprintf(buffer, "%x", va_arg(vargs, int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 's':
|
||||
p = va_arg(vargs, char*);
|
||||
appendstring(p);
|
||||
break;
|
||||
case 'U':
|
||||
{
|
||||
PyObject *obj = va_arg(vargs, PyObject *);
|
||||
Py_UNICODE *ucopy = PyUnicode_AS_UNICODE(obj);
|
||||
Py_ssize_t usize = PyUnicode_GET_SIZE(obj);
|
||||
Py_ssize_t upos;
|
||||
for (upos = 0; upos<usize;)
|
||||
*s++ = ucopy[upos++];
|
||||
break;
|
||||
}
|
||||
case 'p':
|
||||
sprintf(buffer, "%p", va_arg(vargs, void*));
|
||||
/* %p is ill-defined: ensure leading 0x. */
|
||||
if (buffer[1] == 'X')
|
||||
buffer[1] = 'x';
|
||||
else if (buffer[1] != 'x') {
|
||||
memmove(buffer+2, buffer, strlen(buffer)+1);
|
||||
buffer[0] = '0';
|
||||
buffer[1] = 'x';
|
||||
}
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case '%':
|
||||
*s++ = '%';
|
||||
break;
|
||||
default:
|
||||
appendstring(p);
|
||||
goto end;
|
||||
}
|
||||
} else
|
||||
*s++ = *f;
|
||||
}
|
||||
|
||||
end:
|
||||
_PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
|
||||
return string;
|
||||
}
|
||||
|
||||
#undef appendstring
|
||||
|
||||
PyObject *
|
||||
PyUnicode_FromFormat(const char *format, ...)
|
||||
{
|
||||
PyObject* ret;
|
||||
va_list vargs;
|
||||
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
va_start(vargs, format);
|
||||
#else
|
||||
va_start(vargs);
|
||||
#endif
|
||||
ret = PyUnicode_FromFormatV(format, vargs);
|
||||
va_end(vargs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
|
||||
wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue