mirror of
https://github.com/python/cpython.git
synced 2025-10-14 02:43:49 +00:00
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
This commit is contained in:
parent
a708d6e3b0
commit
69652035bc
12 changed files with 419 additions and 173 deletions
|
@ -160,7 +160,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
/* Symbol aliases for the UCS2 variant: each public PyUnicode_* name below
   expands to the identically named PyUnicodeUCS2_* symbol.  The two
   *Stateful entries alias the stateful UTF-8 / UTF-16 decoder APIs.
   NOTE(review): the preprocessor condition selecting this group is outside
   the visible hunk -- confirm against the full header. */
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
||||
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
|
||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
||||
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
|
||||
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
|
||||
# define PyUnicode_Encode PyUnicodeUCS2_Encode
|
||||
# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
|
||||
|
@ -233,7 +235,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
/* Symbol aliases for the UCS4 variant: each public PyUnicode_* name below
   expands to the identically named PyUnicodeUCS4_* symbol.  The two
   *Stateful entries alias the stateful UTF-8 / UTF-16 decoder APIs.
   NOTE(review): the preprocessor condition selecting this group is outside
   the visible hunk -- confirm against the full header. */
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
||||
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
|
||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
||||
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
|
||||
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
|
||||
# define PyUnicode_Encode PyUnicodeUCS4_Encode
|
||||
# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
|
||||
|
@ -658,6 +662,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
|
|||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
/* Stateful UTF-8 decoder.  Takes the same string/length/errors arguments
   as the other decoders plus an int pointer `consumed`, documented below
   as the number of bytes consumed.  Per the accompanying change
   description, the stateful decoders support decoding incomplete input
   (i.e. when the input stream is temporarily exhausted).
   NOTE(review): whether `consumed` may be NULL, and how a trailing partial
   sequence is treated, is not visible in this declaration -- confirm
   against the implementation. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
|
||||
const char *string, /* UTF-8 encoded string */
|
||||
int length, /* size of string */
|
||||
const char *errors, /* error handling */
|
||||
int *consumed /* bytes consumed */
|
||||
);
|
||||
|
||||
/* Takes a Unicode object and returns a PyObject*.
   NOTE(review): presumably the result is the UTF-8 encoded form of
   `unicode` (inferred from the name only; the implementation is not
   visible here) -- confirm. */
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
|
||||
PyObject *unicode /* Unicode object */
|
||||
);
|
||||
|
@ -702,6 +713,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
|
|||
exit */
|
||||
);
|
||||
|
||||
/* Stateful UTF-16 decoder.  Takes string/length/errors, an in/out
   `byteorder` pointer (see the parameter comment below), and an int
   pointer `consumed`, documented below as the number of bytes consumed.
   Per the accompanying change description, the stateful decoders support
   decoding incomplete input (i.e. when the input stream is temporarily
   exhausted).
   NOTE(review): whether `byteorder` or `consumed` may be NULL is not
   visible in this declaration -- confirm against the implementation. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
|
||||
const char *string, /* UTF-16 encoded string */
|
||||
int length, /* size of string */
|
||||
const char *errors, /* error handling */
|
||||
int *byteorder, /* pointer to byteorder to use
|
||||
0=native;-1=LE,1=BE; updated on
|
||||
exit */
|
||||
int *consumed /* bytes consumed */
|
||||
);
|
||||
|
||||
/* Returns a Python string using the UTF-16 encoding in native byte
|
||||
order. The string always starts with a BOM (byte order mark). */
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue