mirror of
https://github.com/python/cpython.git
synced 2025-07-28 21:55:21 +00:00
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
This commit is contained in:
parent
437e6a3b15
commit
6e39080649
12 changed files with 999 additions and 2 deletions
|
@ -145,6 +145,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
|
||||
# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
|
||||
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
|
||||
# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
|
||||
# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
|
||||
# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
|
||||
# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
|
||||
|
@ -159,6 +160,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
|
||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
|
||||
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
|
||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
||||
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
|
||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
||||
|
@ -170,6 +173,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
|
||||
# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
|
||||
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
|
||||
# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
|
||||
# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
|
||||
# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
|
||||
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
|
||||
|
@ -223,6 +227,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
|
||||
# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
|
||||
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
|
||||
# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
|
||||
# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
|
||||
# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
|
||||
# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
|
||||
|
@ -237,6 +242,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
|
||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
|
||||
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
|
||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
||||
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
|
||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
||||
|
@ -248,6 +255,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
|
||||
# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
|
||||
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
|
||||
# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
|
||||
# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
|
||||
# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
|
||||
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
|
||||
|
@ -701,6 +709,80 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
|
|||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
/* --- UTF-32 Codecs ------------------------------------------------------ */
|
||||
|
||||
/* Decodes length bytes from a UTF-32 encoded buffer string and returns
|
||||
the corresponding Unicode object.
|
||||
|
||||
errors (if non-NULL) defines the error handling. It defaults
|
||||
to "strict".
|
||||
|
||||
If byteorder is non-NULL, the decoder starts decoding using the
|
||||
given byte order:
|
||||
|
||||
*byteorder == -1: little endian
|
||||
*byteorder == 0: native order
|
||||
*byteorder == 1: big endian
|
||||
|
||||
In native mode, the first four bytes of the stream are checked for a
|
||||
BOM mark. If found, the BOM mark is analysed, the byte order
|
||||
adjusted and the BOM skipped. In the other modes, no BOM mark
|
||||
interpretation is done. After completion, *byteorder is set to the
|
||||
current byte order at the end of input data.
|
||||
|
||||
If byteorder is NULL, the codec starts in native order mode.
|
||||
|
||||
*/
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
|
||||
const char *string, /* UTF-32 encoded string */
|
||||
Py_ssize_t length, /* size of string */
|
||||
const char *errors, /* error handling */
|
||||
int *byteorder /* pointer to byteorder to use
|
||||
0=native, -1=LE, 1=BE; updated on
|
||||
exit */
|
||||
);
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
|
||||
const char *string, /* UTF-32 encoded string */
|
||||
Py_ssize_t length, /* size of string */
|
||||
const char *errors, /* error handling */
|
||||
int *byteorder, /* pointer to byteorder to use
|
||||
0=native, -1=LE, 1=BE; updated on
|
||||
exit */
|
||||
Py_ssize_t *consumed /* bytes consumed */
|
||||
);
|
||||
|
||||
/* Returns a Python string using the UTF-32 encoding in native byte
|
||||
order. The string always starts with a BOM mark. */
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
|
||||
PyObject *unicode /* Unicode object */
|
||||
);
|
||||
|
||||
/* Returns a Python string object holding the UTF-32 encoded value of
|
||||
the Unicode data.
|
||||
|
||||
The output is written according to byteorder, which selects the following
|
||||
byte order:
|
||||
|
||||
byteorder == -1: little endian
|
||||
byteorder == 0: native byte order (writes a BOM mark)
|
||||
byteorder == 1: big endian
|
||||
|
||||
If byteorder is 0, the output string will always start with the
|
||||
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
|
||||
prepended.
|
||||
|
||||
*/
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
|
||||
const Py_UNICODE *data, /* Unicode char buffer */
|
||||
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
||||
const char *errors, /* error handling */
|
||||
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
|
||||
);
|
||||
|
||||
/* --- UTF-16 Codecs ------------------------------------------------------ */
|
||||
|
||||
/* Decodes length bytes from a UTF-16 encoded buffer string and returns
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue