mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
This commit is contained in:
parent
a708d6e3b0
commit
69652035bc
12 changed files with 419 additions and 173 deletions
|
@ -1076,6 +1076,17 @@ These are the UTF-8 codec APIs:
|
|||
by the codec.
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF8Stateful}{const char *s,
|
||||
int size,
|
||||
const char *errors,
|
||||
int *consumed}
|
||||
If \var{consumed} is \NULL{}, this function behaves like \cfunction{PyUnicode_DecodeUTF8()}.
|
||||
If \var{consumed} is not \NULL{}, trailing incomplete UTF-8 byte sequences
|
||||
will not be treated as an error. Those bytes will not be decoded and the
|
||||
number of bytes that have been decoded will be stored in \var{consumed}.
|
||||
\versionadded{2.4}
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF8}{const Py_UNICODE *s,
|
||||
int size,
|
||||
const char *errors}
|
||||
|
@ -1121,6 +1132,20 @@ These are the UTF-16 codec APIs:
|
|||
Returns \NULL{} if an exception was raised by the codec.
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF16Stateful}{const char *s,
|
||||
int size,
|
||||
const char *errors,
|
||||
int *byteorder,
|
||||
int *consumed}
|
||||
If \var{consumed} is \NULL{}, this function behaves like
|
||||
\cfunction{PyUnicode_DecodeUTF16()}. If \var{consumed} is not \NULL{},
|
||||
\cfunction{PyUnicode_DecodeUTF16Stateful()} will not treat trailing incomplete
|
||||
UTF-16 byte sequences (i.e. an odd number of bytes or a split surrogate pair)
|
||||
as an error. Those bytes will not be decoded and the number of bytes that
|
||||
have been decoded will be stored in \var{consumed}.
|
||||
\versionadded{2.4}
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF16}{const Py_UNICODE *s,
|
||||
int size,
|
||||
const char *errors,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue