SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support

decoding incomplete input (when the input stream is temporarily exhausted).
codecs.StreamReader now implements buffering, which enables proper
readline support for the UTF-16 decoders. codecs.StreamReader.read()
has a new argument chars which specifies the number of characters to
return. codecs.StreamReader.readline() and codecs.StreamReader.readlines()
have a new argument keepends. Trailing "\n"s will be stripped from the lines
if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and
PyUnicode_DecodeUTF16Stateful.
This commit is contained in:
Walter Dörwald 2004-09-07 20:24:22 +00:00
parent a708d6e3b0
commit 69652035bc
12 changed files with 419 additions and 173 deletions

View file

@ -1076,6 +1076,17 @@ These are the UTF-8 codec APIs:
by the codec.
\end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF8Stateful}{const char *s,
int size,
const char *errors,
int *consumed}
If \var{consumed} is \NULL{}, behaves like \cfunction{PyUnicode_DecodeUTF8()}.
If \var{consumed} is not \NULL{}, trailing incomplete UTF-8 byte sequences
will not be treated as an error. Those bytes will not be decoded and the
number of bytes that have been decoded will be stored in \var{consumed}.
\versionadded{2.4}
\end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF8}{const Py_UNICODE *s,
int size,
const char *errors}
@ -1121,6 +1132,20 @@ These are the UTF-16 codec APIs:
Returns \NULL{} if an exception was raised by the codec.
\end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF16Stateful}{const char *s,
int size,
const char *errors,
int *byteorder,
int *consumed}
If \var{consumed} is \NULL{}, behaves like
\cfunction{PyUnicode_DecodeUTF16()}. If \var{consumed} is not \NULL{},
\cfunction{PyUnicode_DecodeUTF16Stateful()} will not treat trailing incomplete
UTF-16 byte sequences (i.e. an odd number of bytes or a split surrogate pair)
as an error. Those bytes will not be decoded and the number of bytes that
have been decoded will be stored in \var{consumed}.
\versionadded{2.4}
\end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF16}{const Py_UNICODE *s,
int size,
const char *errors,