mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
This commit is contained in:
parent
a708d6e3b0
commit
69652035bc
12 changed files with 419 additions and 173 deletions
|
@ -1076,6 +1076,17 @@ These are the UTF-8 codec APIs:
|
||||||
by the codec.
|
by the codec.
|
||||||
\end{cfuncdesc}
|
\end{cfuncdesc}
|
||||||
|
|
||||||
|
\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF8Stateful}{const char *s,
|
||||||
|
int size,
|
||||||
|
const char *errors,
|
||||||
|
int *consumed}
|
||||||
|
If \var{consumed} is \NULL{}, behaves like \cfunction{PyUnicode_DecodeUTF8()}.
|
||||||
|
If \var{consumed} is not \NULL{}, trailing incomplete UTF-8 byte sequences
|
||||||
|
will not be treated as an error. Those bytes will not be decoded and the
|
||||||
|
number of bytes that have been decoded will be stored in \var{consumed}.
|
||||||
|
\versionadded{2.4}
|
||||||
|
\end{cfuncdesc}
|
||||||
|
|
||||||
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF8}{const Py_UNICODE *s,
|
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF8}{const Py_UNICODE *s,
|
||||||
int size,
|
int size,
|
||||||
const char *errors}
|
const char *errors}
|
||||||
|
@ -1121,6 +1132,20 @@ These are the UTF-16 codec APIs:
|
||||||
Returns \NULL{} if an exception was raised by the codec.
|
Returns \NULL{} if an exception was raised by the codec.
|
||||||
\end{cfuncdesc}
|
\end{cfuncdesc}
|
||||||
|
|
||||||
|
\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF16Stateful}{const char *s,
|
||||||
|
int size,
|
||||||
|
const char *errors,
|
||||||
|
int *byteorder,
|
||||||
|
int *consumed}
|
||||||
|
If \var{consumed} is \NULL{}, behaves like
|
||||||
|
\cfunction{PyUnicode_DecodeUTF16()}. If \var{consumed} is not \NULL{},
|
||||||
|
\cfunction{PyUnicode_DecodeUTF16Stateful()} will not treat trailing incomplete
|
||||||
|
UTF-16 byte sequences (i.e. an odd number of bytes or a split surrogate pair)
|
||||||
|
as an error. Those bytes will not be decoded and the number of bytes that
|
||||||
|
have been decoded will be stored in \var{consumed}.
|
||||||
|
\versionadded{2.4}
|
||||||
|
\end{cfuncdesc}
|
||||||
|
|
||||||
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF16}{const Py_UNICODE *s,
|
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF16}{const Py_UNICODE *s,
|
||||||
int size,
|
int size,
|
||||||
const char *errors,
|
const char *errors,
|
||||||
|
|
|
@ -394,9 +394,14 @@ order to be compatible to the Python codec registry.
|
||||||
be extended with \function{register_error()}.
|
be extended with \function{register_error()}.
|
||||||
\end{classdesc}
|
\end{classdesc}
|
||||||
|
|
||||||
\begin{methoddesc}{read}{\optional{size}}
|
\begin{methoddesc}{read}{\optional{size\optional{, chars}}}
|
||||||
Decodes data from the stream and returns the resulting object.
|
Decodes data from the stream and returns the resulting object.
|
||||||
|
|
||||||
|
\var{chars} indicates the number of characters to read from the
|
||||||
|
stream. \function{read()} will never return more than \var{chars}
|
||||||
|
characters, but it might return less, if there are not enough
|
||||||
|
characters available.
|
||||||
|
|
||||||
\var{size} indicates the approximate maximum number of bytes to read
|
\var{size} indicates the approximate maximum number of bytes to read
|
||||||
from the stream for decoding purposes. The decoder can modify this
|
from the stream for decoding purposes. The decoder can modify this
|
||||||
setting as appropriate. The default value -1 indicates to read and
|
setting as appropriate. The default value -1 indicates to read and
|
||||||
|
@ -407,29 +412,29 @@ order to be compatible to the Python codec registry.
|
||||||
read as much data as is allowed within the definition of the encoding
|
read as much data as is allowed within the definition of the encoding
|
||||||
and the given size, e.g. if optional encoding endings or state
|
and the given size, e.g. if optional encoding endings or state
|
||||||
markers are available on the stream, these should be read too.
|
markers are available on the stream, these should be read too.
|
||||||
|
|
||||||
|
\versionchanged[\var{chars} argument added]{2.4}
|
||||||
\end{methoddesc}
|
\end{methoddesc}
|
||||||
|
|
||||||
\begin{methoddesc}{readline}{[size]}
|
\begin{methoddesc}{readline}{\optional{size\optional{, keepends}}}
|
||||||
Read one line from the input stream and return the
|
Read one line from the input stream and return the
|
||||||
decoded data.
|
decoded data.
|
||||||
|
|
||||||
Unlike the \method{readlines()} method, this method inherits
|
|
||||||
the line breaking knowledge from the underlying stream's
|
|
||||||
\method{readline()} method -- there is currently no support for line
|
|
||||||
breaking using the codec decoder due to lack of line buffering.
|
|
||||||
Subclasses should however, if possible, try to implement this method
|
|
||||||
using their own knowledge of line breaking.
|
|
||||||
|
|
||||||
\var{size}, if given, is passed as size argument to the stream's
|
\var{size}, if given, is passed as size argument to the stream's
|
||||||
\method{readline()} method.
|
\method{readline()} method.
|
||||||
|
|
||||||
|
If \var{keepends} is false, line endings will be stripped from the
|
||||||
|
lines returned.
|
||||||
|
|
||||||
|
\versionchanged[\var{keepends} argument added]{2.4}
|
||||||
\end{methoddesc}
|
\end{methoddesc}
|
||||||
|
|
||||||
\begin{methoddesc}{readlines}{[sizehint]}
|
\begin{methoddesc}{readlines}{\optional{sizehint\optional{, keepends}}}
|
||||||
Read all lines available on the input stream and return them as list
|
Read all lines available on the input stream and return them as list
|
||||||
of lines.
|
of lines.
|
||||||
|
|
||||||
Line breaks are implemented using the codec's decoder method and are
|
Line breaks are implemented using the codec's decoder method and are
|
||||||
included in the list entries.
|
included in the list entries if \var{keepends} is true.
|
||||||
|
|
||||||
\var{sizehint}, if given, is passed as \var{size} argument to the
|
\var{sizehint}, if given, is passed as \var{size} argument to the
|
||||||
stream's \method{read()} method.
|
stream's \method{read()} method.
|
||||||
|
|
|
@ -160,7 +160,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
||||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
||||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
||||||
|
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
|
||||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
||||||
|
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
|
||||||
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
|
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
|
||||||
# define PyUnicode_Encode PyUnicodeUCS2_Encode
|
# define PyUnicode_Encode PyUnicodeUCS2_Encode
|
||||||
# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
|
# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
|
||||||
|
@ -233,7 +235,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
||||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
||||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
||||||
|
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
|
||||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
||||||
|
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
|
||||||
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
|
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
|
||||||
# define PyUnicode_Encode PyUnicodeUCS4_Encode
|
# define PyUnicode_Encode PyUnicodeUCS4_Encode
|
||||||
# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
|
# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
|
||||||
|
@ -658,6 +662,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
|
||||||
const char *errors /* error handling */
|
const char *errors /* error handling */
|
||||||
);
|
);
|
||||||
|
|
||||||
|
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
|
||||||
|
const char *string, /* UTF-8 encoded string */
|
||||||
|
int length, /* size of string */
|
||||||
|
const char *errors, /* error handling */
|
||||||
|
int *consumed /* bytes consumed */
|
||||||
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
|
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
|
||||||
PyObject *unicode /* Unicode object */
|
PyObject *unicode /* Unicode object */
|
||||||
);
|
);
|
||||||
|
@ -702,6 +713,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
|
||||||
exit */
|
exit */
|
||||||
);
|
);
|
||||||
|
|
||||||
|
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
|
||||||
|
const char *string, /* UTF-16 encoded string */
|
||||||
|
int length, /* size of string */
|
||||||
|
const char *errors, /* error handling */
|
||||||
|
int *byteorder, /* pointer to byteorder to use
|
||||||
|
0=native;-1=LE,1=BE; updated on
|
||||||
|
exit */
|
||||||
|
int *consumed /* bytes consumed */
|
||||||
|
);
|
||||||
|
|
||||||
/* Returns a Python string using the UTF-16 encoding in native byte
|
/* Returns a Python string using the UTF-16 encoding in native byte
|
||||||
order. The string always starts with a BOM mark. */
|
order. The string always starts with a BOM mark. */
|
||||||
|
|
||||||
|
|
108
Lib/codecs.py
108
Lib/codecs.py
|
@ -228,12 +228,22 @@ class StreamReader(Codec):
|
||||||
"""
|
"""
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.errors = errors
|
self.errors = errors
|
||||||
|
self.bytebuffer = ""
|
||||||
|
self.charbuffer = u""
|
||||||
|
|
||||||
def read(self, size=-1):
|
def decode(self, input, errors='strict'):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def read(self, size=-1, chars=-1):
|
||||||
|
|
||||||
""" Decodes data from the stream self.stream and returns the
|
""" Decodes data from the stream self.stream and returns the
|
||||||
resulting object.
|
resulting object.
|
||||||
|
|
||||||
|
chars indicates the number of characters to read from the
|
||||||
|
stream. read() will never return more than chars
|
||||||
|
characters, but it might return less, if there are not enough
|
||||||
|
characters available.
|
||||||
|
|
||||||
size indicates the approximate maximum number of bytes to
|
size indicates the approximate maximum number of bytes to
|
||||||
read from the stream for decoding purposes. The decoder
|
read from the stream for decoding purposes. The decoder
|
||||||
can modify this setting as appropriate. The default value
|
can modify this setting as appropriate. The default value
|
||||||
|
@ -248,54 +258,70 @@ class StreamReader(Codec):
|
||||||
on the stream, these should be read too.
|
on the stream, these should be read too.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# Unsliced reading:
|
# read until we get the required number of characters (if available)
|
||||||
if size < 0:
|
done = False
|
||||||
return self.decode(self.stream.read(), self.errors)[0]
|
while True:
|
||||||
|
# can the request be satisfied from the character buffer?
|
||||||
# Sliced reading:
|
if chars < 0:
|
||||||
read = self.stream.read
|
if self.charbuffer:
|
||||||
decode = self.decode
|
done = True
|
||||||
data = read(size)
|
|
||||||
i = 0
|
|
||||||
while 1:
|
|
||||||
try:
|
|
||||||
object, decodedbytes = decode(data, self.errors)
|
|
||||||
except ValueError, why:
|
|
||||||
# This method is slow but should work under pretty much
|
|
||||||
# all conditions; at most 10 tries are made
|
|
||||||
i = i + 1
|
|
||||||
newdata = read(1)
|
|
||||||
if not newdata or i > 10:
|
|
||||||
raise
|
|
||||||
data = data + newdata
|
|
||||||
else:
|
else:
|
||||||
return object
|
if len(self.charbuffer) >= chars:
|
||||||
|
done = True
|
||||||
|
if done:
|
||||||
|
if chars < 0:
|
||||||
|
result = self.charbuffer
|
||||||
|
self.charbuffer = u""
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
result = self.charbuffer[:chars]
|
||||||
|
self.charbuffer = self.charbuffer[chars:]
|
||||||
|
break
|
||||||
|
# we need more data
|
||||||
|
if size < 0:
|
||||||
|
newdata = self.stream.read()
|
||||||
|
else:
|
||||||
|
newdata = self.stream.read(size)
|
||||||
|
data = self.bytebuffer + newdata
|
||||||
|
object, decodedbytes = self.decode(data, self.errors)
|
||||||
|
# keep undecoded bytes until the next call
|
||||||
|
self.bytebuffer = data[decodedbytes:]
|
||||||
|
# put new characters in the character buffer
|
||||||
|
self.charbuffer += object
|
||||||
|
# there was no data available
|
||||||
|
if not newdata:
|
||||||
|
done = True
|
||||||
|
return result
|
||||||
|
|
||||||
def readline(self, size=None):
|
def readline(self, size=None, keepends=True):
|
||||||
|
|
||||||
""" Read one line from the input stream and return the
|
""" Read one line from the input stream and return the
|
||||||
decoded data.
|
decoded data.
|
||||||
|
|
||||||
Note: Unlike the .readlines() method, this method inherits
|
size, if given, is passed as size argument to the
|
||||||
the line breaking knowledge from the underlying stream's
|
read() method.
|
||||||
.readline() method -- there is currently no support for
|
|
||||||
line breaking using the codec decoder due to lack of line
|
|
||||||
buffering. Subclasses should however, if possible, try to
|
|
||||||
implement this method using their own knowledge of line
|
|
||||||
breaking.
|
|
||||||
|
|
||||||
size, if given, is passed as size argument to the stream's
|
|
||||||
.readline() method.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if size is None:
|
if size is None:
|
||||||
line = self.stream.readline()
|
size = 10
|
||||||
else:
|
line = u""
|
||||||
line = self.stream.readline(size)
|
while True:
|
||||||
return self.decode(line, self.errors)[0]
|
data = self.read(size)
|
||||||
|
line += data
|
||||||
|
pos = line.find("\n")
|
||||||
|
if pos>=0:
|
||||||
|
self.charbuffer = line[pos+1:] + self.charbuffer
|
||||||
|
if keepends:
|
||||||
|
line = line[:pos+1]
|
||||||
|
else:
|
||||||
|
line = line[:pos]
|
||||||
|
return line
|
||||||
|
elif not data:
|
||||||
|
return line
|
||||||
|
if size<8000:
|
||||||
|
size *= 2
|
||||||
|
|
||||||
|
def readlines(self, sizehint=None, keepends=True):
|
||||||
def readlines(self, sizehint=None):
|
|
||||||
|
|
||||||
""" Read all lines available on the input stream
|
""" Read all lines available on the input stream
|
||||||
and return them as list of lines.
|
and return them as list of lines.
|
||||||
|
@ -307,8 +333,8 @@ class StreamReader(Codec):
|
||||||
way to finding the true end-of-line.
|
way to finding the true end-of-line.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
data = self.stream.read()
|
data = self.read()
|
||||||
return self.decode(data, self.errors)[0].splitlines(1)
|
return self.splitlines(keepends)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
|
|
||||||
|
|
|
@ -10,54 +10,40 @@ import codecs, sys
|
||||||
|
|
||||||
### Codec APIs
|
### Codec APIs
|
||||||
|
|
||||||
class Codec(codecs.Codec):
|
encode = codecs.utf_16_encode
|
||||||
|
|
||||||
# Note: Binding these as C functions will result in the class not
|
def decode(input, errors='strict'):
|
||||||
# converting them to methods. This is intended.
|
return codecs.utf_16_decode(input, errors, True)
|
||||||
encode = codecs.utf_16_encode
|
|
||||||
decode = codecs.utf_16_decode
|
|
||||||
|
|
||||||
class StreamWriter(Codec,codecs.StreamWriter):
|
class StreamWriter(codecs.StreamWriter):
|
||||||
def __init__(self, stream, errors='strict'):
|
def __init__(self, stream, errors='strict'):
|
||||||
self.bom_written = 0
|
self.bom_written = False
|
||||||
codecs.StreamWriter.__init__(self, stream, errors)
|
codecs.StreamWriter.__init__(self, stream, errors)
|
||||||
|
|
||||||
def write(self, data):
|
def encode(self, input, errors='strict'):
|
||||||
result = codecs.StreamWriter.write(self, data)
|
self.bom_written = True
|
||||||
if not self.bom_written:
|
result = codecs.utf_16_encode(input, errors)
|
||||||
self.bom_written = 1
|
if sys.byteorder == 'little':
|
||||||
if sys.byteorder == 'little':
|
self.encode = codecs.utf_16_le_encode
|
||||||
self.encode = codecs.utf_16_le_encode
|
else:
|
||||||
else:
|
self.encode = codecs.utf_16_be_encode
|
||||||
self.encode = codecs.utf_16_be_encode
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
class StreamReader(Codec,codecs.StreamReader):
|
class StreamReader(codecs.StreamReader):
|
||||||
def __init__(self, stream, errors='strict'):
|
|
||||||
self.bom_read = 0
|
|
||||||
codecs.StreamReader.__init__(self, stream, errors)
|
|
||||||
|
|
||||||
def read(self, size=-1):
|
def decode(self, input, errors='strict'):
|
||||||
if not self.bom_read:
|
(object, consumed, byteorder) = \
|
||||||
signature = self.stream.read(2)
|
codecs.utf_16_ex_decode(input, errors, 0, False)
|
||||||
if signature == codecs.BOM_BE:
|
if byteorder == -1:
|
||||||
self.decode = codecs.utf_16_be_decode
|
self.decode = codecs.utf_16_le_decode
|
||||||
elif signature == codecs.BOM_LE:
|
elif byteorder == 1:
|
||||||
self.decode = codecs.utf_16_le_decode
|
self.decode = codecs.utf_16_be_decode
|
||||||
else:
|
elif consumed>=2:
|
||||||
raise UnicodeError,"UTF-16 stream does not start with BOM"
|
raise UnicodeError,"UTF-16 stream does not start with BOM"
|
||||||
if size > 2:
|
return (object, consumed)
|
||||||
size -= 2
|
|
||||||
elif size >= 0:
|
|
||||||
size = 0
|
|
||||||
self.bom_read = 1
|
|
||||||
return codecs.StreamReader.read(self, size)
|
|
||||||
|
|
||||||
def readline(self, size=None):
|
|
||||||
raise NotImplementedError, '.readline() is not implemented for UTF-16'
|
|
||||||
|
|
||||||
### encodings module API
|
### encodings module API
|
||||||
|
|
||||||
def getregentry():
|
def getregentry():
|
||||||
|
|
||||||
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
return (encode,decode,StreamReader,StreamWriter)
|
||||||
|
|
|
@ -10,23 +10,19 @@ import codecs
|
||||||
|
|
||||||
### Codec APIs
|
### Codec APIs
|
||||||
|
|
||||||
class Codec(codecs.Codec):
|
encode = codecs.utf_16_be_encode
|
||||||
|
|
||||||
# Note: Binding these as C functions will result in the class not
|
def decode(input, errors='strict'):
|
||||||
# converting them to methods. This is intended.
|
return codecs.utf_16_be_decode(input, errors, True)
|
||||||
|
|
||||||
|
class StreamWriter(codecs.StreamWriter):
|
||||||
encode = codecs.utf_16_be_encode
|
encode = codecs.utf_16_be_encode
|
||||||
|
|
||||||
|
class StreamReader(codecs.StreamReader):
|
||||||
decode = codecs.utf_16_be_decode
|
decode = codecs.utf_16_be_decode
|
||||||
|
|
||||||
class StreamWriter(Codec,codecs.StreamWriter):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class StreamReader(Codec,codecs.StreamReader):
|
|
||||||
|
|
||||||
def readline(self, size=None):
|
|
||||||
raise NotImplementedError, '.readline() is not implemented for UTF-16-BE'
|
|
||||||
|
|
||||||
### encodings module API
|
### encodings module API
|
||||||
|
|
||||||
def getregentry():
|
def getregentry():
|
||||||
|
|
||||||
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
return (encode,decode,StreamReader,StreamWriter)
|
||||||
|
|
|
@ -10,23 +10,20 @@ import codecs
|
||||||
|
|
||||||
### Codec APIs
|
### Codec APIs
|
||||||
|
|
||||||
class Codec(codecs.Codec):
|
encode = codecs.utf_16_le_encode
|
||||||
|
|
||||||
# Note: Binding these as C functions will result in the class not
|
def decode(input, errors='strict'):
|
||||||
# converting them to methods. This is intended.
|
return codecs.utf_16_le_decode(input, errors, True)
|
||||||
|
|
||||||
|
class StreamWriter(codecs.StreamWriter):
|
||||||
encode = codecs.utf_16_le_encode
|
encode = codecs.utf_16_le_encode
|
||||||
|
|
||||||
|
class StreamReader(codecs.StreamReader):
|
||||||
decode = codecs.utf_16_le_decode
|
decode = codecs.utf_16_le_decode
|
||||||
|
|
||||||
class StreamWriter(Codec,codecs.StreamWriter):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class StreamReader(Codec,codecs.StreamReader):
|
|
||||||
|
|
||||||
def readline(self, size=None):
|
|
||||||
raise NotImplementedError, '.readline() is not implemented for UTF-16-LE'
|
|
||||||
|
|
||||||
### encodings module API
|
### encodings module API
|
||||||
|
|
||||||
def getregentry():
|
def getregentry():
|
||||||
|
|
||||||
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
return (encode,decode,StreamReader,StreamWriter)
|
||||||
|
|
|
@ -10,21 +10,19 @@ import codecs
|
||||||
|
|
||||||
### Codec APIs
|
### Codec APIs
|
||||||
|
|
||||||
class Codec(codecs.Codec):
|
encode = codecs.utf_8_encode
|
||||||
|
|
||||||
# Note: Binding these as C functions will result in the class not
|
def decode(input, errors='strict'):
|
||||||
# converting them to methods. This is intended.
|
return codecs.utf_8_decode(input, errors, True)
|
||||||
|
|
||||||
|
class StreamWriter(codecs.StreamWriter):
|
||||||
encode = codecs.utf_8_encode
|
encode = codecs.utf_8_encode
|
||||||
|
|
||||||
|
class StreamReader(codecs.StreamReader):
|
||||||
decode = codecs.utf_8_decode
|
decode = codecs.utf_8_decode
|
||||||
|
|
||||||
class StreamWriter(Codec,codecs.StreamWriter):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class StreamReader(Codec,codecs.StreamReader):
|
|
||||||
pass
|
|
||||||
|
|
||||||
### encodings module API
|
### encodings module API
|
||||||
|
|
||||||
def getregentry():
|
def getregentry():
|
||||||
|
|
||||||
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
return (encode,decode,StreamReader,StreamWriter)
|
||||||
|
|
|
@ -3,7 +3,45 @@ import unittest
|
||||||
import codecs
|
import codecs
|
||||||
import StringIO
|
import StringIO
|
||||||
|
|
||||||
class UTF16Test(unittest.TestCase):
|
class Queue(object):
|
||||||
|
"""
|
||||||
|
queue: write bytes at one end, read bytes from the other end
|
||||||
|
"""
|
||||||
|
def __init__(self):
|
||||||
|
self._buffer = ""
|
||||||
|
|
||||||
|
def write(self, chars):
|
||||||
|
self._buffer += chars
|
||||||
|
|
||||||
|
def read(self, size=-1):
|
||||||
|
if size<0:
|
||||||
|
s = self._buffer
|
||||||
|
self._buffer = ""
|
||||||
|
return s
|
||||||
|
else:
|
||||||
|
s = self._buffer[:size]
|
||||||
|
self._buffer = self._buffer[size:]
|
||||||
|
return s
|
||||||
|
|
||||||
|
class PartialReadTest(unittest.TestCase):
|
||||||
|
def check_partial(self, encoding, input, partialresults):
|
||||||
|
# get a StreamReader for the encoding and feed the bytestring version
|
||||||
|
# of input to the reader byte by byte. Read everything available from
|
||||||
|
# the StreamReader and check that the results equal the appropriate
|
||||||
|
# entries from partialresults.
|
||||||
|
q = Queue()
|
||||||
|
r = codecs.getreader(encoding)(q)
|
||||||
|
result = u""
|
||||||
|
for (c, partialresult) in zip(input.encode(encoding), partialresults):
|
||||||
|
q.write(c)
|
||||||
|
result += r.read()
|
||||||
|
self.assertEqual(result, partialresult)
|
||||||
|
# check that there's nothing left in the buffers
|
||||||
|
self.assertEqual(r.read(), u"")
|
||||||
|
self.assertEqual(r.bytebuffer, "")
|
||||||
|
self.assertEqual(r.charbuffer, u"")
|
||||||
|
|
||||||
|
class UTF16Test(PartialReadTest):
|
||||||
|
|
||||||
spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
|
spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
|
||||||
spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'
|
spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'
|
||||||
|
@ -23,6 +61,81 @@ class UTF16Test(unittest.TestCase):
|
||||||
f = reader(s)
|
f = reader(s)
|
||||||
self.assertEquals(f.read(), u"spamspam")
|
self.assertEquals(f.read(), u"spamspam")
|
||||||
|
|
||||||
|
def test_partial(self):
|
||||||
|
self.check_partial(
|
||||||
|
"utf-16",
|
||||||
|
u"\x00\xff\u0100\uffff",
|
||||||
|
[
|
||||||
|
u"", # first byte of BOM read
|
||||||
|
u"", # second byte of BOM read => byteorder known
|
||||||
|
u"",
|
||||||
|
u"\x00",
|
||||||
|
u"\x00",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff\u0100",
|
||||||
|
u"\x00\xff\u0100",
|
||||||
|
u"\x00\xff\u0100\uffff",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
class UTF16LETest(PartialReadTest):
|
||||||
|
|
||||||
|
def test_partial(self):
|
||||||
|
self.check_partial(
|
||||||
|
"utf-16-le",
|
||||||
|
u"\x00\xff\u0100\uffff",
|
||||||
|
[
|
||||||
|
u"",
|
||||||
|
u"\x00",
|
||||||
|
u"\x00",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff\u0100",
|
||||||
|
u"\x00\xff\u0100",
|
||||||
|
u"\x00\xff\u0100\uffff",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
class UTF16BETest(PartialReadTest):
|
||||||
|
|
||||||
|
def test_partial(self):
|
||||||
|
self.check_partial(
|
||||||
|
"utf-16-be",
|
||||||
|
u"\x00\xff\u0100\uffff",
|
||||||
|
[
|
||||||
|
u"",
|
||||||
|
u"\x00",
|
||||||
|
u"\x00",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff\u0100",
|
||||||
|
u"\x00\xff\u0100",
|
||||||
|
u"\x00\xff\u0100\uffff",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
class UTF8Test(PartialReadTest):
|
||||||
|
|
||||||
|
def test_partial(self):
|
||||||
|
self.check_partial(
|
||||||
|
"utf-8",
|
||||||
|
u"\x00\xff\u07ff\u0800\uffff",
|
||||||
|
[
|
||||||
|
u"\x00",
|
||||||
|
u"\x00",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff",
|
||||||
|
u"\x00\xff\u07ff",
|
||||||
|
u"\x00\xff\u07ff",
|
||||||
|
u"\x00\xff\u07ff",
|
||||||
|
u"\x00\xff\u07ff\u0800",
|
||||||
|
u"\x00\xff\u07ff\u0800",
|
||||||
|
u"\x00\xff\u07ff\u0800",
|
||||||
|
u"\x00\xff\u07ff\u0800\uffff",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
class EscapeDecodeTest(unittest.TestCase):
|
class EscapeDecodeTest(unittest.TestCase):
|
||||||
def test_empty_escape_decode(self):
|
def test_empty_escape_decode(self):
|
||||||
self.assertEquals(codecs.escape_decode(""), ("", 0))
|
self.assertEquals(codecs.escape_decode(""), ("", 0))
|
||||||
|
@ -348,6 +461,9 @@ class CodecsModuleTest(unittest.TestCase):
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(
|
test_support.run_unittest(
|
||||||
UTF16Test,
|
UTF16Test,
|
||||||
|
UTF16LETest,
|
||||||
|
UTF16BETest,
|
||||||
|
UTF8Test,
|
||||||
EscapeDecodeTest,
|
EscapeDecodeTest,
|
||||||
RecodingTest,
|
RecodingTest,
|
||||||
PunycodeTest,
|
PunycodeTest,
|
||||||
|
|
13
Misc/NEWS
13
Misc/NEWS
|
@ -22,7 +22,14 @@ Extension modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
...
|
- SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
|
||||||
|
decoding incomplete input (when the input stream is temporarily exhausted).
|
||||||
|
``codecs.StreamReader`` now implements buffering, which enables proper
|
||||||
|
readline support for the UTF-16 decoders. ``codecs.StreamReader.read()``
|
||||||
|
has a new argument ``chars`` which specifies the number of characters to
|
||||||
|
return. ``codecs.StreamReader.readline()`` and
|
||||||
|
``codecs.StreamReader.readlines()`` have a new argument ``keepends``.
|
||||||
|
Trailing "\n"s will be stripped from the lines if ``keepends`` is false.
|
||||||
|
|
||||||
Build
|
Build
|
||||||
-----
|
-----
|
||||||
|
@ -32,7 +39,9 @@ Build
|
||||||
C API
|
C API
|
||||||
-----
|
-----
|
||||||
|
|
||||||
...
|
- SF patch #998993: ``PyUnicode_DecodeUTF8Stateful`` and
|
||||||
|
``PyUnicode_DecodeUTF16Stateful`` have been added, which implement stateful
|
||||||
|
decoding.
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
-------------
|
-------------
|
||||||
|
|
|
@ -269,13 +269,20 @@ utf_8_decode(PyObject *self,
|
||||||
const char *data;
|
const char *data;
|
||||||
int size;
|
int size;
|
||||||
const char *errors = NULL;
|
const char *errors = NULL;
|
||||||
|
int final = 0;
|
||||||
if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
|
int consumed;
|
||||||
&data, &size, &errors))
|
PyObject *decoded = NULL;
|
||||||
return NULL;
|
|
||||||
|
|
||||||
return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
|
if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
|
||||||
size);
|
&data, &size, &errors, &final))
|
||||||
|
return NULL;
|
||||||
|
consumed = size;
|
||||||
|
|
||||||
|
decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
|
||||||
|
final ? NULL : &consumed);
|
||||||
|
if (decoded == NULL)
|
||||||
|
return NULL;
|
||||||
|
return codec_tuple(decoded, consumed);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -286,12 +293,19 @@ utf_16_decode(PyObject *self,
|
||||||
int size;
|
int size;
|
||||||
const char *errors = NULL;
|
const char *errors = NULL;
|
||||||
int byteorder = 0;
|
int byteorder = 0;
|
||||||
|
int final = 0;
|
||||||
if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
|
int consumed;
|
||||||
&data, &size, &errors))
|
PyObject *decoded;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
|
||||||
|
&data, &size, &errors, &final))
|
||||||
return NULL;
|
return NULL;
|
||||||
return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
|
consumed = size;
|
||||||
size);
|
decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
|
||||||
|
final ? NULL : &consumed);
|
||||||
|
if (decoded == NULL)
|
||||||
|
return NULL;
|
||||||
|
return codec_tuple(decoded, consumed);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -302,12 +316,20 @@ utf_16_le_decode(PyObject *self,
|
||||||
int size;
|
int size;
|
||||||
const char *errors = NULL;
|
const char *errors = NULL;
|
||||||
int byteorder = -1;
|
int byteorder = -1;
|
||||||
|
int final = 0;
|
||||||
|
int consumed;
|
||||||
|
PyObject *decoded = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
|
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
|
||||||
&data, &size, &errors))
|
&data, &size, &errors, &final))
|
||||||
return NULL;
|
return NULL;
|
||||||
return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
|
consumed = size;
|
||||||
size);
|
decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
|
||||||
|
&byteorder, final ? NULL : &consumed);
|
||||||
|
if (decoded == NULL)
|
||||||
|
return NULL;
|
||||||
|
return codec_tuple(decoded, consumed);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -318,12 +340,19 @@ utf_16_be_decode(PyObject *self,
|
||||||
int size;
|
int size;
|
||||||
const char *errors = NULL;
|
const char *errors = NULL;
|
||||||
int byteorder = 1;
|
int byteorder = 1;
|
||||||
|
int final = 0;
|
||||||
|
int consumed;
|
||||||
|
PyObject *decoded = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
|
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
|
||||||
&data, &size, &errors))
|
&data, &size, &errors, &final))
|
||||||
return NULL;
|
return NULL;
|
||||||
return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
|
consumed = size;
|
||||||
size);
|
decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
|
||||||
|
&byteorder, final ? NULL : &consumed);
|
||||||
|
if (decoded == NULL)
|
||||||
|
return NULL;
|
||||||
|
return codec_tuple(decoded, consumed);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This non-standard version also provides access to the byteorder
|
/* This non-standard version also provides access to the byteorder
|
||||||
|
@ -343,15 +372,19 @@ utf_16_ex_decode(PyObject *self,
|
||||||
const char *errors = NULL;
|
const char *errors = NULL;
|
||||||
int byteorder = 0;
|
int byteorder = 0;
|
||||||
PyObject *unicode, *tuple;
|
PyObject *unicode, *tuple;
|
||||||
|
int final = 0;
|
||||||
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
|
int consumed;
|
||||||
&data, &size, &errors, &byteorder))
|
|
||||||
|
if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
|
||||||
|
&data, &size, &errors, &byteorder, &final))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
|
consumed = size;
|
||||||
|
unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
|
||||||
|
final ? NULL : &consumed);
|
||||||
if (unicode == NULL)
|
if (unicode == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
tuple = Py_BuildValue("Oii", unicode, size, byteorder);
|
tuple = Py_BuildValue("Oii", unicode, consumed, byteorder);
|
||||||
Py_DECREF(unicode);
|
Py_DECREF(unicode);
|
||||||
return tuple;
|
return tuple;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1135,6 +1135,14 @@ char utf8_code_length[256] = {
|
||||||
PyObject *PyUnicode_DecodeUTF8(const char *s,
|
PyObject *PyUnicode_DecodeUTF8(const char *s,
|
||||||
int size,
|
int size,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
|
{
|
||||||
|
return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
|
||||||
|
int size,
|
||||||
|
const char *errors,
|
||||||
|
int *consumed)
|
||||||
{
|
{
|
||||||
const char *starts = s;
|
const char *starts = s;
|
||||||
int n;
|
int n;
|
||||||
|
@ -1153,8 +1161,11 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
|
||||||
unicode = _PyUnicode_New(size);
|
unicode = _PyUnicode_New(size);
|
||||||
if (!unicode)
|
if (!unicode)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (size == 0)
|
if (size == 0) {
|
||||||
|
if (consumed)
|
||||||
|
*consumed = 0;
|
||||||
return (PyObject *)unicode;
|
return (PyObject *)unicode;
|
||||||
|
}
|
||||||
|
|
||||||
/* Unpack UTF-8 encoded data */
|
/* Unpack UTF-8 encoded data */
|
||||||
p = unicode->str;
|
p = unicode->str;
|
||||||
|
@ -1172,10 +1183,14 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
|
||||||
n = utf8_code_length[ch];
|
n = utf8_code_length[ch];
|
||||||
|
|
||||||
if (s + n > e) {
|
if (s + n > e) {
|
||||||
errmsg = "unexpected end of data";
|
if (consumed)
|
||||||
startinpos = s-starts;
|
break;
|
||||||
endinpos = size;
|
else {
|
||||||
goto utf8Error;
|
errmsg = "unexpected end of data";
|
||||||
|
startinpos = s-starts;
|
||||||
|
endinpos = size;
|
||||||
|
goto utf8Error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (n) {
|
switch (n) {
|
||||||
|
@ -1293,6 +1308,8 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
|
||||||
(PyObject **)&unicode, &outpos, &p))
|
(PyObject **)&unicode, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
if (consumed)
|
||||||
|
*consumed = s-starts;
|
||||||
|
|
||||||
/* Adjust length */
|
/* Adjust length */
|
||||||
if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
|
if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
|
||||||
|
@ -1427,6 +1444,16 @@ PyUnicode_DecodeUTF16(const char *s,
|
||||||
int size,
|
int size,
|
||||||
const char *errors,
|
const char *errors,
|
||||||
int *byteorder)
|
int *byteorder)
|
||||||
|
{
|
||||||
|
return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
PyUnicode_DecodeUTF16Stateful(const char *s,
|
||||||
|
int size,
|
||||||
|
const char *errors,
|
||||||
|
int *byteorder,
|
||||||
|
int *consumed)
|
||||||
{
|
{
|
||||||
const char *starts = s;
|
const char *starts = s;
|
||||||
int startinpos;
|
int startinpos;
|
||||||
|
@ -1467,26 +1494,28 @@ PyUnicode_DecodeUTF16(const char *s,
|
||||||
mark is skipped, in all other modes, it is copied to the output
|
mark is skipped, in all other modes, it is copied to the output
|
||||||
stream as-is (giving a ZWNBSP character). */
|
stream as-is (giving a ZWNBSP character). */
|
||||||
if (bo == 0) {
|
if (bo == 0) {
|
||||||
const Py_UNICODE bom = (q[ihi] << 8) | q[ilo];
|
if (size >= 2) {
|
||||||
|
const Py_UNICODE bom = (q[ihi] << 8) | q[ilo];
|
||||||
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
||||||
if (bom == 0xFEFF) {
|
if (bom == 0xFEFF) {
|
||||||
q += 2;
|
q += 2;
|
||||||
bo = -1;
|
bo = -1;
|
||||||
}
|
}
|
||||||
else if (bom == 0xFFFE) {
|
else if (bom == 0xFFFE) {
|
||||||
q += 2;
|
q += 2;
|
||||||
bo = 1;
|
bo = 1;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (bom == 0xFEFF) {
|
if (bom == 0xFEFF) {
|
||||||
q += 2;
|
q += 2;
|
||||||
bo = 1;
|
bo = 1;
|
||||||
}
|
}
|
||||||
else if (bom == 0xFFFE) {
|
else if (bom == 0xFFFE) {
|
||||||
q += 2;
|
q += 2;
|
||||||
bo = -1;
|
bo = -1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bo == -1) {
|
if (bo == -1) {
|
||||||
|
@ -1502,8 +1531,10 @@ PyUnicode_DecodeUTF16(const char *s,
|
||||||
|
|
||||||
while (q < e) {
|
while (q < e) {
|
||||||
Py_UNICODE ch;
|
Py_UNICODE ch;
|
||||||
/* remaing bytes at the end? (size should be even) */
|
/* remaining bytes at the end? (size should be even) */
|
||||||
if (e-q<2) {
|
if (e-q<2) {
|
||||||
|
if (consumed)
|
||||||
|
break;
|
||||||
errmsg = "truncated data";
|
errmsg = "truncated data";
|
||||||
startinpos = ((const char *)q)-starts;
|
startinpos = ((const char *)q)-starts;
|
||||||
endinpos = ((const char *)e)-starts;
|
endinpos = ((const char *)e)-starts;
|
||||||
|
@ -1565,6 +1596,9 @@ PyUnicode_DecodeUTF16(const char *s,
|
||||||
if (byteorder)
|
if (byteorder)
|
||||||
*byteorder = bo;
|
*byteorder = bo;
|
||||||
|
|
||||||
|
if (consumed)
|
||||||
|
*consumed = (const char *)q-starts;
|
||||||
|
|
||||||
/* Adjust length */
|
/* Adjust length */
|
||||||
if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
|
if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue