mirror of
https://github.com/python/cpython.git
synced 2025-11-25 21:11:09 +00:00
The incremental decoder for utf-7 must preserve its state between calls.
Solves issue #1460. Might not be a backport candidate: a new API function was added, and some code may rely on implementation details in utf_7.py.
This commit is contained in:
parent
8c4592a77a
commit
5087980c1e
5 changed files with 61 additions and 23 deletions
|
|
@ -674,6 +674,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
|
|||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
|
||||
const char *string, /* UTF-7 encoded string */
|
||||
Py_ssize_t length, /* size of string */
|
||||
const char *errors, /* error handling */
|
||||
Py_ssize_t *consumed /* bytes consumed */
|
||||
);
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
|
||||
const Py_UNICODE *data, /* Unicode char buffer */
|
||||
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
||||
|
|
|
|||
|
|
@ -6,34 +6,31 @@ import codecs
|
|||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
|
||||
# Note: Binding these as C functions will result in the class not
|
||||
# converting them to methods. This is intended.
|
||||
encode = codecs.utf_7_encode
|
||||
decode = codecs.utf_7_decode
|
||||
|
||||
def decode(input, errors='strict'):
|
||||
return codecs.utf_7_decode(input, errors, True)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.utf_7_encode(input, self.errors)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
|
||||
def _buffer_decode(self, input, errors, final):
|
||||
return codecs.utf_7_decode(input, self.errors)
|
||||
_buffer_decode = codecs.utf_7_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
|
||||
pass
|
||||
class StreamWriter(codecs.StreamWriter):
|
||||
encode = codecs.utf_7_encode
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
|
||||
pass
|
||||
class StreamReader(codecs.StreamReader):
|
||||
decode = codecs.utf_7_decode
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
|
||||
return codecs.CodecInfo(
|
||||
name='utf-7',
|
||||
encode=Codec.encode,
|
||||
decode=Codec.decode,
|
||||
encode=encode,
|
||||
decode=decode,
|
||||
incrementalencoder=IncrementalEncoder,
|
||||
incrementaldecoder=IncrementalDecoder,
|
||||
streamreader=StreamReader,
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class ReadTest(unittest.TestCase):
|
|||
self.assertEqual(d.decode("", True), u"")
|
||||
self.assertEqual(d.buffer, "")
|
||||
|
||||
# Check whether the rest method works properly
|
||||
# Check whether the reset method works properly
|
||||
d.reset()
|
||||
result = u""
|
||||
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
|
||||
|
|
@ -491,7 +491,17 @@ class UTF8Test(ReadTest):
|
|||
class UTF7Test(ReadTest):
|
||||
encoding = "utf-7"
|
||||
|
||||
# No test_partial() yet, because UTF-7 doesn't support it.
|
||||
def test_partial(self):
|
||||
self.check_partial(
|
||||
u"a+-b",
|
||||
[
|
||||
u"a",
|
||||
u"a",
|
||||
u"a+",
|
||||
u"a+-",
|
||||
u"a+-b",
|
||||
]
|
||||
)
|
||||
|
||||
class UTF16ExTest(unittest.TestCase):
|
||||
|
||||
|
|
|
|||
|
|
@ -235,13 +235,20 @@ utf_7_decode(PyObject *self,
|
|||
const char *data;
|
||||
Py_ssize_t size;
|
||||
const char *errors = NULL;
|
||||
int final = 0;
|
||||
Py_ssize_t consumed;
|
||||
PyObject *decoded = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
|
||||
&data, &size, &errors))
|
||||
if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode",
|
||||
&data, &size, &errors, &final))
|
||||
return NULL;
|
||||
consumed = size;
|
||||
|
||||
return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
|
||||
size);
|
||||
decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors,
|
||||
final ? NULL : &consumed);
|
||||
if (decoded == NULL)
|
||||
return NULL;
|
||||
return codec_tuple(decoded, consumed);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
|
|
|||
|
|
@ -943,6 +943,14 @@ char utf7_special[128] = {
|
|||
PyObject *PyUnicode_DecodeUTF7(const char *s,
|
||||
Py_ssize_t size,
|
||||
const char *errors)
|
||||
{
|
||||
return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
|
||||
}
|
||||
|
||||
PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
|
||||
Py_ssize_t size,
|
||||
const char *errors,
|
||||
Py_ssize_t *consumed)
|
||||
{
|
||||
const char *starts = s;
|
||||
Py_ssize_t startinpos;
|
||||
|
|
@ -962,8 +970,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
|
|||
unicode = _PyUnicode_New(size);
|
||||
if (!unicode)
|
||||
return NULL;
|
||||
if (size == 0)
|
||||
if (size == 0) {
|
||||
if (consumed)
|
||||
*consumed = 0;
|
||||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
p = unicode->str;
|
||||
e = s + size;
|
||||
|
|
@ -1049,7 +1060,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
|
|||
goto onError;
|
||||
}
|
||||
|
||||
if (inShift) {
|
||||
if (inShift && !consumed) {
|
||||
outpos = p-PyUnicode_AS_UNICODE(unicode);
|
||||
endinpos = size;
|
||||
if (unicode_decode_call_errorhandler(
|
||||
|
|
@ -1061,6 +1072,12 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
|
|||
if (s < e)
|
||||
goto restart;
|
||||
}
|
||||
if (consumed) {
|
||||
if(inShift)
|
||||
*consumed = startinpos;
|
||||
else
|
||||
*consumed = s-starts;
|
||||
}
|
||||
|
||||
if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
|
||||
goto onError;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue