mirror of
https://github.com/python/cpython.git
synced 2025-10-14 10:53:40 +00:00
Rename the surrogates error handler to surrogatepass.
This commit is contained in:
parent
cf7925dfc6
commit
e0a2b72e61
6 changed files with 25 additions and 25 deletions
|
@ -327,15 +327,15 @@ and implemented by all standard Python codecs:
|
||||||
|
|
||||||
In addition, the following error handlers are specific to a single codec:
|
In addition, the following error handlers are specific to a single codec:
|
||||||
|
|
||||||
+------------------+---------+--------------------------------------------+
|
+-------------------+---------+-------------------------------------------+
|
||||||
| Value | Codec | Meaning |
|
| Value | Codec | Meaning |
|
||||||
+==================+=========+============================================+
|
+===================+=========+===========================================+
|
||||||
| ``'surrogates'`` | utf-8 | Allow encoding and decoding of surrogate |
|
|``'surrogatepass'``| utf-8 | Allow encoding and decoding of surrogate |
|
||||||
| | | codes in UTF-8. |
|
| | | codes in UTF-8. |
|
||||||
+------------------+---------+--------------------------------------------+
|
+-------------------+---------+-------------------------------------------+
|
||||||
|
|
||||||
.. versionadded:: 3.1
|
.. versionadded:: 3.1
|
||||||
The ``'utf8b'`` and ``'surrogates'`` error handlers.
|
The ``'utf8b'`` and ``'surrogatepass'`` error handlers.
|
||||||
|
|
||||||
The set of allowed values can be extended via :meth:`register_error`.
|
The set of allowed values can be extended via :meth:`register_error`.
|
||||||
|
|
||||||
|
|
|
@ -545,12 +545,12 @@ class UTF8Test(ReadTest):
|
||||||
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
|
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
|
||||||
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
|
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
|
||||||
|
|
||||||
def test_surrogates_handler(self):
|
def test_surrogatepass_handler(self):
|
||||||
self.assertEquals("abc\ud800def".encode("utf-8", "surrogates"),
|
self.assertEquals("abc\ud800def".encode("utf-8", "surrogatepass"),
|
||||||
b"abc\xed\xa0\x80def")
|
b"abc\xed\xa0\x80def")
|
||||||
self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogates"),
|
self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
|
||||||
"abc\ud800def")
|
"abc\ud800def")
|
||||||
self.assertTrue(codecs.lookup_error("surrogates"))
|
self.assertTrue(codecs.lookup_error("surrogatepass"))
|
||||||
|
|
||||||
class UTF7Test(ReadTest):
|
class UTF7Test(ReadTest):
|
||||||
encoding = "utf-7"
|
encoding = "utf-7"
|
||||||
|
@ -1040,12 +1040,12 @@ class NameprepTest(unittest.TestCase):
|
||||||
# Skipped
|
# Skipped
|
||||||
continue
|
continue
|
||||||
# The Unicode strings are given in UTF-8
|
# The Unicode strings are given in UTF-8
|
||||||
orig = str(orig, "utf-8", "surrogates")
|
orig = str(orig, "utf-8", "surrogatepass")
|
||||||
if prepped is None:
|
if prepped is None:
|
||||||
# Input contains prohibited characters
|
# Input contains prohibited characters
|
||||||
self.assertRaises(UnicodeError, nameprep, orig)
|
self.assertRaises(UnicodeError, nameprep, orig)
|
||||||
else:
|
else:
|
||||||
prepped = str(prepped, "utf-8", "surrogates")
|
prepped = str(prepped, "utf-8", "surrogatepass")
|
||||||
try:
|
try:
|
||||||
self.assertEquals(nameprep(orig), prepped)
|
self.assertEquals(nameprep(orig), prepped)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -906,10 +906,10 @@ class UnicodeTest(
|
||||||
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
|
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
|
||||||
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
|
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
|
||||||
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
|
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
|
||||||
self.assertEqual('\ud800'.encode('utf-8', 'surrogates'), b'\xed\xa0\x80')
|
self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
|
||||||
self.assertEqual('\udc00'.encode('utf-8', 'surrogates'), b'\xed\xb0\x80')
|
self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
('\ud800\udc02'*1000).encode('utf-8', 'surrogates'),
|
('\ud800\udc02'*1000).encode('utf-8', 'surrogatepass'),
|
||||||
b'\xf0\x90\x80\x82'*1000
|
b'\xf0\x90\x80\x82'*1000
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
|
|
@ -56,7 +56,7 @@ Core and Builtins
|
||||||
- Issue #4426: The UTF-7 decoder was too strict and didn't accept some legal
|
- Issue #4426: The UTF-7 decoder was too strict and didn't accept some legal
|
||||||
sequences. Patch by Nick Barnes and Victor Stinner.
|
sequences. Patch by Nick Barnes and Victor Stinner.
|
||||||
|
|
||||||
- Issue #3672: Reject surrogates in utf-8 codec; add surrogates error handler.
|
- Issue #3672: Reject surrogates in utf-8 codec; add surrogatepass error handler.
|
||||||
|
|
||||||
- Issue #5883: In the io module, the BufferedIOBase and TextIOBase ABCs have
|
- Issue #5883: In the io module, the BufferedIOBase and TextIOBase ABCs have
|
||||||
received a new method, detach(). detach() disconnects the underlying stream
|
received a new method, detach(). detach() disconnects the underlying stream
|
||||||
|
|
|
@ -751,7 +751,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
||||||
/* This handler is declared static until someone demonstrates
|
/* This handler is declared static until someone demonstrates
|
||||||
a need to call it directly. */
|
a need to call it directly. */
|
||||||
static PyObject *
|
static PyObject *
|
||||||
PyCodec_SurrogateErrors(PyObject *exc)
|
PyCodec_SurrogatePassErrors(PyObject *exc)
|
||||||
{
|
{
|
||||||
PyObject *restuple;
|
PyObject *restuple;
|
||||||
PyObject *object;
|
PyObject *object;
|
||||||
|
@ -935,9 +935,9 @@ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
|
||||||
return PyCodec_BackslashReplaceErrors(exc);
|
return PyCodec_BackslashReplaceErrors(exc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
|
static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
|
||||||
{
|
{
|
||||||
return PyCodec_SurrogateErrors(exc);
|
return PyCodec_SurrogatePassErrors(exc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
|
static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
|
||||||
|
@ -993,10 +993,10 @@ static int _PyCodecRegistry_Init(void)
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"surrogates",
|
"surrogatepass",
|
||||||
{
|
{
|
||||||
"surrogates",
|
"surrogatepass",
|
||||||
surrogates_errors,
|
surrogatepass_errors,
|
||||||
METH_O
|
METH_O
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
@ -314,7 +314,7 @@ w_object(PyObject *v, WFILE *p)
|
||||||
PyObject *utf8;
|
PyObject *utf8;
|
||||||
utf8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(v),
|
utf8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(v),
|
||||||
PyUnicode_GET_SIZE(v),
|
PyUnicode_GET_SIZE(v),
|
||||||
"surrogates");
|
"surrogatepass");
|
||||||
if (utf8 == NULL) {
|
if (utf8 == NULL) {
|
||||||
p->depth--;
|
p->depth--;
|
||||||
p->error = WFERR_UNMARSHALLABLE;
|
p->error = WFERR_UNMARSHALLABLE;
|
||||||
|
@ -809,7 +809,7 @@ r_object(RFILE *p)
|
||||||
retval = NULL;
|
retval = NULL;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
v = PyUnicode_DecodeUTF8(buffer, n, "surrogates");
|
v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
|
||||||
PyMem_DEL(buffer);
|
PyMem_DEL(buffer);
|
||||||
retval = v;
|
retval = v;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue