Issue #25301: The UTF-8 decoder is now up to 15 times as fast for error
handlers: ``ignore``, ``replace`` and ``surrogateescape``.
2025-10-15 03:10:29 +00:00 · 2015-10-05 13:43:50 +02:00 · 2015-10-05 13:43:50 +02:00 · 1d65d9192d
commit 1d65d9192d
parent 7dbe6dd963
4 changed files with 57 additions and 9 deletions
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -788,6 +788,18 @@ class UTF8Test(ReadTest, unittest.TestCase):
        self.check_state_handling_decode(self.encoding,
                                         u, u.encode(self.encoding))

+    def test_decode_error(self):
+        for data, error_handler, expected in (
+            (b'[\x80\xff]', 'ignore', '[]'),
+            (b'[\x80\xff]', 'replace', '[\ufffd\ufffd]'),
+            (b'[\x80\xff]', 'surrogateescape', '[\udc80\udcff]'),
+            (b'[\x80\xff]', 'backslashreplace', '[\\x80\\xff]'),
+        ):
+            with self.subTest(data=data, error_handler=error_handler,
+                              expected=expected):
+                self.assertEqual(data.decode(self.encoding, error_handler),
+                                 expected)
+
    def test_lone_surrogates(self):
        super().test_lone_surrogates()
        # not sure if this is making sense for