mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	The multibyte decoders only ignore the first byte of an invalid byte sequence.
For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of
'\ufffd'.
		
	
			
		
			
				
	
	
		
			74 lines
		
	
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			74 lines
		
	
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
#
# test_codecencodings_kr.py
#   Codec encoding tests for ROK encodings.
#
 | 
						|
from test import support
 | 
						|
from test import test_multibytecodec_support
 | 
						|
import unittest
 | 
						|
 | 
						|
class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case configured for the 'cp949' encoding.

    The class only supplies data; the test methods themselves come from
    test_multibytecodec_support.TestBase.
    """

    encoding = "cp949"
    tstring = test_multibytecodec_support.load_teststring("cp949")

    # Rows read as (input bytes, error handler, expected text).  They are
    # interpreted by TestBase, which is not shown in this file; None appears
    # only for malformed input under "strict" -- presumably "decoding must
    # fail" (confirm against TestBase).
    codectests = (
        # invalid bytes
        # -- "strict": stray 0x80 bytes, then a dangling final 0xc8
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        # -- "replace": one U+FFFD per rejected byte, valid pair still decoded
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
        (
            b"abc\x80\x80\xc1\xc4\xc8",
            "replace",
            "abc\ufffd\ufffd\uc894\ufffd",
        ),
        # -- "ignore": rejected bytes vanish from the output
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
    )
 | 
						|
 | 
						|
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case configured for the 'euc_kr' encoding.

    The class only supplies data; the test methods themselves come from
    test_multibytecodec_support.TestBase.
    """

    encoding = "euc_kr"
    tstring = test_multibytecodec_support.load_teststring("euc_kr")

    # Rows read as (input bytes, error handler, expected text).  They are
    # interpreted by TestBase, which is not shown in this file; None appears
    # only for malformed input under "strict" -- presumably "decoding must
    # fail" (confirm against TestBase).
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
        (
            b"abc\x80\x80\xc1\xc4\xc8",
            "replace",
            "abc\ufffd\ufffd\uc894\ufffd",
        ),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),

        # composed make-up sequence errors
        # -- every proper prefix (2 to 7 bytes) of the 8-byte sequence is
        #    listed with None under "strict"
        (b"\xa4\xd4", "strict", None),
        (b"\xa4\xd4\xa4", "strict", None),
        (b"\xa4\xd4\xa4\xb6", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
        # -- the complete 8-byte sequence yields one character, and text
        #    following it is decoded normally
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"),
        # -- a truncated sequence under "replace" gives a single U+FFFD
        (b"a\xa4\xd4\xa4\xb6\xa4", "replace", "a\ufffd"),
        # -- one of the three later 0xa4 bytes swapped for 0xa3
        (b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
        # -- 0xff in place of the second byte of one component, "replace"
        (
            b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4",
            "replace",
            "\ufffd\u6e21\ufffd\u3160\ufffd",
        ),
        (
            b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4",
            "replace",
            "\ufffd\u6e21\ub544\ufffd\ufffd",
        ),
        (
            b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff",
            "replace",
            "\ufffd\u6e21\ub544\u572d\ufffd",
        ),
        # -- 0xff right after the leading pair; the complete sequence that
        #    follows is still decoded
        (
            b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4",
            "replace",
            "\ufffd\ufffd\ufffd\uc4d4",
        ),
        # -- an ordinary two-byte character for comparison
        (b"\xc1\xc4", "strict", "\uc894"),
    )
 | 
						|
 | 
						|
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case configured for the 'johab' encoding.

    The class only supplies data; the test methods themselves come from
    test_multibytecodec_support.TestBase.
    """

    encoding = "johab"
    tstring = test_multibytecodec_support.load_teststring("johab")

    # Rows read as (input bytes, error handler, expected text).  They are
    # interpreted by TestBase, which is not shown in this file; None appears
    # only for malformed input under "strict" -- presumably "decoding must
    # fail" (confirm against TestBase).
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
        (
            b"abc\x80\x80\xc1\xc4\xc8",
            "replace",
            "abc\ufffd\ufffd\ucd27\ufffd",
        ),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
        # -- under "replace" only the rejected byte becomes U+FFFD; the
        #    bytes after it come through as ordinary text
        (b"\xD8abc", "replace", "\uFFFDabc"),
        (b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"),
        (b"\x84bxy", "replace", "\uFFFDbxy"),
        (b"\x8CBxy", "replace", "\uFFFDBxy"),
    )
 | 
						|
 | 
						|
def test_main():
    """Pass this module's name to support.run_unittest."""
    this_module = __name__
    support.run_unittest(this_module)
 | 
						|
 | 
						|
# Script entry point: run the suite only when executed directly, not on import.
if "__main__" == __name__:
    test_main()
 |