mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			91 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			91 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from test.test_support import verbose, TestFailed, TestSkipped, verify
 | 
						|
import sys
 | 
						|
import os
 | 
						|
from unicodedata import normalize
 | 
						|
 | 
						|
TESTDATAFILE = "NormalizationTest-3.2.0" + os.extsep + "txt"
 | 
						|
 | 
						|
# This search allows using a build directory just inside the source
 | 
						|
# directory, and saving just one copy of the test data in the source
 | 
						|
# tree, rather than having a copy in each build directory.
 | 
						|
# There might be a better way to do this.
 | 
						|
 | 
						|
for path in [os.path.curdir, os.path.pardir]:
 | 
						|
    fn = os.path.join(path, TESTDATAFILE)
 | 
						|
    skip_expected = not os.path.exists(fn)
 | 
						|
    if not skip_expected:
 | 
						|
        TESTDATAFILE = fn
 | 
						|
        break
 | 
						|
 | 
						|
class RangeError:
 | 
						|
    pass
 | 
						|
 | 
						|
def NFC(str):
 | 
						|
    return normalize("NFC", str)
 | 
						|
 | 
						|
def NFKC(str):
 | 
						|
    return normalize("NFKC", str)
 | 
						|
 | 
						|
def NFD(str):
 | 
						|
    return normalize("NFD", str)
 | 
						|
 | 
						|
def NFKD(str):
 | 
						|
    return normalize("NFKD", str)
 | 
						|
 | 
						|
def unistr(data):
 | 
						|
    data = [int(x, 16) for x in data.split(" ")]
 | 
						|
    for x in data:
 | 
						|
        if x > sys.maxunicode:
 | 
						|
            raise RangeError
 | 
						|
    return u"".join([unichr(x) for x in data])
 | 
						|
 | 
						|
def test_main():
 | 
						|
    if skip_expected:
 | 
						|
        raise TestSkipped(TESTDATAFILE + " not found, download from " +
 | 
						|
                    "http://www.unicode.org/Public/3.2-Update/" + TESTDATAFILE)
 | 
						|
 | 
						|
    part1_data = {}
 | 
						|
    for line in open(TESTDATAFILE):
 | 
						|
        if '#' in line:
 | 
						|
            line = line.split('#')[0]
 | 
						|
        line = line.strip()
 | 
						|
        if not line:
 | 
						|
            continue
 | 
						|
        if line.startswith("@Part"):
 | 
						|
            part = line
 | 
						|
            continue
 | 
						|
        try:
 | 
						|
            c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
 | 
						|
        except RangeError:
 | 
						|
            # Skip unsupported characters
 | 
						|
            continue
 | 
						|
 | 
						|
        if verbose:
 | 
						|
            print line
 | 
						|
 | 
						|
        # Perform tests
 | 
						|
        verify(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
 | 
						|
        verify(c4 ==  NFC(c4) ==  NFC(c5), line)
 | 
						|
        verify(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
 | 
						|
        verify(c5 ==  NFD(c4) ==  NFD(c5), line)
 | 
						|
        verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5),
 | 
						|
               line)
 | 
						|
        verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5),
 | 
						|
               line)
 | 
						|
 | 
						|
        # Record part 1 data
 | 
						|
        if part == "@Part1":
 | 
						|
            part1_data[c1] = 1
 | 
						|
 | 
						|
    # Perform tests for all other data
 | 
						|
    for c in range(sys.maxunicode+1):
 | 
						|
        X = unichr(c)
 | 
						|
        if X in part1_data:
 | 
						|
            continue
 | 
						|
        assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c
 | 
						|
 | 
						|
    # Check for bug 834676
 | 
						|
    normalize('NFC',u'\ud55c\uae00')
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    test_main()
 |