mirror of
				https://github.com/python/cpython.git
				synced 2025-10-24 15:36:26 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			122 lines
		
	
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			122 lines
		
	
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import unittest
 | |
| from test.test_tools import skip_if_missing, imports_under_tool
 | |
| from test import support
 | |
| from test.support.hypothesis_helper import hypothesis
 | |
| 
 | |
# Short module-level aliases for the hypothesis entry points used below.
example = hypothesis.example
given = hypothesis.given
st = hypothesis.strategies


# The skip check must run before the import attempt below.
# NOTE(review): presumably this skips the whole module when the "unicode"
# tool is not available -- confirm against test.test_tools.
skip_if_missing("unicode")
with imports_under_tool("unicode"):
    # `dawg` is imported inside the context manager rather than at the top
    # of the file, since it is only importable under the "unicode" tool.
    from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup
 | |
| 
 | |
| 
 | |
@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    """Composite strategy producing a list of ``(name, character)`` pairs.

    The number of pairs is drawn first, between *min_length* and
    *max_length*.  A set of that many names and a set of that many
    characters are then drawn and zipped together, so both the names and
    the characters within one result are distinct.

    Names are non-empty strings of at most 10 letters over the alphabet
    "abcd"; characters come from ``st.characters()``.
    """
    size = draw(st.integers(min_value=min_length, max_value=max_length))

    # Both sets are pinned to exactly `size` elements so that zip() pairs
    # every name with a character and drops nothing.
    name_strategy = st.text("abcd", min_size=1, max_size=10)
    unique_names = draw(st.sets(name_strategy, min_size=size, max_size=size))
    unique_chars = draw(st.sets(st.characters(), min_size=size, max_size=size))

    return list(zip(unique_names, unique_chars))
 | |
| 
 | |
| 
 | |
class TestDawg(unittest.TestCase):
    """Tests for the directed acyclic word graph data structure that is used
    to store the unicode character names in unicodedata. Tests ported from PyPy
    """

    def test_dawg_direct_simple(self):
        """Build a small DAWG by hand and check lookups in both directions."""
        # Single source of truth for the inserted words and their values;
        # the same table drives the insert, lookup and inverse-lookup checks.
        entries = [
            ("a", -4),
            ("c", -2),
            ("cat", -1),
            ("catarr", 0),
            ("catnip", 1),
            ("zcatnip", 5),
        ]

        dawg = Dawg()
        for word, value in entries:
            dawg.insert(word, value)
        packed, data, inverse = dawg.finish()

        # Forward direction: the lookup functions work on bytes keys.
        for word, value in entries:
            key = word.encode("ascii")
            with self.subTest(direction="lookup", key=key):
                self.assertEqual(lookup(packed, data, key), value)

        # An unknown word, a strict prefix of a stored word, and a stored
        # word with an extra trailing letter must all be rejected.
        for missing in (b"b", b"catni", b"catnipp"):
            with self.subTest(direction="lookup", missing=missing):
                self.assertRaises(KeyError, lookup, packed, data, missing)

        # Inverse direction: value -> bytes key.
        for word, value in entries:
            with self.subTest(direction="inverse_lookup", value=value):
                self.assertEqual(
                    inverse_lookup(packed, inverse, value),
                    word.encode("ascii"),
                )
        self.assertRaises(KeyError, inverse_lookup, packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        """Finishing a DAWG with no inserted words raises ValueError."""
        dawg = Dawg()
        self.assertRaises(ValueError, dawg.finish)

    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        """Build a compression DAWG from generated (name, character) pairs."""
        # suppress debug prints; the captured text itself is not inspected
        with support.captured_stdout():
            # it's enough to build it, building will also check the result
            build_compression_dawg(data)
 | 
