mirror of
https://github.com/python/cpython.git
synced 2025-11-03 19:34:08 +00:00
unicode database compression, step 3:
- added decimal digit and digit properties to the unidb tables
This commit is contained in:
parent
858346e484
commit
0f8fad4969
1 changed file with 19 additions and 4 deletions
|
|
@ -8,6 +8,7 @@
|
||||||
# 2000-09-24 fl created (based on bits and pieces from unidb)
|
# 2000-09-24 fl created (based on bits and pieces from unidb)
|
||||||
# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
|
# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
|
||||||
# 2000-09-25 fl added character type table
|
# 2000-09-25 fl added character type table
|
||||||
|
# 2000-09-26 fl added LINEBREAK flags
|
||||||
#
|
#
|
||||||
# written by Fredrik Lundh (fredrik@pythonware.com), September 2000
|
# written by Fredrik Lundh (fredrik@pythonware.com), September 2000
|
||||||
#
|
#
|
||||||
|
|
@ -28,11 +29,12 @@ BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
|
||||||
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
|
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
|
||||||
"ON" ]
|
"ON" ]
|
||||||
|
|
||||||
|
# note: should match definitions in Objects/unicodectype.c
|
||||||
ALPHA_MASK = 0x01
|
ALPHA_MASK = 0x01
|
||||||
DECIMAL_MASK = 0x02
|
DECIMAL_MASK = 0x02
|
||||||
DIGIT_MASK = 0x04
|
DIGIT_MASK = 0x04
|
||||||
LOWER_MASK = 0x08
|
LOWER_MASK = 0x08
|
||||||
NUMERIC_MASK = 0x10
|
LINEBREAK_MASK = 0x10
|
||||||
SPACE_MASK = 0x20
|
SPACE_MASK = 0x20
|
||||||
TITLE_MASK = 0x40
|
TITLE_MASK = 0x40
|
||||||
UPPER_MASK = 0x80
|
UPPER_MASK = 0x80
|
||||||
|
|
@ -144,7 +146,7 @@ def maketables():
|
||||||
# 3) unicode type data
|
# 3) unicode type data
|
||||||
|
|
||||||
# extract unicode types
|
# extract unicode types
|
||||||
dummy = (0, 0, 0, 0)
|
dummy = (0, 0, 0, 0, 0, 0)
|
||||||
table = [dummy]
|
table = [dummy]
|
||||||
cache = {0: dummy}
|
cache = {0: dummy}
|
||||||
index = [0] * len(unicode.chars)
|
index = [0] * len(unicode.chars)
|
||||||
|
|
@ -160,6 +162,8 @@ def maketables():
|
||||||
flags |= ALPHA_MASK
|
flags |= ALPHA_MASK
|
||||||
if category == "Ll":
|
if category == "Ll":
|
||||||
flags |= LOWER_MASK
|
flags |= LOWER_MASK
|
||||||
|
if category == "Zl" or bidirectional == "B":
|
||||||
|
flags |= LINEBREAK_MASK
|
||||||
if category == "Zs" or bidirectional in ("WS", "B", "S"):
|
if category == "Zs" or bidirectional in ("WS", "B", "S"):
|
||||||
flags |= SPACE_MASK
|
flags |= SPACE_MASK
|
||||||
if category in ["Lt", "Lu"]:
|
if category in ["Lt", "Lu"]:
|
||||||
|
|
@ -179,8 +183,17 @@ def maketables():
|
||||||
title = (int(record[14], 16) - char) & 0xffff
|
title = (int(record[14], 16) - char) & 0xffff
|
||||||
else:
|
else:
|
||||||
title = 0
|
title = 0
|
||||||
|
# decimal digit, integer digit
|
||||||
|
decimal = 0
|
||||||
|
if record[6]:
|
||||||
|
flags |= DECIMAL_MASK
|
||||||
|
decimal = int(record[6])
|
||||||
|
digit = 0
|
||||||
|
if record[7]:
|
||||||
|
flags |= DIGIT_MASK
|
||||||
|
digit = int(record[7])
|
||||||
item = (
|
item = (
|
||||||
flags, upper, lower, title
|
flags, upper, lower, title, decimal, digit
|
||||||
)
|
)
|
||||||
# add entry to index and item tables
|
# add entry to index and item tables
|
||||||
i = cache.get(item)
|
i = cache.get(item)
|
||||||
|
|
@ -189,6 +202,8 @@ def maketables():
|
||||||
table.append(item)
|
table.append(item)
|
||||||
index[char] = i
|
index[char] = i
|
||||||
|
|
||||||
|
print len(table), "ctype entries"
|
||||||
|
|
||||||
FILE = "Objects/unicodetype_db.h"
|
FILE = "Objects/unicodetype_db.h"
|
||||||
|
|
||||||
sys.stdout = open(FILE, "w")
|
sys.stdout = open(FILE, "w")
|
||||||
|
|
@ -198,7 +213,7 @@ def maketables():
|
||||||
print "/* a list of unique character type descriptors */"
|
print "/* a list of unique character type descriptors */"
|
||||||
print "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
|
print "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
|
||||||
for item in table:
|
for item in table:
|
||||||
print " {%d, %d, %d, %d}," % item
|
print " {%d, %d, %d, %d, %d, %d}," % item
|
||||||
print "};"
|
print "};"
|
||||||
print
|
print
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue