mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
- SF #962502: Add two more methods to the unicode type, width() and
iswide(), for East Asian width manipulation. (Inspired by David Goodger, reviewed by Martin v. Loewis) - Move _PyUnicode_TypeRecord.flags to the end of the struct so that no padding is added for UCS-4 builds. (Suggested by Martin v. Loewis)
This commit is contained in:
parent
b6568b91fd
commit
974ed7cfa5
11 changed files with 683 additions and 459 deletions
|
@ -18,6 +18,7 @@
|
|||
# 2002-10-22 mvl generate NFC tables
|
||||
# 2002-11-24 mvl expand all ranges, sort names version-independently
|
||||
# 2002-11-25 mvl add UNIDATA_VERSION
|
||||
# 2004-05-29 perky add east asian width information
|
||||
#
|
||||
# written by Fredrik Lundh (fredrik@pythonware.com)
|
||||
#
|
||||
|
@ -25,12 +26,13 @@
|
|||
import sys
|
||||
|
||||
SCRIPT = sys.argv[0]
|
||||
VERSION = "2.2"
|
||||
VERSION = "2.3"
|
||||
|
||||
# The Unicode Database
|
||||
UNIDATA_VERSION = "3.2.0"
|
||||
UNICODE_DATA = "UnicodeData.txt"
|
||||
COMPOSITION_EXCLUSIONS = "CompositionExclusions.txt"
|
||||
EASTASIAN_WIDTH = "EastAsianWidth.txt"
|
||||
|
||||
CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
|
||||
"Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn", "Lm",
|
||||
|
@ -50,12 +52,14 @@ LINEBREAK_MASK = 0x10
|
|||
SPACE_MASK = 0x20
|
||||
TITLE_MASK = 0x40
|
||||
UPPER_MASK = 0x80
|
||||
WIDE_MASK = 0x100
|
||||
|
||||
def maketables(trace=0):
|
||||
|
||||
print "--- Reading", UNICODE_DATA, "..."
|
||||
|
||||
unicode = UnicodeData(UNICODE_DATA, COMPOSITION_EXCLUSIONS)
|
||||
unicode = UnicodeData(UNICODE_DATA, COMPOSITION_EXCLUSIONS,
|
||||
EASTASIAN_WIDTH)
|
||||
|
||||
print len(filter(None, unicode.table)), "characters"
|
||||
|
||||
|
@ -330,8 +334,10 @@ def makeunicodetype(unicode, trace):
|
|||
if record[7]:
|
||||
flags |= DIGIT_MASK
|
||||
digit = int(record[7])
|
||||
if record[15] in ('W', 'F'): # Wide or Full width
|
||||
flags |= WIDE_MASK
|
||||
item = (
|
||||
flags, upper, lower, title, decimal, digit
|
||||
upper, lower, title, decimal, digit, flags
|
||||
)
|
||||
# add entry to index and item tables
|
||||
i = cache.get(item)
|
||||
|
@ -538,7 +544,7 @@ import sys
|
|||
|
||||
class UnicodeData:
|
||||
|
||||
def __init__(self, filename, exclusions, expand=1):
|
||||
def __init__(self, filename, exclusions, eastasianwidth, expand=1):
|
||||
file = open(filename)
|
||||
table = [None] * 0x110000
|
||||
while 1:
|
||||
|
@ -581,6 +587,25 @@ class UnicodeData:
|
|||
char = int(s.split()[0],16)
|
||||
self.exclusions[char] = 1
|
||||
|
||||
widths = [None] * 0x110000
|
||||
for s in open(eastasianwidth):
|
||||
s = s.strip()
|
||||
if not s:
|
||||
continue
|
||||
if s[0] == '#':
|
||||
continue
|
||||
s = s.split()[0].split(';')
|
||||
if '..' in s[0]:
|
||||
first, last = [int(c, 16) for c in s[0].split('..')]
|
||||
chars = range(first, last+1)
|
||||
else:
|
||||
chars = [int(s[0], 16)]
|
||||
for char in chars:
|
||||
widths[char] = s[1]
|
||||
for i in range(0, 0x110000):
|
||||
if table[i] is not None:
|
||||
table[i].append(widths[i])
|
||||
|
||||
def uselatin1(self):
|
||||
# restrict character range to ISO Latin 1
|
||||
self.chars = range(256)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue