#2630: Implement PEP 3138.

The repr() of a string now leaves printable Unicode characters unescaped; non-printable characters are still escaped.
The new ascii() builtin returns a repr() in which every non-ASCII character is escaped, so the result contains only ASCII characters.

PEP and patch were written by Atsuo Ishimoto.
This commit is contained in:
Georg Brandl 2008-06-11 18:37:52 +00:00
parent ea6d58d9d3
commit 559e5d7f4d
25 changed files with 1271 additions and 974 deletions

View file

@ -20,6 +20,7 @@
# 2002-11-25 mvl add UNIDATA_VERSION
# 2004-05-29 perky add east asian width information
# 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta
# 2008-06-11 gb add NONPRINTABLE_MASK for Atsuo Ishimoto's ascii() patch
#
# written by Fredrik Lundh (fredrik@pythonware.com)
#
@ -60,6 +61,7 @@ TITLE_MASK = 0x40
UPPER_MASK = 0x80
XID_START_MASK = 0x100
XID_CONTINUE_MASK = 0x200
NONPRINTABLE_MASK = 0x400
def maketables(trace=0):
@ -71,7 +73,7 @@ def maketables(trace=0):
EASTASIAN_WIDTH % version,
DERIVED_CORE_PROPERTIES % version)
print(len(filter(None, unicode.table)), "characters")
print(len(list(filter(None, unicode.table))), "characters")
for version in old_versions:
print("--- Reading", UNICODE_DATA % ("-"+version), "...")
@ -79,7 +81,7 @@ def maketables(trace=0):
COMPOSITION_EXCLUSIONS % ("-"+version),
EASTASIAN_WIDTH % ("-"+version),
DERIVED_CORE_PROPERTIES % ("-"+version))
print(len(filter(None, old_unicode.table)), "characters")
print(len(list(filter(None, old_unicode.table))), "characters")
merge_old_version(version, unicode, old_unicode)
makeunicodename(unicode, trace)
@ -371,6 +373,10 @@ def makeunicodetype(unicode, trace):
flags |= TITLE_MASK
if category == "Lu":
flags |= UPPER_MASK
if category[0] == "C":
flags |= NONPRINTABLE_MASK
if category[0] == "Z" and char != " ":
flags |= NONPRINTABLE_MASK
if "XID_Start" in properties:
flags |= XID_START_MASK
if "XID_Continue" in properties:
@ -465,7 +471,7 @@ def makeunicodename(unicode, trace):
if name and name[0] != "<":
names[char] = name + chr(0)
print(len(n for n in names if n is not None), "distinct names")
print(len(list(n for n in names if n is not None)), "distinct names")
# collect unique words from names (note that we differ between
# words inside a sentence, and words ending a sentence. the