Add a workaround for a problem where UTF-8 strings can be corrupted

or broken by basic ctype functions in 4.4BSD descendants.  This
will be fixed in their future development branches, but they will keep
the POSIX incompatibility for backward compatibility in the near
future.
This commit is contained in:
Hye-Shik Chang 2004-08-04 06:33:51 +00:00
parent 6db15d7307
commit b5047fd019
3 changed files with 71 additions and 0 deletions

View file

@ -47,3 +47,38 @@ try:
locale.getpreferredencoding()
finally:
locale.setlocale(locale.LC_NUMERIC, oldlocale)
# Test BSD Rune locale's bug for isctype functions.
def teststrop(s, method, output):
if verbose:
print "%s.%s() =? %s ..." % (repr(s), method, repr(output)),
result = getattr(s, method)()
if result != output:
if verbose:
print "no"
print "%s.%s() == %s != %s" % (repr(s), method, repr(result),
repr(output))
elif verbose:
print "yes"
try:
    # Calling setlocale() without a second argument queries the current
    # LC_CTYPE setting, which is saved so it can be put back afterwards.
    oldlocale = locale.setlocale(locale.LC_CTYPE)
    locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8')
except locale.Error:
    # The locale could not be queried or switched; skip these checks.
    pass
else:
    # Each entry is (string, method name, expected result) and is handed to
    # teststrop() in this order.
    _ctype_cases = (
        # Plain ASCII space is still whitespace.
        ('\x20', 'isspace', True),
        # Lone bytes >= 0x80 must not be classified as space, letter,
        # alphanumeric, upper case or lower case.
        ('\xa0', 'isspace', False),
        ('\xa1', 'isspace', False),
        ('\xc0', 'isalpha', False),
        ('\xc0', 'isalnum', False),
        ('\xc0', 'isupper', False),
        ('\xc0', 'islower', False),
        # Multi-byte sequences must come back unchanged from split/strip
        # and from the case-conversion methods.
        ('\xec\xa0\xbc', 'split', ['\xec\xa0\xbc']),
        ('\xed\x95\xa0', 'strip', '\xed\x95\xa0'),
        ('\xcc\x85', 'lower', '\xcc\x85'),
        ('\xed\x95\xa0', 'upper', '\xed\x95\xa0'),
    )
    try:
        for _case in _ctype_cases:
            teststrop(*_case)
    finally:
        # Always restore the LC_CTYPE setting saved above.
        locale.setlocale(locale.LC_CTYPE, oldlocale)