Marc-Andre Lemburg:

Attached you will find the latest update of the Unicode implementation.
The patch is against the current CVS version.

It includes the fix I posted yesterday for the core dump problem
in codecs.c (which was introduced by my previous patch set -- sorry),
and adds more tests for the codecs as well as two new parser markers,
"es" and "es#".
This commit is contained in:
Guido van Rossum 2000-03-24 22:14:19 +00:00
parent 27fc3c05e1
commit d8855fde88
5 changed files with 259 additions and 6 deletions

View file

@@ -293,3 +293,33 @@ else:
assert unicodedata.combining(u'\u20e1') == 230
print 'done.'
# Test builtin codecs
print 'Testing builtin codecs...',
assert unicode('hello','ascii') == u'hello'
assert unicode('hello','utf-8') == u'hello'
assert unicode('hello','utf8') == u'hello'
assert unicode('hello','latin-1') == u'hello'
assert u'hello'.encode('ascii') == 'hello'
assert u'hello'.encode('utf-8') == 'hello'
assert u'hello'.encode('utf8') == 'hello'
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
assert u'hello'.encode('latin-1') == 'hello'
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
assert unicode(u.encode(encoding),encoding) == u
u = u''.join(map(unichr, range(256)))
for encoding in ('latin-1',):
assert unicode(u.encode(encoding),encoding) == u
u = u''.join(map(unichr, range(128)))
for encoding in ('ascii',):
assert unicode(u.encode(encoding),encoding) == u
print 'done.'