mirror of
https://github.com/python/cpython.git
synced 2025-10-17 12:18:23 +00:00
Marc-Andre Lemburg:
Attached you will find the latest update of the Unicode implementation. The patch is against the current CVS version. It includes the fix I posted yesterday for the core dump problem in codecs.c (which was introduced by my previous patch set -- sorry), and it adds more tests for the codecs as well as two new parser markers, "es" and "es#".
This commit is contained in:
parent
27fc3c05e1
commit
d8855fde88
5 changed files with 259 additions and 6 deletions
|
@ -293,3 +293,33 @@ else:
|
|||
assert unicodedata.combining(u'\u20e1') == 230
|
||||
|
||||
print 'done.'
|
||||
|
||||
# Test builtin codecs
|
||||
print 'Testing builtin codecs...',
|
||||
|
||||
assert unicode('hello','ascii') == u'hello'
|
||||
assert unicode('hello','utf-8') == u'hello'
|
||||
assert unicode('hello','utf8') == u'hello'
|
||||
assert unicode('hello','latin-1') == u'hello'
|
||||
|
||||
assert u'hello'.encode('ascii') == 'hello'
|
||||
assert u'hello'.encode('utf-8') == 'hello'
|
||||
assert u'hello'.encode('utf8') == 'hello'
|
||||
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
|
||||
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
|
||||
assert u'hello'.encode('latin-1') == 'hello'
|
||||
|
||||
u = u''.join(map(unichr, range(1024)))
|
||||
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
|
||||
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
|
||||
assert unicode(u.encode(encoding),encoding) == u
|
||||
|
||||
u = u''.join(map(unichr, range(256)))
|
||||
for encoding in ('latin-1',):
|
||||
assert unicode(u.encode(encoding),encoding) == u
|
||||
|
||||
u = u''.join(map(unichr, range(128)))
|
||||
for encoding in ('ascii',):
|
||||
assert unicode(u.encode(encoding),encoding) == u
|
||||
|
||||
print 'done.'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue