mirror of
https://github.com/python/cpython.git
synced 2025-12-11 03:20:01 +00:00
Fix issue #15899 - howto/unicode.rst doctest corrections for 2.7 branch.
This commit is contained in:
parent
a8a5b397c1
commit
769798363d
1 changed file with 17 additions and 15 deletions
|
|
@@ -253,11 +253,11 @@ characters greater than 127 will be treated as errors::
|
||||||
>>> s = unicode('abcdef')
|
>>> s = unicode('abcdef')
|
||||||
>>> type(s)
|
>>> type(s)
|
||||||
<type 'unicode'>
|
<type 'unicode'>
|
||||||
>>> unicode('abcdef' + chr(255))
|
>>> unicode('abcdef' + chr(255)) #doctest: +NORMALIZE_WHITESPACE
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
File "<stdin>", line 1, in ?
|
...
|
||||||
UnicodeDecodeError: 'ascii' codec can't decode byte 0xff in position 6:
|
UnicodeDecodeError: 'ascii' codec can't decode byte 0xff in position 6:
|
||||||
ordinal not in range(128)
|
ordinal not in range(128)
|
||||||
|
|
||||||
The ``errors`` argument specifies the response when the input string can't be
|
The ``errors`` argument specifies the response when the input string can't be
|
||||||
converted according to the encoding's rules. Legal values for this argument are
|
converted according to the encoding's rules. Legal values for this argument are
|
||||||
|
|
@@ -265,11 +265,11 @@ converted according to the encoding's rules. Legal values for this argument are
|
||||||
'REPLACEMENT CHARACTER'), or 'ignore' (just leave the character out of the
|
'REPLACEMENT CHARACTER'), or 'ignore' (just leave the character out of the
|
||||||
Unicode result). The following examples show the differences::
|
Unicode result). The following examples show the differences::
|
||||||
|
|
||||||
>>> unicode('\x80abc', errors='strict')
|
>>> unicode('\x80abc', errors='strict') #doctest: +NORMALIZE_WHITESPACE
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
File "<stdin>", line 1, in ?
|
...
|
||||||
UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0:
|
UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0:
|
||||||
ordinal not in range(128)
|
ordinal not in range(128)
|
||||||
>>> unicode('\x80abc', errors='replace')
|
>>> unicode('\x80abc', errors='replace')
|
||||||
u'\ufffdabc'
|
u'\ufffdabc'
|
||||||
>>> unicode('\x80abc', errors='ignore')
|
>>> unicode('\x80abc', errors='ignore')
|
||||||
|
|
@@ -312,10 +312,11 @@ strings. 8-bit strings will be converted to Unicode before carrying out the
|
||||||
operation; Python's default ASCII encoding will be used, so characters greater
|
operation; Python's default ASCII encoding will be used, so characters greater
|
||||||
than 127 will cause an exception::
|
than 127 will cause an exception::
|
||||||
|
|
||||||
>>> s.find('Was\x9f')
|
>>> s.find('Was\x9f') #doctest: +NORMALIZE_WHITESPACE
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
File "<stdin>", line 1, in ?
|
...
|
||||||
UnicodeDecodeError: 'ascii' codec can't decode byte 0x9f in position 3: ordinal not in range(128)
|
UnicodeDecodeError: 'ascii' codec can't decode byte 0x9f in position 3:
|
||||||
|
ordinal not in range(128)
|
||||||
>>> s.find(u'Was\x9f')
|
>>> s.find(u'Was\x9f')
|
||||||
-1
|
-1
|
||||||
|
|
||||||
|
|
@@ -333,10 +334,11 @@ character references. The following example shows the different results::
|
||||||
>>> u = unichr(40960) + u'abcd' + unichr(1972)
|
>>> u = unichr(40960) + u'abcd' + unichr(1972)
|
||||||
>>> u.encode('utf-8')
|
>>> u.encode('utf-8')
|
||||||
'\xea\x80\x80abcd\xde\xb4'
|
'\xea\x80\x80abcd\xde\xb4'
|
||||||
>>> u.encode('ascii')
|
>>> u.encode('ascii') #doctest: +NORMALIZE_WHITESPACE
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
File "<stdin>", line 1, in ?
|
...
|
||||||
UnicodeEncodeError: 'ascii' codec can't encode character '\ua000' in position 0: ordinal not in range(128)
|
UnicodeEncodeError: 'ascii' codec can't encode character u'\ua000' in
|
||||||
|
position 0: ordinal not in range(128)
|
||||||
>>> u.encode('ascii', 'ignore')
|
>>> u.encode('ascii', 'ignore')
|
||||||
'abcd'
|
'abcd'
|
||||||
>>> u.encode('ascii', 'replace')
|
>>> u.encode('ascii', 'replace')
|
||||||
|
|
@@ -384,9 +386,9 @@ arbitrary code point. Octal escapes can go up to U+01ff, which is octal 777.
|
||||||
::
|
::
|
||||||
|
|
||||||
>>> s = u"a\xac\u1234\u20ac\U00008000"
|
>>> s = u"a\xac\u1234\u20ac\U00008000"
|
||||||
^^^^ two-digit hex escape
|
... # ^^^^ two-digit hex escape
|
||||||
^^^^^^ four-digit Unicode escape
|
... # ^^^^^^ four-digit Unicode escape
|
||||||
^^^^^^^^^^ eight-digit Unicode escape
|
... # ^^^^^^^^^^ eight-digit Unicode escape
|
||||||
>>> for c in s: print ord(c),
|
>>> for c in s: print ord(c),
|
||||||
...
|
...
|
||||||
97 172 4660 8364 32768
|
97 172 4660 8364 32768
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue