mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Normalize the encoding names for Latin-1 and UTF-8 to
'latin-1' and 'utf-8'. These spellings are optimized in the Python Unicode implementation, resulting in more direct processing that bypasses the codec registry. See also issue #11303.
This commit is contained in:
parent
a391b11320
commit
8f36af7a4c
32 changed files with 84 additions and 79 deletions
|
@ -75,7 +75,7 @@ class async_chat (asyncore.dispatcher):
|
||||||
# sign of an application bug that we don't want to pass silently
|
# sign of an application bug that we don't want to pass silently
|
||||||
|
|
||||||
use_encoding = 0
|
use_encoding = 0
|
||||||
encoding = 'latin1'
|
encoding = 'latin-1'
|
||||||
|
|
||||||
def __init__ (self, sock=None, map=None):
|
def __init__ (self, sock=None, map=None):
|
||||||
# for string terminator matching
|
# for string terminator matching
|
||||||
|
|
|
@ -263,11 +263,11 @@ class bdist_wininst(Command):
|
||||||
cfgdata = cfgdata + b"\0"
|
cfgdata = cfgdata + b"\0"
|
||||||
if self.pre_install_script:
|
if self.pre_install_script:
|
||||||
# We need to normalize newlines, so we open in text mode and
|
# We need to normalize newlines, so we open in text mode and
|
||||||
# convert back to bytes. "latin1" simply avoids any possible
|
# convert back to bytes. "latin-1" simply avoids any possible
|
||||||
# failures.
|
# failures.
|
||||||
with open(self.pre_install_script, "r",
|
with open(self.pre_install_script, "r",
|
||||||
encoding="latin1") as script:
|
encoding="latin-1") as script:
|
||||||
script_data = script.read().encode("latin1")
|
script_data = script.read().encode("latin-1")
|
||||||
cfgdata = cfgdata + script_data + b"\n\0"
|
cfgdata = cfgdata + script_data + b"\n\0"
|
||||||
else:
|
else:
|
||||||
# empty pre-install script
|
# empty pre-install script
|
||||||
|
|
|
@ -100,7 +100,7 @@ class FTP:
|
||||||
file = None
|
file = None
|
||||||
welcome = None
|
welcome = None
|
||||||
passiveserver = 1
|
passiveserver = 1
|
||||||
encoding = "latin1"
|
encoding = "latin-1"
|
||||||
|
|
||||||
# Initialization method (called by class instantiation).
|
# Initialization method (called by class instantiation).
|
||||||
# Initialize host to localhost, port to standard ftp port
|
# Initialize host to localhost, port to standard ftp port
|
||||||
|
|
|
@ -697,7 +697,7 @@ class HTTPConnection:
|
||||||
self.send(connect_bytes)
|
self.send(connect_bytes)
|
||||||
for header, value in self._tunnel_headers.items():
|
for header, value in self._tunnel_headers.items():
|
||||||
header_str = "%s: %s\r\n" % (header, value)
|
header_str = "%s: %s\r\n" % (header, value)
|
||||||
header_bytes = header_str.encode("latin1")
|
header_bytes = header_str.encode("latin-1")
|
||||||
self.send(header_bytes)
|
self.send(header_bytes)
|
||||||
self.send(b'\r\n')
|
self.send(b'\r\n')
|
||||||
|
|
||||||
|
@ -937,7 +937,7 @@ class HTTPConnection:
|
||||||
values = list(values)
|
values = list(values)
|
||||||
for i, one_value in enumerate(values):
|
for i, one_value in enumerate(values):
|
||||||
if hasattr(one_value, 'encode'):
|
if hasattr(one_value, 'encode'):
|
||||||
values[i] = one_value.encode('latin1')
|
values[i] = one_value.encode('latin-1')
|
||||||
elif isinstance(one_value, int):
|
elif isinstance(one_value, int):
|
||||||
values[i] = str(one_value).encode('ascii')
|
values[i] = str(one_value).encode('ascii')
|
||||||
value = b'\r\n\t'.join(values)
|
value = b'\r\n\t'.join(values)
|
||||||
|
|
|
@ -448,7 +448,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
|
||||||
message = ''
|
message = ''
|
||||||
if self.request_version != 'HTTP/0.9':
|
if self.request_version != 'HTTP/0.9':
|
||||||
self.wfile.write(("%s %d %s\r\n" %
|
self.wfile.write(("%s %d %s\r\n" %
|
||||||
(self.protocol_version, code, message)).encode('latin1', 'strict'))
|
(self.protocol_version, code, message)).encode('latin-1', 'strict'))
|
||||||
|
|
||||||
def send_header(self, keyword, value):
|
def send_header(self, keyword, value):
|
||||||
"""Send a MIME header."""
|
"""Send a MIME header."""
|
||||||
|
@ -456,7 +456,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
|
||||||
if not hasattr(self, '_headers_buffer'):
|
if not hasattr(self, '_headers_buffer'):
|
||||||
self._headers_buffer = []
|
self._headers_buffer = []
|
||||||
self._headers_buffer.append(
|
self._headers_buffer.append(
|
||||||
("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict'))
|
("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
|
||||||
|
|
||||||
if keyword.lower() == 'connection':
|
if keyword.lower() == 'connection':
|
||||||
if value.lower() == 'close':
|
if value.lower() == 'close':
|
||||||
|
|
|
@ -434,10 +434,10 @@ class ConnectionWrapper(object):
|
||||||
return self._loads(s)
|
return self._loads(s)
|
||||||
|
|
||||||
def _xml_dumps(obj):
|
def _xml_dumps(obj):
|
||||||
return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf8')
|
return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf-8')
|
||||||
|
|
||||||
def _xml_loads(s):
|
def _xml_loads(s):
|
||||||
(obj,), method = xmlrpclib.loads(s.decode('utf8'))
|
(obj,), method = xmlrpclib.loads(s.decode('utf-8'))
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
class XmlListener(Listener):
|
class XmlListener(Listener):
|
||||||
|
|
|
@ -114,7 +114,7 @@ class Random(_random.Random):
|
||||||
if version == 2:
|
if version == 2:
|
||||||
if isinstance(a, (str, bytes, bytearray)):
|
if isinstance(a, (str, bytes, bytearray)):
|
||||||
if isinstance(a, str):
|
if isinstance(a, str):
|
||||||
a = a.encode("utf8")
|
a = a.encode("utf-8")
|
||||||
a += _sha512(a).digest()
|
a += _sha512(a).digest()
|
||||||
a = int.from_bytes(a, 'big')
|
a = int.from_bytes(a, 'big')
|
||||||
|
|
||||||
|
|
|
@ -275,7 +275,7 @@ class SMTPChannel(asynchat.async_chat):
|
||||||
return
|
return
|
||||||
elif limit:
|
elif limit:
|
||||||
self.num_bytes += len(data)
|
self.num_bytes += len(data)
|
||||||
self.received_lines.append(str(data, "utf8"))
|
self.received_lines.append(str(data, "utf-8"))
|
||||||
|
|
||||||
# Implementation of base class abstract method
|
# Implementation of base class abstract method
|
||||||
def found_terminator(self):
|
def found_terminator(self):
|
||||||
|
|
|
@ -85,7 +85,7 @@ class DeclTypesTests(unittest.TestCase):
|
||||||
if isinstance(_val, bytes):
|
if isinstance(_val, bytes):
|
||||||
# sqlite3 always calls __init__ with a bytes created from a
|
# sqlite3 always calls __init__ with a bytes created from a
|
||||||
# UTF-8 string when __conform__ was used to store the object.
|
# UTF-8 string when __conform__ was used to store the object.
|
||||||
_val = _val.decode('utf8')
|
_val = _val.decode('utf-8')
|
||||||
self.val = _val
|
self.val = _val
|
||||||
|
|
||||||
def __cmp__(self, other):
|
def __cmp__(self, other):
|
||||||
|
|
|
@ -791,7 +791,7 @@ def parse_template(source, pattern):
|
||||||
else:
|
else:
|
||||||
# The tokenizer implicitly decodes bytes objects as latin-1, we must
|
# The tokenizer implicitly decodes bytes objects as latin-1, we must
|
||||||
# therefore re-encode the final representation.
|
# therefore re-encode the final representation.
|
||||||
encode = lambda x: x.encode('latin1')
|
encode = lambda x: x.encode('latin-1')
|
||||||
for c, s in p:
|
for c, s in p:
|
||||||
if c is MARK:
|
if c is MARK:
|
||||||
groupsappend((i, s))
|
groupsappend((i, s))
|
||||||
|
|
|
@ -1084,7 +1084,7 @@ class TarInfo(object):
|
||||||
def create_pax_global_header(cls, pax_headers):
|
def create_pax_global_header(cls, pax_headers):
|
||||||
"""Return the object as a pax global header block sequence.
|
"""Return the object as a pax global header block sequence.
|
||||||
"""
|
"""
|
||||||
return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
|
return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
|
||||||
|
|
||||||
def _posix_split_name(self, name):
|
def _posix_split_name(self, name):
|
||||||
"""Split a name longer than 100 chars into a prefix
|
"""Split a name longer than 100 chars into a prefix
|
||||||
|
@ -1167,7 +1167,7 @@ class TarInfo(object):
|
||||||
binary = False
|
binary = False
|
||||||
for keyword, value in pax_headers.items():
|
for keyword, value in pax_headers.items():
|
||||||
try:
|
try:
|
||||||
value.encode("utf8", "strict")
|
value.encode("utf-8", "strict")
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
binary = True
|
binary = True
|
||||||
break
|
break
|
||||||
|
@ -1178,13 +1178,13 @@ class TarInfo(object):
|
||||||
records += b"21 hdrcharset=BINARY\n"
|
records += b"21 hdrcharset=BINARY\n"
|
||||||
|
|
||||||
for keyword, value in pax_headers.items():
|
for keyword, value in pax_headers.items():
|
||||||
keyword = keyword.encode("utf8")
|
keyword = keyword.encode("utf-8")
|
||||||
if binary:
|
if binary:
|
||||||
# Try to restore the original byte representation of `value'.
|
# Try to restore the original byte representation of `value'.
|
||||||
# Needless to say, that the encoding must match the string.
|
# Needless to say, that the encoding must match the string.
|
||||||
value = value.encode(encoding, "surrogateescape")
|
value = value.encode(encoding, "surrogateescape")
|
||||||
else:
|
else:
|
||||||
value = value.encode("utf8")
|
value = value.encode("utf-8")
|
||||||
|
|
||||||
l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
|
l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
|
||||||
n = p = 0
|
n = p = 0
|
||||||
|
@ -1393,7 +1393,7 @@ class TarInfo(object):
|
||||||
# the translation to UTF-8 fails.
|
# the translation to UTF-8 fails.
|
||||||
match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
|
match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
pax_headers["hdrcharset"] = match.group(1).decode("utf8")
|
pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
|
||||||
|
|
||||||
# For the time being, we don't care about anything other than "BINARY".
|
# For the time being, we don't care about anything other than "BINARY".
|
||||||
# The only other value that is currently allowed by the standard is
|
# The only other value that is currently allowed by the standard is
|
||||||
|
@ -1402,7 +1402,7 @@ class TarInfo(object):
|
||||||
if hdrcharset == "BINARY":
|
if hdrcharset == "BINARY":
|
||||||
encoding = tarfile.encoding
|
encoding = tarfile.encoding
|
||||||
else:
|
else:
|
||||||
encoding = "utf8"
|
encoding = "utf-8"
|
||||||
|
|
||||||
# Parse pax header information. A record looks like that:
|
# Parse pax header information. A record looks like that:
|
||||||
# "%d %s=%s\n" % (length, keyword, value). length is the size
|
# "%d %s=%s\n" % (length, keyword, value). length is the size
|
||||||
|
@ -1419,20 +1419,20 @@ class TarInfo(object):
|
||||||
length = int(length)
|
length = int(length)
|
||||||
value = buf[match.end(2) + 1:match.start(1) + length - 1]
|
value = buf[match.end(2) + 1:match.start(1) + length - 1]
|
||||||
|
|
||||||
# Normally, we could just use "utf8" as the encoding and "strict"
|
# Normally, we could just use "utf-8" as the encoding and "strict"
|
||||||
# as the error handler, but we better not take the risk. For
|
# as the error handler, but we better not take the risk. For
|
||||||
# example, GNU tar <= 1.23 is known to store filenames it cannot
|
# example, GNU tar <= 1.23 is known to store filenames it cannot
|
||||||
# translate to UTF-8 as raw strings (unfortunately without a
|
# translate to UTF-8 as raw strings (unfortunately without a
|
||||||
# hdrcharset=BINARY header).
|
# hdrcharset=BINARY header).
|
||||||
# We first try the strict standard encoding, and if that fails we
|
# We first try the strict standard encoding, and if that fails we
|
||||||
# fall back on the user's encoding and error handler.
|
# fall back on the user's encoding and error handler.
|
||||||
keyword = self._decode_pax_field(keyword, "utf8", "utf8",
|
keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
|
||||||
tarfile.errors)
|
tarfile.errors)
|
||||||
if keyword in PAX_NAME_FIELDS:
|
if keyword in PAX_NAME_FIELDS:
|
||||||
value = self._decode_pax_field(value, encoding, tarfile.encoding,
|
value = self._decode_pax_field(value, encoding, tarfile.encoding,
|
||||||
tarfile.errors)
|
tarfile.errors)
|
||||||
else:
|
else:
|
||||||
value = self._decode_pax_field(value, "utf8", "utf8",
|
value = self._decode_pax_field(value, "utf-8", "utf-8",
|
||||||
tarfile.errors)
|
tarfile.errors)
|
||||||
|
|
||||||
pax_headers[keyword] = value
|
pax_headers[keyword] = value
|
||||||
|
|
|
@ -4328,7 +4328,7 @@ class TestEncoding(TestCase):
|
||||||
def _test_module_encoding(self, path):
|
def _test_module_encoding(self, path):
|
||||||
path, _ = os.path.splitext(path)
|
path, _ = os.path.splitext(path)
|
||||||
path += ".py"
|
path += ".py"
|
||||||
with codecs.open(path, 'r', 'utf8') as f:
|
with codecs.open(path, 'r', 'utf-8') as f:
|
||||||
f.read()
|
f.read()
|
||||||
|
|
||||||
def test_argparse_module_encoding(self):
|
def test_argparse_module_encoding(self):
|
||||||
|
|
|
@ -707,7 +707,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
|
||||||
class BytesTest(unittest.TestCase, BaseStrTest):
|
class BytesTest(unittest.TestCase, BaseStrTest):
|
||||||
|
|
||||||
def from_latin1(self, s):
|
def from_latin1(self, s):
|
||||||
return s.encode("latin1")
|
return s.encode("latin-1")
|
||||||
|
|
||||||
@bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
|
@bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
|
||||||
def test_decode(self, size):
|
def test_decode(self, size):
|
||||||
|
@ -718,7 +718,7 @@ class BytesTest(unittest.TestCase, BaseStrTest):
|
||||||
class BytearrayTest(unittest.TestCase, BaseStrTest):
|
class BytearrayTest(unittest.TestCase, BaseStrTest):
|
||||||
|
|
||||||
def from_latin1(self, s):
|
def from_latin1(self, s):
|
||||||
return bytearray(s.encode("latin1"))
|
return bytearray(s.encode("latin-1"))
|
||||||
|
|
||||||
@bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
|
@bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
|
||||||
def test_decode(self, size):
|
def test_decode(self, size):
|
||||||
|
|
|
@ -188,24 +188,26 @@ class BaseBytesTest(unittest.TestCase):
|
||||||
|
|
||||||
def test_encoding(self):
|
def test_encoding(self):
|
||||||
sample = "Hello world\n\u1234\u5678\u9abc"
|
sample = "Hello world\n\u1234\u5678\u9abc"
|
||||||
for enc in ("utf8", "utf16"):
|
for enc in ("utf-8", "utf-16"):
|
||||||
b = self.type2test(sample, enc)
|
b = self.type2test(sample, enc)
|
||||||
self.assertEqual(b, self.type2test(sample.encode(enc)))
|
self.assertEqual(b, self.type2test(sample.encode(enc)))
|
||||||
self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
|
self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1")
|
||||||
b = self.type2test(sample, "latin1", "ignore")
|
b = self.type2test(sample, "latin-1", "ignore")
|
||||||
self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
|
self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
|
||||||
|
|
||||||
def test_decode(self):
|
def test_decode(self):
|
||||||
sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
|
sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
|
||||||
for enc in ("utf8", "utf16"):
|
for enc in ("utf-8", "utf-16"):
|
||||||
b = self.type2test(sample, enc)
|
b = self.type2test(sample, enc)
|
||||||
self.assertEqual(b.decode(enc), sample)
|
self.assertEqual(b.decode(enc), sample)
|
||||||
sample = "Hello world\n\x80\x81\xfe\xff"
|
sample = "Hello world\n\x80\x81\xfe\xff"
|
||||||
b = self.type2test(sample, "latin1")
|
b = self.type2test(sample, "latin-1")
|
||||||
self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
|
self.assertRaises(UnicodeDecodeError, b.decode, "utf-8")
|
||||||
self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
|
self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n")
|
||||||
self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
|
self.assertEqual(b.decode(errors="ignore", encoding="utf-8"),
|
||||||
"Hello world\n")
|
"Hello world\n")
|
||||||
|
# Default encoding is utf-8
|
||||||
|
self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
|
||||||
|
|
||||||
def test_from_int(self):
|
def test_from_int(self):
|
||||||
b = self.type2test(0)
|
b = self.type2test(0)
|
||||||
|
|
|
@ -151,7 +151,7 @@ class CmdLineTest(unittest.TestCase):
|
||||||
@unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
|
@unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
|
||||||
def test_osx_utf8(self):
|
def test_osx_utf8(self):
|
||||||
def check_output(text):
|
def check_output(text):
|
||||||
decoded = text.decode('utf8', 'surrogateescape')
|
decoded = text.decode('utf-8', 'surrogateescape')
|
||||||
expected = ascii(decoded).encode('ascii') + b'\n'
|
expected = ascii(decoded).encode('ascii') + b'\n'
|
||||||
|
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
|
@ -223,7 +223,7 @@ class CmdLineTest(unittest.TestCase):
|
||||||
self.assertIn(path2.encode('ascii'), out)
|
self.assertIn(path2.encode('ascii'), out)
|
||||||
|
|
||||||
def test_displayhook_unencodable(self):
|
def test_displayhook_unencodable(self):
|
||||||
for encoding in ('ascii', 'latin1', 'utf8'):
|
for encoding in ('ascii', 'latin-1', 'utf-8'):
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env['PYTHONIOENCODING'] = encoding
|
env['PYTHONIOENCODING'] = encoding
|
||||||
p = subprocess.Popen(
|
p = subprocess.Popen(
|
||||||
|
|
|
@ -1250,7 +1250,7 @@ class EncodedFileTest(unittest.TestCase):
|
||||||
self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')
|
self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')
|
||||||
|
|
||||||
f = io.BytesIO()
|
f = io.BytesIO()
|
||||||
ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
|
ef = codecs.EncodedFile(f, 'utf-8', 'latin-1')
|
||||||
ef.write(b'\xc3\xbc')
|
ef.write(b'\xc3\xbc')
|
||||||
self.assertEqual(f.getvalue(), b'\xfc')
|
self.assertEqual(f.getvalue(), b'\xfc')
|
||||||
|
|
||||||
|
@ -1611,7 +1611,7 @@ class SurrogateEscapeTest(unittest.TestCase):
|
||||||
|
|
||||||
def test_latin1(self):
|
def test_latin1(self):
|
||||||
# Issue6373
|
# Issue6373
|
||||||
self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"),
|
self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
|
||||||
b"\xe4\xeb\xef\xf6\xfc")
|
b"\xe4\xeb\xef\xf6\xfc")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ except ImportError:
|
||||||
|
|
||||||
def _default_chunk_size():
|
def _default_chunk_size():
|
||||||
"""Get the default TextIOWrapper chunk size"""
|
"""Get the default TextIOWrapper chunk size"""
|
||||||
with open(__file__, "r", encoding="latin1") as f:
|
with open(__file__, "r", encoding="latin-1") as f:
|
||||||
return f._CHUNK_SIZE
|
return f._CHUNK_SIZE
|
||||||
|
|
||||||
|
|
||||||
|
@ -1684,11 +1684,11 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
r = self.BytesIO(b"\xc3\xa9\n\n")
|
r = self.BytesIO(b"\xc3\xa9\n\n")
|
||||||
b = self.BufferedReader(r, 1000)
|
b = self.BufferedReader(r, 1000)
|
||||||
t = self.TextIOWrapper(b)
|
t = self.TextIOWrapper(b)
|
||||||
t.__init__(b, encoding="latin1", newline="\r\n")
|
t.__init__(b, encoding="latin-1", newline="\r\n")
|
||||||
self.assertEqual(t.encoding, "latin1")
|
self.assertEqual(t.encoding, "latin-1")
|
||||||
self.assertEqual(t.line_buffering, False)
|
self.assertEqual(t.line_buffering, False)
|
||||||
t.__init__(b, encoding="utf8", line_buffering=True)
|
t.__init__(b, encoding="utf-8", line_buffering=True)
|
||||||
self.assertEqual(t.encoding, "utf8")
|
self.assertEqual(t.encoding, "utf-8")
|
||||||
self.assertEqual(t.line_buffering, True)
|
self.assertEqual(t.line_buffering, True)
|
||||||
self.assertEqual("\xe9\n", t.readline())
|
self.assertEqual("\xe9\n", t.readline())
|
||||||
self.assertRaises(TypeError, t.__init__, b, newline=42)
|
self.assertRaises(TypeError, t.__init__, b, newline=42)
|
||||||
|
@ -1738,8 +1738,8 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
def test_encoding(self):
|
def test_encoding(self):
|
||||||
# Check the encoding attribute is always set, and valid
|
# Check the encoding attribute is always set, and valid
|
||||||
b = self.BytesIO()
|
b = self.BytesIO()
|
||||||
t = self.TextIOWrapper(b, encoding="utf8")
|
t = self.TextIOWrapper(b, encoding="utf-8")
|
||||||
self.assertEqual(t.encoding, "utf8")
|
self.assertEqual(t.encoding, "utf-8")
|
||||||
t = self.TextIOWrapper(b)
|
t = self.TextIOWrapper(b)
|
||||||
self.assertTrue(t.encoding is not None)
|
self.assertTrue(t.encoding is not None)
|
||||||
codecs.lookup(t.encoding)
|
codecs.lookup(t.encoding)
|
||||||
|
@ -1918,7 +1918,7 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
|
|
||||||
def test_basic_io(self):
|
def test_basic_io(self):
|
||||||
for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
|
for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
|
||||||
for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le":
|
for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le":
|
||||||
f = self.open(support.TESTFN, "w+", encoding=enc)
|
f = self.open(support.TESTFN, "w+", encoding=enc)
|
||||||
f._CHUNK_SIZE = chunksize
|
f._CHUNK_SIZE = chunksize
|
||||||
self.assertEqual(f.write("abc"), 3)
|
self.assertEqual(f.write("abc"), 3)
|
||||||
|
@ -1968,7 +1968,7 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||||
self.assertEqual(rlines, wlines)
|
self.assertEqual(rlines, wlines)
|
||||||
|
|
||||||
def test_telling(self):
|
def test_telling(self):
|
||||||
f = self.open(support.TESTFN, "w+", encoding="utf8")
|
f = self.open(support.TESTFN, "w+", encoding="utf-8")
|
||||||
p0 = f.tell()
|
p0 = f.tell()
|
||||||
f.write("\xff\n")
|
f.write("\xff\n")
|
||||||
p1 = f.tell()
|
p1 = f.tell()
|
||||||
|
|
|
@ -95,14 +95,14 @@ class TestMailbox(TestBase):
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def test_add_invalid_8bit_bytes_header(self):
|
def test_add_invalid_8bit_bytes_header(self):
|
||||||
key = self._box.add(self._nonascii_msg.encode('latin1'))
|
key = self._box.add(self._nonascii_msg.encode('latin-1'))
|
||||||
self.assertEqual(len(self._box), 1)
|
self.assertEqual(len(self._box), 1)
|
||||||
self.assertEqual(self._box.get_bytes(key),
|
self.assertEqual(self._box.get_bytes(key),
|
||||||
self._nonascii_msg.encode('latin1'))
|
self._nonascii_msg.encode('latin-1'))
|
||||||
|
|
||||||
def test_invalid_nonascii_header_as_string(self):
|
def test_invalid_nonascii_header_as_string(self):
|
||||||
subj = self._nonascii_msg.splitlines()[1]
|
subj = self._nonascii_msg.splitlines()[1]
|
||||||
key = self._box.add(subj.encode('latin1'))
|
key = self._box.add(subj.encode('latin-1'))
|
||||||
self.assertEqual(self._box.get_string(key),
|
self.assertEqual(self._box.get_string(key),
|
||||||
'Subject: =?unknown-8bit?b?RmFsaW5hcHThciBo4Xpob3pzeuFsbO104XNz'
|
'Subject: =?unknown-8bit?b?RmFsaW5hcHThciBo4Xpob3pzeuFsbO104XNz'
|
||||||
'YWwuIE3hciByZW5kZWx06Ww/?=\n\n')
|
'YWwuIE3hciByZW5kZWx06Ww/?=\n\n')
|
||||||
|
|
|
@ -813,7 +813,7 @@ class NNTPv1v2TestsMixin:
|
||||||
|
|
||||||
def _check_article_body(self, lines):
|
def _check_article_body(self, lines):
|
||||||
self.assertEqual(len(lines), 4)
|
self.assertEqual(len(lines), 4)
|
||||||
self.assertEqual(lines[-1].decode('utf8'), "-- Signed by André.")
|
self.assertEqual(lines[-1].decode('utf-8'), "-- Signed by André.")
|
||||||
self.assertEqual(lines[-2], b"")
|
self.assertEqual(lines[-2], b"")
|
||||||
self.assertEqual(lines[-3], b".Here is a dot-starting line.")
|
self.assertEqual(lines[-3], b".Here is a dot-starting line.")
|
||||||
self.assertEqual(lines[-4], b"This is just a test article.")
|
self.assertEqual(lines[-4], b"This is just a test article.")
|
||||||
|
|
|
@ -19,8 +19,8 @@ class PEP3120Test(unittest.TestCase):
|
||||||
try:
|
try:
|
||||||
import test.badsyntax_pep3120
|
import test.badsyntax_pep3120
|
||||||
except SyntaxError as msg:
|
except SyntaxError as msg:
|
||||||
msg = str(msg)
|
msg = str(msg).lower()
|
||||||
self.assertTrue('UTF-8' in msg or 'utf8' in msg)
|
self.assertTrue('utf-8' in msg or 'utf8' in msg)
|
||||||
else:
|
else:
|
||||||
self.fail("expected exception didn't occur")
|
self.fail("expected exception didn't occur")
|
||||||
|
|
||||||
|
|
|
@ -20,8 +20,8 @@ import unittest
|
||||||
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
|
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
|
||||||
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
|
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
|
||||||
try:
|
try:
|
||||||
TEST_XMLFILE.encode("utf8")
|
TEST_XMLFILE.encode("utf-8")
|
||||||
TEST_XMLFILE_OUT.encode("utf8")
|
TEST_XMLFILE_OUT.encode("utf-8")
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
raise unittest.SkipTest("filename is not encodable to utf8")
|
raise unittest.SkipTest("filename is not encodable to utf8")
|
||||||
|
|
||||||
|
|
|
@ -129,8 +129,8 @@ class TestCase(unittest.TestCase):
|
||||||
shelve.Shelf(d)[key] = [1]
|
shelve.Shelf(d)[key] = [1]
|
||||||
self.assertIn(key.encode('utf-8'), d)
|
self.assertIn(key.encode('utf-8'), d)
|
||||||
# but a different one can be given
|
# but a different one can be given
|
||||||
shelve.Shelf(d, keyencoding='latin1')[key] = [1]
|
shelve.Shelf(d, keyencoding='latin-1')[key] = [1]
|
||||||
self.assertIn(key.encode('latin1'), d)
|
self.assertIn(key.encode('latin-1'), d)
|
||||||
# with all consequences
|
# with all consequences
|
||||||
s = shelve.Shelf(d, keyencoding='ascii')
|
s = shelve.Shelf(d, keyencoding='ascii')
|
||||||
self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
|
self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
|
||||||
|
|
|
@ -44,7 +44,7 @@ def linux_version():
|
||||||
return 0, 0, 0
|
return 0, 0, 0
|
||||||
|
|
||||||
HOST = support.HOST
|
HOST = support.HOST
|
||||||
MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf8') ## test unicode string and carriage return
|
MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf-8') ## test unicode string and carriage return
|
||||||
SUPPORTS_IPV6 = socket.has_ipv6 and try_address('::1', family=socket.AF_INET6)
|
SUPPORTS_IPV6 = socket.has_ipv6 and try_address('::1', family=socket.AF_INET6)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -1065,7 +1065,7 @@ class FileObjectClassTestCase(SocketConnectedTest):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
bufsize = -1 # Use default buffer size
|
bufsize = -1 # Use default buffer size
|
||||||
encoding = 'utf8'
|
encoding = 'utf-8'
|
||||||
errors = 'strict'
|
errors = 'strict'
|
||||||
newline = None
|
newline = None
|
||||||
|
|
||||||
|
@ -1269,7 +1269,7 @@ class FileObjectInterruptedTestCase(unittest.TestCase):
|
||||||
data = b''
|
data = b''
|
||||||
else:
|
else:
|
||||||
data = ''
|
data = ''
|
||||||
expecting = expecting.decode('utf8')
|
expecting = expecting.decode('utf-8')
|
||||||
while len(data) != len(expecting):
|
while len(data) != len(expecting):
|
||||||
part = fo.read(size)
|
part = fo.read(size)
|
||||||
if not part:
|
if not part:
|
||||||
|
@ -1427,7 +1427,7 @@ class UnicodeReadFileObjectClassTestCase(FileObjectClassTestCase):
|
||||||
"""Tests for socket.makefile() in text mode (rather than binary)"""
|
"""Tests for socket.makefile() in text mode (rather than binary)"""
|
||||||
|
|
||||||
read_mode = 'r'
|
read_mode = 'r'
|
||||||
read_msg = MSG.decode('utf8')
|
read_msg = MSG.decode('utf-8')
|
||||||
write_mode = 'wb'
|
write_mode = 'wb'
|
||||||
write_msg = MSG
|
write_msg = MSG
|
||||||
newline = ''
|
newline = ''
|
||||||
|
@ -1439,7 +1439,7 @@ class UnicodeWriteFileObjectClassTestCase(FileObjectClassTestCase):
|
||||||
read_mode = 'rb'
|
read_mode = 'rb'
|
||||||
read_msg = MSG
|
read_msg = MSG
|
||||||
write_mode = 'w'
|
write_mode = 'w'
|
||||||
write_msg = MSG.decode('utf8')
|
write_msg = MSG.decode('utf-8')
|
||||||
newline = ''
|
newline = ''
|
||||||
|
|
||||||
|
|
||||||
|
@ -1447,9 +1447,9 @@ class UnicodeReadWriteFileObjectClassTestCase(FileObjectClassTestCase):
|
||||||
"""Tests for socket.makefile() in text mode (rather than binary)"""
|
"""Tests for socket.makefile() in text mode (rather than binary)"""
|
||||||
|
|
||||||
read_mode = 'r'
|
read_mode = 'r'
|
||||||
read_msg = MSG.decode('utf8')
|
read_msg = MSG.decode('utf-8')
|
||||||
write_mode = 'w'
|
write_mode = 'w'
|
||||||
write_msg = MSG.decode('utf8')
|
write_msg = MSG.decode('utf-8')
|
||||||
newline = ''
|
newline = ''
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -130,7 +130,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
|
self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
|
||||||
|
|
||||||
def test_file_utf8(self):
|
def test_file_utf8(self):
|
||||||
self.check_encoding("utf8")
|
self.check_encoding("utf-8")
|
||||||
|
|
||||||
def test_file_iso_8859_1(self):
|
def test_file_iso_8859_1(self):
|
||||||
self.check_encoding("iso-8859-1")
|
self.check_encoding("iso-8859-1")
|
||||||
|
|
|
@ -1191,7 +1191,7 @@ class POSIXProcessTestCase(BaseTestCase):
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
self.assertEqual(0, p.returncode, "sigchild_ignore.py exited"
|
self.assertEqual(0, p.returncode, "sigchild_ignore.py exited"
|
||||||
" non-zero with this error:\n%s" %
|
" non-zero with this error:\n%s" %
|
||||||
stderr.decode('utf8'))
|
stderr.decode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipUnless(mswindows, "Windows specific tests")
|
@unittest.skipUnless(mswindows, "Windows specific tests")
|
||||||
|
|
|
@ -1289,7 +1289,7 @@ class UstarUnicodeTest(unittest.TestCase):
|
||||||
self._test_unicode_filename("utf7")
|
self._test_unicode_filename("utf7")
|
||||||
|
|
||||||
def test_utf8_filename(self):
|
def test_utf8_filename(self):
|
||||||
self._test_unicode_filename("utf8")
|
self._test_unicode_filename("utf-8")
|
||||||
|
|
||||||
def _test_unicode_filename(self, encoding):
|
def _test_unicode_filename(self, encoding):
|
||||||
tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
|
tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
|
||||||
|
@ -1368,7 +1368,7 @@ class GNUUnicodeTest(UstarUnicodeTest):
|
||||||
def test_bad_pax_header(self):
|
def test_bad_pax_header(self):
|
||||||
# Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
|
# Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
|
||||||
# without a hdrcharset=BINARY header.
|
# without a hdrcharset=BINARY header.
|
||||||
for encoding, name in (("utf8", "pax/bad-pax-\udce4\udcf6\udcfc"),
|
for encoding, name in (("utf-8", "pax/bad-pax-\udce4\udcf6\udcfc"),
|
||||||
("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),):
|
("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),):
|
||||||
with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
|
with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
|
||||||
try:
|
try:
|
||||||
|
@ -1383,7 +1383,7 @@ class PAXUnicodeTest(UstarUnicodeTest):
|
||||||
|
|
||||||
def test_binary_header(self):
|
def test_binary_header(self):
|
||||||
# Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
|
# Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
|
||||||
for encoding, name in (("utf8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
|
for encoding, name in (("utf-8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
|
||||||
("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),):
|
("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),):
|
||||||
with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
|
with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1182,11 +1182,14 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual('hello'.encode('ascii'), b'hello')
|
self.assertEqual('hello'.encode('ascii'), b'hello')
|
||||||
self.assertEqual('hello'.encode('utf-7'), b'hello')
|
self.assertEqual('hello'.encode('utf-7'), b'hello')
|
||||||
self.assertEqual('hello'.encode('utf-8'), b'hello')
|
self.assertEqual('hello'.encode('utf-8'), b'hello')
|
||||||
self.assertEqual('hello'.encode('utf8'), b'hello')
|
self.assertEqual('hello'.encode('utf-8'), b'hello')
|
||||||
self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
|
self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
|
||||||
self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
|
self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
|
||||||
self.assertEqual('hello'.encode('latin-1'), b'hello')
|
self.assertEqual('hello'.encode('latin-1'), b'hello')
|
||||||
|
|
||||||
|
# Default encoding is utf-8
|
||||||
|
self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83')
|
||||||
|
|
||||||
# Roundtrip safety for BMP (just the first 1024 chars)
|
# Roundtrip safety for BMP (just the first 1024 chars)
|
||||||
for c in range(1024):
|
for c in range(1024):
|
||||||
u = chr(c)
|
u = chr(c)
|
||||||
|
|
|
@ -251,7 +251,7 @@ class urlretrieve_FileTests(unittest.TestCase):
|
||||||
def constructLocalFileUrl(self, filePath):
|
def constructLocalFileUrl(self, filePath):
|
||||||
filePath = os.path.abspath(filePath)
|
filePath = os.path.abspath(filePath)
|
||||||
try:
|
try:
|
||||||
filePath.encode("utf8")
|
filePath.encode("utf-8")
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
raise unittest.SkipTest("filePath is not encodable to utf8")
|
raise unittest.SkipTest("filePath is not encodable to utf8")
|
||||||
return "file://%s" % urllib.request.pathname2url(filePath)
|
return "file://%s" % urllib.request.pathname2url(filePath)
|
||||||
|
|
|
@ -599,7 +599,7 @@ class OpenerDirectorTests(unittest.TestCase):
|
||||||
|
|
||||||
def sanepathname2url(path):
|
def sanepathname2url(path):
|
||||||
try:
|
try:
|
||||||
path.encode("utf8")
|
path.encode("utf-8")
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
raise unittest.SkipTest("path is not encodable to utf8")
|
raise unittest.SkipTest("path is not encodable to utf8")
|
||||||
urlpath = urllib.request.pathname2url(path)
|
urlpath = urllib.request.pathname2url(path)
|
||||||
|
|
|
@ -471,14 +471,14 @@ class TestUUID(TestCase):
|
||||||
if pid == 0:
|
if pid == 0:
|
||||||
os.close(fds[0])
|
os.close(fds[0])
|
||||||
value = uuid.uuid4()
|
value = uuid.uuid4()
|
||||||
os.write(fds[1], value.hex.encode('latin1'))
|
os.write(fds[1], value.hex.encode('latin-1'))
|
||||||
os._exit(0)
|
os._exit(0)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
os.close(fds[1])
|
os.close(fds[1])
|
||||||
parent_value = uuid.uuid4().hex
|
parent_value = uuid.uuid4().hex
|
||||||
os.waitpid(pid, 0)
|
os.waitpid(pid, 0)
|
||||||
child_value = os.read(fds[0], 100).decode('latin1')
|
child_value = os.read(fds[0], 100).decode('latin-1')
|
||||||
|
|
||||||
self.assertNotEqual(parent_value, child_value)
|
self.assertNotEqual(parent_value, child_value)
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
|
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
|
||||||
try:
|
try:
|
||||||
SIMPLE_XMLFILE.encode("utf8")
|
SIMPLE_XMLFILE.encode("utf-8")
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
raise unittest.SkipTest("filename is not encodable to utf8")
|
raise unittest.SkipTest("filename is not encodable to utf8")
|
||||||
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
|
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
|
||||||
|
@ -1255,8 +1255,8 @@ def processinginstruction():
|
||||||
|
|
||||||
>>> ET.tostring(ET.PI('test', '<testing&>'))
|
>>> ET.tostring(ET.PI('test', '<testing&>'))
|
||||||
b'<?test <testing&>?>'
|
b'<?test <testing&>?>'
|
||||||
>>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin1')
|
>>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1')
|
||||||
b"<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
|
b"<?xml version='1.0' encoding='latin-1'?>\\n<?test <testing&>\\xe3?>"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
|
@ -1846,7 +1846,7 @@ class URLopener:
|
||||||
if encoding == 'base64':
|
if encoding == 'base64':
|
||||||
import base64
|
import base64
|
||||||
# XXX is this encoding/decoding ok?
|
# XXX is this encoding/decoding ok?
|
||||||
data = base64.decodebytes(data.encode('ascii')).decode('latin1')
|
data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
|
||||||
else:
|
else:
|
||||||
data = unquote(data)
|
data = unquote(data)
|
||||||
msg.append('Content-Length: %d' % len(data))
|
msg.append('Content-Length: %d' % len(data))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue