mirror of
https://github.com/python/cpython.git
synced 2025-08-03 00:23:06 +00:00
Added an errors argument to the TarFile class that allows the user to
specify an error handling scheme for character conversion. Added an additional scheme "utf-8" for read mode. Unicode input filenames are now supported by design. The values of the pax_headers dictionary are now limited to unicode objects. Fixed: The prefix field is no longer used in PAX_FORMAT (in conformance with POSIX). Fixed: In read mode, use the size field from a pax header if one is present. Fixed: Strip trailing slashes from pax header name values. Fixed: Give values in user-specified pax_headers precedence when writing. Added unicode tests. Added a pax/regtype4 member to testtar.tar with all possible number fields in a pax header. Added two chapters to the documentation about the different formats tarfile.py supports and how unicode issues are handled.
This commit is contained in:
parent
0ac601995c
commit
a0fcb9384e
5 changed files with 453 additions and 197 deletions
|
@ -1,4 +1,4 @@
|
|||
# encoding: iso8859-1
|
||||
# -*- coding: iso-8859-15 -*-
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
@ -372,9 +372,9 @@ class LongnameTest(ReadTest):
|
|||
|
||||
def test_read_longname(self):
|
||||
# Test reading of longname (bug #1471427).
|
||||
name = self.subdir + "/" + "123/" * 125 + "longname"
|
||||
longname = self.subdir + "/" + "123/" * 125 + "longname"
|
||||
try:
|
||||
tarinfo = self.tar.getmember(name)
|
||||
tarinfo = self.tar.getmember(longname)
|
||||
except KeyError:
|
||||
self.fail("longname not found")
|
||||
self.assert_(tarinfo.type != tarfile.DIRTYPE, "read longname as dirtype")
|
||||
|
@ -393,13 +393,24 @@ class LongnameTest(ReadTest):
|
|||
tarinfo = self.tar.getmember(longname)
|
||||
offset = tarinfo.offset
|
||||
self.tar.fileobj.seek(offset)
|
||||
fobj = StringIO.StringIO(self.tar.fileobj.read(1536))
|
||||
fobj = StringIO.StringIO(self.tar.fileobj.read(3 * 512))
|
||||
self.assertRaises(tarfile.ReadError, tarfile.open, name="foo.tar", fileobj=fobj)
|
||||
|
||||
def test_header_offset(self):
|
||||
# Test if the start offset of the TarInfo object includes
|
||||
# the preceding extended header.
|
||||
longname = self.subdir + "/" + "123/" * 125 + "longname"
|
||||
offset = self.tar.getmember(longname).offset
|
||||
fobj = open(tarname)
|
||||
fobj.seek(offset)
|
||||
tarinfo = tarfile.TarInfo.frombuf(fobj.read(512))
|
||||
self.assertEqual(tarinfo.type, self.longnametype)
|
||||
|
||||
|
||||
class GNUReadTest(LongnameTest):
|
||||
|
||||
subdir = "gnu"
|
||||
longnametype = tarfile.GNUTYPE_LONGNAME
|
||||
|
||||
def test_sparse_file(self):
|
||||
tarinfo1 = self.tar.getmember("ustar/sparse")
|
||||
|
@ -410,26 +421,40 @@ class GNUReadTest(LongnameTest):
|
|||
"sparse file extraction failed")
|
||||
|
||||
|
||||
class PaxReadTest(ReadTest):
|
||||
class PaxReadTest(LongnameTest):
|
||||
|
||||
subdir = "pax"
|
||||
longnametype = tarfile.XHDTYPE
|
||||
|
||||
def test_pax_globheaders(self):
|
||||
def test_pax_global_headers(self):
|
||||
tar = tarfile.open(tarname, encoding="iso8859-1")
|
||||
|
||||
tarinfo = tar.getmember("pax/regtype1")
|
||||
self.assertEqual(tarinfo.uname, "foo")
|
||||
self.assertEqual(tarinfo.gname, "bar")
|
||||
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
|
||||
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
|
||||
|
||||
tarinfo = tar.getmember("pax/regtype2")
|
||||
self.assertEqual(tarinfo.uname, "")
|
||||
self.assertEqual(tarinfo.gname, "bar")
|
||||
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
|
||||
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
|
||||
|
||||
tarinfo = tar.getmember("pax/regtype3")
|
||||
self.assertEqual(tarinfo.uname, "tarfile")
|
||||
self.assertEqual(tarinfo.gname, "tarfile")
|
||||
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
|
||||
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
|
||||
|
||||
def test_pax_number_fields(self):
|
||||
# All following number fields are read from the pax header.
|
||||
tar = tarfile.open(tarname, encoding="iso8859-1")
|
||||
tarinfo = tar.getmember("pax/regtype4")
|
||||
self.assertEqual(tarinfo.size, 7011)
|
||||
self.assertEqual(tarinfo.uid, 123)
|
||||
self.assertEqual(tarinfo.gid, 123)
|
||||
self.assertEqual(tarinfo.mtime, 1041808783.0)
|
||||
self.assertEqual(type(tarinfo.mtime), float)
|
||||
self.assertEqual(float(tarinfo.pax_headers["atime"]), 1041808783.0)
|
||||
self.assertEqual(float(tarinfo.pax_headers["ctime"]), 1041808783.0)
|
||||
|
||||
|
||||
class WriteTest(unittest.TestCase):
|
||||
|
@ -700,68 +725,161 @@ class PaxWriteTest(GNUWriteTest):
|
|||
n = tar.getmembers()[0].name
|
||||
self.assert_(name == n, "PAX longname creation failed")
|
||||
|
||||
def test_iso8859_15_filename(self):
|
||||
self._test_unicode_filename("iso8859-15")
|
||||
def test_pax_global_header(self):
|
||||
pax_headers = {
|
||||
u"foo": u"bar",
|
||||
u"uid": u"0",
|
||||
u"mtime": u"1.23",
|
||||
u"test": u"äöü",
|
||||
u"äöü": u"test"}
|
||||
|
||||
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
|
||||
pax_headers=pax_headers)
|
||||
tar.addfile(tarfile.TarInfo("test"))
|
||||
tar.close()
|
||||
|
||||
# Test if the global header was written correctly.
|
||||
tar = tarfile.open(tmpname, encoding="iso8859-1")
|
||||
self.assertEqual(tar.pax_headers, pax_headers)
|
||||
self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers)
|
||||
|
||||
# Test if all the fields are unicode.
|
||||
for key, val in tar.pax_headers.iteritems():
|
||||
self.assert_(type(key) is unicode)
|
||||
self.assert_(type(val) is unicode)
|
||||
if key in tarfile.PAX_NUMBER_FIELDS:
|
||||
try:
|
||||
tarfile.PAX_NUMBER_FIELDS[key](val)
|
||||
except (TypeError, ValueError):
|
||||
self.fail("unable to convert pax header field")
|
||||
|
||||
def test_pax_extended_header(self):
|
||||
# The fields from the pax header have priority over the
|
||||
# TarInfo.
|
||||
pax_headers = {u"path": u"foo", u"uid": u"123"}
|
||||
|
||||
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
|
||||
t = tarfile.TarInfo()
|
||||
t.name = u"äöü" # non-ASCII
|
||||
t.uid = 8**8 # too large
|
||||
t.pax_headers = pax_headers
|
||||
tar.addfile(t)
|
||||
tar.close()
|
||||
|
||||
tar = tarfile.open(tmpname, encoding="iso8859-1")
|
||||
t = tar.getmembers()[0]
|
||||
self.assertEqual(t.pax_headers, pax_headers)
|
||||
self.assertEqual(t.name, "foo")
|
||||
self.assertEqual(t.uid, 123)
|
||||
|
||||
|
||||
class UstarUnicodeTest(unittest.TestCase):
|
||||
# All *UnicodeTests FIXME
|
||||
|
||||
format = tarfile.USTAR_FORMAT
|
||||
|
||||
def test_iso8859_1_filename(self):
|
||||
self._test_unicode_filename("iso8859-1")
|
||||
|
||||
def test_utf7_filename(self):
|
||||
self._test_unicode_filename("utf7")
|
||||
|
||||
def test_utf8_filename(self):
|
||||
self._test_unicode_filename("utf8")
|
||||
|
||||
def test_utf16_filename(self):
|
||||
self._test_unicode_filename("utf16")
|
||||
|
||||
def _test_unicode_filename(self, encoding):
|
||||
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT)
|
||||
name = u"\u20ac".encode(encoding) # Euro sign
|
||||
tar.encoding = encoding
|
||||
tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
|
||||
name = u"äöü"
|
||||
tar.addfile(tarfile.TarInfo(name))
|
||||
tar.close()
|
||||
|
||||
tar = tarfile.open(tmpname, encoding=encoding)
|
||||
self.assertEqual(tar.getmembers()[0].name, name)
|
||||
self.assert_(type(tar.getnames()[0]) is not unicode)
|
||||
self.assertEqual(tar.getmembers()[0].name, name.encode(encoding))
|
||||
tar.close()
|
||||
|
||||
def test_unicode_filename_error(self):
|
||||
# The euro sign filename cannot be translated to iso8859-1 encoding.
|
||||
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="utf8")
|
||||
name = u"\u20ac".encode("utf8") # Euro sign
|
||||
tar.addfile(tarfile.TarInfo(name))
|
||||
tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict")
|
||||
tarinfo = tarfile.TarInfo()
|
||||
|
||||
tarinfo.name = "äöü"
|
||||
if self.format == tarfile.PAX_FORMAT:
|
||||
self.assertRaises(UnicodeError, tar.addfile, tarinfo)
|
||||
else:
|
||||
tar.addfile(tarinfo)
|
||||
|
||||
tarinfo.name = u"äöü"
|
||||
self.assertRaises(UnicodeError, tar.addfile, tarinfo)
|
||||
|
||||
tarinfo.name = "foo"
|
||||
tarinfo.uname = u"äöü"
|
||||
self.assertRaises(UnicodeError, tar.addfile, tarinfo)
|
||||
|
||||
def test_unicode_argument(self):
|
||||
tar = tarfile.open(tarname, "r", encoding="iso8859-1", errors="strict")
|
||||
for t in tar:
|
||||
self.assert_(type(t.name) is str)
|
||||
self.assert_(type(t.linkname) is str)
|
||||
self.assert_(type(t.uname) is str)
|
||||
self.assert_(type(t.gname) is str)
|
||||
tar.close()
|
||||
|
||||
self.assertRaises(UnicodeError, tarfile.open, tmpname, encoding="iso8859-1")
|
||||
def test_uname_unicode(self):
|
||||
for name in (u"äöü", "äöü"):
|
||||
t = tarfile.TarInfo("foo")
|
||||
t.uname = name
|
||||
t.gname = name
|
||||
|
||||
def test_pax_headers(self):
|
||||
self._test_pax_headers({"foo": "bar", "uid": 0, "mtime": 1.23})
|
||||
fobj = StringIO.StringIO()
|
||||
tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1")
|
||||
tar.addfile(t)
|
||||
tar.close()
|
||||
fobj.seek(0)
|
||||
|
||||
self._test_pax_headers({"euro": u"\u20ac".encode("utf8")})
|
||||
tar = tarfile.open("foo.tar", fileobj=fobj, encoding="iso8859-1")
|
||||
t = tar.getmember("foo")
|
||||
self.assertEqual(t.uname, "äöü")
|
||||
self.assertEqual(t.gname, "äöü")
|
||||
|
||||
self._test_pax_headers({"euro": u"\u20ac"},
|
||||
{"euro": u"\u20ac".encode("utf8")})
|
||||
|
||||
self._test_pax_headers({u"\u20ac": "euro"},
|
||||
{u"\u20ac".encode("utf8"): "euro"})
|
||||
class GNUUnicodeTest(UstarUnicodeTest):
|
||||
|
||||
def _test_pax_headers(self, pax_headers, cmp_headers=None):
|
||||
if cmp_headers is None:
|
||||
cmp_headers = pax_headers
|
||||
format = tarfile.GNU_FORMAT
|
||||
|
||||
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
|
||||
pax_headers=pax_headers, encoding="utf8")
|
||||
tar.addfile(tarfile.TarInfo("test"))
|
||||
|
||||
class PaxUnicodeTest(UstarUnicodeTest):
|
||||
|
||||
format = tarfile.PAX_FORMAT
|
||||
|
||||
def _create_unicode_name(self, name):
|
||||
tar = tarfile.open(tmpname, "w", format=self.format)
|
||||
t = tarfile.TarInfo()
|
||||
t.pax_headers["path"] = name
|
||||
tar.addfile(t)
|
||||
tar.close()
|
||||
|
||||
tar = tarfile.open(tmpname, encoding="utf8")
|
||||
self.assertEqual(tar.pax_headers, cmp_headers)
|
||||
def test_error_handlers(self):
|
||||
# Test if the unicode error handlers work correctly for characters
|
||||
# that cannot be expressed in a given encoding.
|
||||
self._create_unicode_name(u"äöü")
|
||||
|
||||
def test_truncated_header(self):
|
||||
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT)
|
||||
tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
|
||||
tar.addfile(tarinfo)
|
||||
tar.close()
|
||||
for handler, name in (("utf-8", u"äöü".encode("utf8")),
|
||||
("replace", "???"), ("ignore", "")):
|
||||
tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
|
||||
errors=handler)
|
||||
self.assertEqual(tar.getnames()[0], name)
|
||||
|
||||
# Simulate a premature EOF.
|
||||
open(tmpname, "rb+").truncate(1536)
|
||||
tar = tarfile.open(tmpname)
|
||||
self.assertEqual(tar.getmembers(), [])
|
||||
self.assertRaises(UnicodeError, tarfile.open, tmpname,
|
||||
encoding="ascii", errors="strict")
|
||||
|
||||
def test_error_handler_utf8(self):
|
||||
# Create a pathname that has one component representable using
|
||||
# iso8859-1 and the other only in iso8859-15.
|
||||
self._create_unicode_name(u"äöü/¤")
|
||||
|
||||
tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
|
||||
errors="utf-8")
|
||||
self.assertEqual(tar.getnames()[0], "äöü/" + u"¤".encode("utf8"))
|
||||
|
||||
|
||||
class AppendTest(unittest.TestCase):
|
||||
|
@ -836,63 +954,58 @@ class LimitsTest(unittest.TestCase):
|
|||
def test_ustar_limits(self):
|
||||
# 100 char name
|
||||
tarinfo = tarfile.TarInfo("0123456789" * 10)
|
||||
tarinfo.create_ustar_header()
|
||||
tarinfo.tobuf(tarfile.USTAR_FORMAT)
|
||||
|
||||
# 101 char name that cannot be stored
|
||||
tarinfo = tarfile.TarInfo("0123456789" * 10 + "0")
|
||||
self.assertRaises(ValueError, tarinfo.create_ustar_header)
|
||||
self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
|
||||
|
||||
# 256 char name with a slash at pos 156
|
||||
tarinfo = tarfile.TarInfo("123/" * 62 + "longname")
|
||||
tarinfo.create_ustar_header()
|
||||
tarinfo.tobuf(tarfile.USTAR_FORMAT)
|
||||
|
||||
# 256 char name that cannot be stored
|
||||
tarinfo = tarfile.TarInfo("1234567/" * 31 + "longname")
|
||||
self.assertRaises(ValueError, tarinfo.create_ustar_header)
|
||||
self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
|
||||
|
||||
# 512 char name
|
||||
tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
|
||||
self.assertRaises(ValueError, tarinfo.create_ustar_header)
|
||||
self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
|
||||
|
||||
# 512 char linkname
|
||||
tarinfo = tarfile.TarInfo("longlink")
|
||||
tarinfo.linkname = "123/" * 126 + "longname"
|
||||
self.assertRaises(ValueError, tarinfo.create_ustar_header)
|
||||
self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
|
||||
|
||||
# uid > 8 digits
|
||||
tarinfo = tarfile.TarInfo("name")
|
||||
tarinfo.uid = 010000000
|
||||
self.assertRaises(ValueError, tarinfo.create_ustar_header)
|
||||
self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
|
||||
|
||||
def test_gnu_limits(self):
|
||||
tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
|
||||
tarinfo.create_gnu_header()
|
||||
tarinfo.tobuf(tarfile.GNU_FORMAT)
|
||||
|
||||
tarinfo = tarfile.TarInfo("longlink")
|
||||
tarinfo.linkname = "123/" * 126 + "longname"
|
||||
tarinfo.create_gnu_header()
|
||||
tarinfo.tobuf(tarfile.GNU_FORMAT)
|
||||
|
||||
# uid >= 256 ** 7
|
||||
tarinfo = tarfile.TarInfo("name")
|
||||
tarinfo.uid = 04000000000000000000L
|
||||
self.assertRaises(ValueError, tarinfo.create_gnu_header)
|
||||
self.assertRaises(ValueError, tarinfo.tobuf, tarfile.GNU_FORMAT)
|
||||
|
||||
def test_pax_limits(self):
|
||||
# A 256 char name that can be stored without an extended header.
|
||||
tarinfo = tarfile.TarInfo("123/" * 62 + "longname")
|
||||
self.assert_(len(tarinfo.create_pax_header("utf8")) == 512,
|
||||
"create_pax_header attached superfluous extended header")
|
||||
|
||||
tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
|
||||
tarinfo.create_pax_header("utf8")
|
||||
tarinfo.tobuf(tarfile.PAX_FORMAT)
|
||||
|
||||
tarinfo = tarfile.TarInfo("longlink")
|
||||
tarinfo.linkname = "123/" * 126 + "longname"
|
||||
tarinfo.create_pax_header("utf8")
|
||||
tarinfo.tobuf(tarfile.PAX_FORMAT)
|
||||
|
||||
tarinfo = tarfile.TarInfo("name")
|
||||
tarinfo.uid = 04000000000000000000L
|
||||
tarinfo.create_pax_header("utf8")
|
||||
tarinfo.tobuf(tarfile.PAX_FORMAT)
|
||||
|
||||
|
||||
class GzipMiscReadTest(MiscReadTest):
|
||||
|
@ -940,6 +1053,9 @@ def test_main():
|
|||
StreamWriteTest,
|
||||
GNUWriteTest,
|
||||
PaxWriteTest,
|
||||
UstarUnicodeTest,
|
||||
GNUUnicodeTest,
|
||||
PaxUnicodeTest,
|
||||
AppendTest,
|
||||
LimitsTest,
|
||||
]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue