bpo-45863: tarfile: don't zero out header fields unnecessarily (GH-29693)

Numeric fields of type float, notably mtime, can't be represented
exactly in the ustar header, so the pax header is used. But it is
helpful to set them to the nearest int (i.e. second rather than
nanosecond precision mtimes) in the ustar header as well, for the
benefit of unarchivers that don't understand the pax header.

Add test for tarfile.TarInfo.create_pax_header to confirm correct
behaviour.
(cherry picked from commit bf2d44ffb0)

Co-authored-by: Joshua Root <jmr@macports.org>
This commit is contained in:
Miss Islington (bot) 2022-02-09 09:35:56 -08:00 committed by GitHub
parent bde3765a3f
commit b0517a1217
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 71 additions and 6 deletions

View file

@ -1898,6 +1898,61 @@ class PaxWriteTest(GNUWriteTest):
finally:
tar.close()
def test_create_pax_header(self):
# The ustar header should contain values that can be
# represented reasonably, even if a better (e.g. higher
# precision) version is set in the pax header.
# Issue #45863
# values that should be kept
t = tarfile.TarInfo()
t.name = "foo"
t.mtime = 1000.1
t.size = 100
t.uid = 123
t.gid = 124
info = t.get_info()
header = t.create_pax_header(info, encoding="iso8859-1")
self.assertEqual(info['name'], "foo")
# mtime should be rounded to nearest second
self.assertIsInstance(info['mtime'], int)
self.assertEqual(info['mtime'], 1000)
self.assertEqual(info['size'], 100)
self.assertEqual(info['uid'], 123)
self.assertEqual(info['gid'], 124)
self.assertEqual(header,
b'././@PaxHeader' + bytes(86) \
+ b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \
+ bytes(100) + b'ustar\x0000'+ bytes(247) \
+ b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \
+ b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \
+ bytes(100) + b'ustar\x0000' + bytes(247))
# values that should be changed
t = tarfile.TarInfo()
t.name = "foo\u3374" # can't be represented in ascii
t.mtime = 10**10 # too big
t.size = 10**10 # too big
t.uid = 8**8 # too big
t.gid = 8**8+1 # too big
info = t.get_info()
header = t.create_pax_header(info, encoding="iso8859-1")
# name is kept as-is in info but should be added to pax header
self.assertEqual(info['name'], "foo\u3374")
self.assertEqual(info['mtime'], 0)
self.assertEqual(info['size'], 0)
self.assertEqual(info['uid'], 0)
self.assertEqual(info['gid'], 0)
self.assertEqual(header,
b'././@PaxHeader' + bytes(86) \
+ b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \
+ bytes(100) + b'ustar\x0000' + bytes(247) \
+ b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \
+ b'16 gid=16777217\n20 size=10000000000\n' \
+ b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \
+ b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \
+ bytes(100) + b'ustar\x0000' + bytes(247))
class UnicodeTest: