Skip to content

[3.9] bpo-45863: tarfile: don't zero out header fields unnecessarily (GH-29693) #31232

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,15 +887,24 @@ def create_pax_header(self, info, encoding):
# Test number fields for values that exceed the field limit or values
# that like to be stored as float.
for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
if name in pax_headers:
# The pax header has priority. Avoid overflow.
info[name] = 0
continue
needs_pax = False

val = info[name]
if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
pax_headers[name] = str(val)
val_is_float = isinstance(val, float)
val_int = round(val) if val_is_float else val
if not 0 <= val_int < 8 ** (digits - 1):
# Avoid overflow.
info[name] = 0
needs_pax = True
elif val_is_float:
# Put rounded value in ustar header, and full
# precision value in pax header.
info[name] = val_int
needs_pax = True

# The existing pax header has priority.
if needs_pax and name not in pax_headers:
pax_headers[name] = str(val)

# Create a pax extended header if necessary.
if pax_headers:
Expand Down
55 changes: 55 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1898,6 +1898,61 @@ def test_pax_extended_header(self):
finally:
tar.close()

def test_create_pax_header(self):
# The ustar header should contain values that can be
# represented reasonably, even if a better (e.g. higher
# precision) version is set in the pax header.
# Issue #45863

# values that should be kept
t = tarfile.TarInfo()
t.name = "foo"
t.mtime = 1000.1
t.size = 100
t.uid = 123
t.gid = 124
info = t.get_info()
header = t.create_pax_header(info, encoding="iso8859-1")
self.assertEqual(info['name'], "foo")
# mtime should be rounded to nearest second
self.assertIsInstance(info['mtime'], int)
self.assertEqual(info['mtime'], 1000)
self.assertEqual(info['size'], 100)
self.assertEqual(info['uid'], 123)
self.assertEqual(info['gid'], 124)
self.assertEqual(header,
b'././@PaxHeader' + bytes(86) \
+ b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \
+ bytes(100) + b'ustar\x0000'+ bytes(247) \
+ b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \
+ b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \
+ bytes(100) + b'ustar\x0000' + bytes(247))

# values that should be changed
t = tarfile.TarInfo()
t.name = "foo\u3374" # can't be represented in ascii
t.mtime = 10**10 # too big
t.size = 10**10 # too big
t.uid = 8**8 # too big
t.gid = 8**8+1 # too big
info = t.get_info()
header = t.create_pax_header(info, encoding="iso8859-1")
# name is kept as-is in info but should be added to pax header
self.assertEqual(info['name'], "foo\u3374")
self.assertEqual(info['mtime'], 0)
self.assertEqual(info['size'], 0)
self.assertEqual(info['uid'], 0)
self.assertEqual(info['gid'], 0)
self.assertEqual(header,
b'././@PaxHeader' + bytes(86) \
+ b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \
+ bytes(100) + b'ustar\x0000' + bytes(247) \
+ b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \
+ b'16 gid=16777217\n20 size=10000000000\n' \
+ b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \
+ b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \
+ bytes(100) + b'ustar\x0000' + bytes(247))


class UnicodeTest:

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
When the :mod:`tarfile` module creates a pax format archive, it will put an integer representation of timestamps in the ustar header (if possible) for the benefit of older unarchivers, in addition to the existing full-precision timestamps in the pax extended header.