Skip to content

Commit 49258ef

Browse files
jsiroisgpsheadEclips4
authored
gh-118107: Fix zipimporter ZIP64 handling. (GH-118108)
Add missing import to code that handles too large files and offsets. Use list, not tuple, for a mutable sequence. Add tests to prevent similar mistakes. --------- Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org> Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru>
1 parent 698417f commit 49258ef

7 files changed

+128
-3
lines changed

Lib/test/test_zipimport.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import sys
22
import os
33
import marshal
4+
import glob
45
import importlib
56
import importlib.util
7+
import re
68
import struct
79
import time
810
import unittest
@@ -54,6 +56,7 @@ def module_path_to_dotted_name(path):
5456
TESTPACK2 = "ziptestpackage2"
5557
TEMP_DIR = os.path.abspath("junk95142")
5658
TEMP_ZIP = os.path.abspath("junk95142.zip")
59+
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "zipimport_data")
5760

5861
pyc_file = importlib.util.cache_from_source(TESTMOD + '.py')
5962
pyc_ext = '.pyc'
@@ -134,7 +137,9 @@ def getZip64Files(self):
134137

135138
def doTest(self, expected_ext, files, *modules, **kw):
136139
self.makeZip(files, **kw)
140+
self.doTestWithPreBuiltZip(expected_ext, *modules, **kw)
137141

142+
def doTestWithPreBuiltZip(self, expected_ext, *modules, **kw):
138143
sys.path.insert(0, TEMP_ZIP)
139144

140145
mod = importlib.import_module(".".join(modules))
@@ -810,6 +815,122 @@ def testZip64CruftAndComment(self):
810815
files = self.getZip64Files()
811816
self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))
812817

818+
def testZip64LargeFile(self):
819+
support.requires(
820+
"largefile",
821+
f"test generates files >{0xFFFFFFFF} bytes and takes a long time "
822+
"to run"
823+
)
824+
825+
# N.B.: We do a lot of gymnastics below in the ZIP_STORED case to save
826+
# and reconstruct a sparse zip on systems that support sparse files.
827+
# Instead of creating a ~8GB zip file mainly consisting of null bytes
828+
# for every run of the test, we create the zip once and save off the
829+
# non-null portions of the resulting file as data blobs with offsets
830+
# that allow re-creating the zip file sparsely. This drops disk space
831+
# usage to ~9KB for the ZIP_STORED case and drops that test time by ~2
832+
# orders of magnitude. For the ZIP_DEFLATED case, however, we bite the
833+
# bullet. The resulting zip file is ~8MB of non-null data; so the sparse
834+
# trick doesn't work and would result in that full ~8MB zip data file
835+
# being checked in to source control.
836+
parts_glob = f"sparse-zip64-c{self.compression:d}-0x*.part"
837+
full_parts_glob = os.path.join(TEST_DATA_DIR, parts_glob)
838+
pre_built_zip_parts = glob.glob(full_parts_glob)
839+
840+
self.addCleanup(os_helper.unlink, TEMP_ZIP)
841+
if not pre_built_zip_parts:
842+
if self.compression != ZIP_STORED:
843+
support.requires(
844+
"cpu",
845+
"test requires a lot of CPU for compression."
846+
)
847+
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
848+
with open(os_helper.TESTFN, "wb") as f:
849+
f.write(b"data")
850+
f.write(os.linesep.encode())
851+
f.seek(0xffff_ffff, os.SEEK_CUR)
852+
f.write(os.linesep.encode())
853+
os.utime(os_helper.TESTFN, (0.0, 0.0))
854+
with ZipFile(
855+
TEMP_ZIP,
856+
"w",
857+
compression=self.compression,
858+
strict_timestamps=False
859+
) as z:
860+
z.write(os_helper.TESTFN, "data1")
861+
z.writestr(
862+
ZipInfo("module.py", (1980, 1, 1, 0, 0, 0)), test_src
863+
)
864+
z.write(os_helper.TESTFN, "data2")
865+
866+
# This "works" but relies on the zip format having a non-empty
867+
# final page due to the trailing central directory to wind up with
868+
# the correct length file.
869+
def make_sparse_zip_parts(name):
870+
empty_page = b"\0" * 4096
871+
with open(name, "rb") as f:
872+
part = None
873+
try:
874+
while True:
875+
offset = f.tell()
876+
data = f.read(len(empty_page))
877+
if not data:
878+
break
879+
if data != empty_page:
880+
if not part:
881+
part_fullname = os.path.join(
882+
TEST_DATA_DIR,
883+
f"sparse-zip64-c{self.compression:d}-"
884+
f"{offset:#011x}.part",
885+
)
886+
os.makedirs(
887+
os.path.dirname(part_fullname),
888+
exist_ok=True
889+
)
890+
part = open(part_fullname, "wb")
891+
print("Created", part_fullname)
892+
part.write(data)
893+
else:
894+
if part:
895+
part.close()
896+
part = None
897+
finally:
898+
if part:
899+
part.close()
900+
901+
if self.compression == ZIP_STORED:
902+
print(f"Creating sparse parts to check in into {TEST_DATA_DIR}:")
903+
make_sparse_zip_parts(TEMP_ZIP)
904+
905+
else:
906+
def extract_offset(name):
907+
if m := re.search(r"-(0x[0-9a-f]{9})\.part$", name):
908+
return int(m.group(1), base=16)
909+
raise ValueError(f"{name=} does not fit expected pattern.")
910+
offset_parts = [(extract_offset(n), n) for n in pre_built_zip_parts]
911+
with open(TEMP_ZIP, "wb") as f:
912+
for offset, part_fn in sorted(offset_parts):
913+
with open(part_fn, "rb") as part:
914+
f.seek(offset, os.SEEK_SET)
915+
f.write(part.read())
916+
# Confirm that the reconstructed zip file works and looks right.
917+
with ZipFile(TEMP_ZIP, "r") as z:
918+
self.assertEqual(
919+
z.getinfo("module.py").date_time, (1980, 1, 1, 0, 0, 0)
920+
)
921+
self.assertEqual(
922+
z.read("module.py"), test_src.encode(),
923+
msg=f"Recreate {full_parts_glob}, unexpected contents."
924+
)
925+
def assertDataEntry(name):
926+
zinfo = z.getinfo(name)
927+
self.assertEqual(zinfo.date_time, (1980, 1, 1, 0, 0, 0))
928+
self.assertGreater(zinfo.file_size, 0xffff_ffff)
929+
assertDataEntry("data1")
930+
assertDataEntry("data2")
931+
932+
self.doTestWithPreBuiltZip(".py", "module")
933+
813934

814935
@support.requires_zlib()
815936
class CompressedZipImportTestCase(UncompressedZipImportTestCase):
Binary file not shown.
Binary file not shown.
Binary file not shown.

Lib/zipimport.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,8 +517,9 @@ def _read_directory(archive):
517517
num_extra_values = (len(extra_data) - 4) // 8
518518
if num_extra_values > 3:
519519
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
520-
values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
521-
extra_data, offset=4)
520+
import struct
521+
values = list(struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
522+
extra_data, offset=4))
522523

523524
# N.b. Here be dragons: the ordering of these is different than
524525
# the header fields, and it's really easy to get it wrong since

Makefile.pre.in

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2483,7 +2483,8 @@ TESTSUBDIRS= idlelib/idle_test \
24832483
test/typinganndata \
24842484
test/wheeldata \
24852485
test/xmltestdata \
2486-
test/xmltestdata/c14n-20
2486+
test/xmltestdata/c14n-20 \
2487+
test/zipimport_data
24872488

24882489
COMPILEALL_OPTS=-j0
24892490

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix :mod:`zipimport` reading of ZIP64 files with file entries that are too big or
2+
offset too far.

0 commit comments

Comments
 (0)