|
1 | 1 | import sys
|
2 | 2 | import os
|
3 | 3 | import marshal
|
| 4 | +import glob |
4 | 5 | import importlib
|
5 | 6 | import importlib.util
|
| 7 | +import re |
6 | 8 | import struct
|
7 | 9 | import time
|
8 | 10 | import unittest
|
@@ -54,6 +56,7 @@ def module_path_to_dotted_name(path):
|
54 | 56 | TESTPACK2 = "ziptestpackage2"
|
55 | 57 | TEMP_DIR = os.path.abspath("junk95142")
|
56 | 58 | TEMP_ZIP = os.path.abspath("junk95142.zip")
|
| 59 | +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "zipimport_data") |
57 | 60 |
|
58 | 61 | pyc_file = importlib.util.cache_from_source(TESTMOD + '.py')
|
59 | 62 | pyc_ext = '.pyc'
|
@@ -134,7 +137,9 @@ def getZip64Files(self):
|
134 | 137 |
|
135 | 138 | def doTest(self, expected_ext, files, *modules, **kw):
|
136 | 139 | self.makeZip(files, **kw)
|
| 140 | + self.doTestWithPreBuiltZip(expected_ext, *modules, **kw) |
137 | 141 |
|
| 142 | + def doTestWithPreBuiltZip(self, expected_ext, *modules, **kw): |
138 | 143 | sys.path.insert(0, TEMP_ZIP)
|
139 | 144 |
|
140 | 145 | mod = importlib.import_module(".".join(modules))
|
@@ -810,6 +815,122 @@ def testZip64CruftAndComment(self):
|
810 | 815 | files = self.getZip64Files()
|
811 | 816 | self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))
|
812 | 817 |
|
def testZip64LargeFile(self):
    # Import "module" from a zip64 archive whose "data1" and "data2" members
    # are each larger than 0xFFFFFFFF bytes.  Documented with comments rather
    # than a docstring so unittest's verbose output keeps the test name.
    support.requires(
        "largefile",
        f"test generates files >{0xFFFFFFFF} bytes and takes a long time "
        "to run"
    )

    # N.B.: The ZIP_STORED case goes through a lot of gymnastics to save and
    # later rebuild a sparse zip on systems that support sparse files.
    # Rather than producing a ~8GB zip (almost entirely null bytes) on every
    # run, the zip is produced once and only its non-null regions are saved
    # as data blobs whose file names record their offsets, so the archive
    # can be re-created sparsely.  That cuts disk usage to ~9KB for
    # ZIP_STORED and makes that case ~2 orders of magnitude faster.  For
    # ZIP_DEFLATED we bite the bullet: the zip is ~8MB of non-null data, so
    # the sparse trick would only mean checking that whole ~8MB file in to
    # source control.
    zip_epoch = (1980, 1, 1, 0, 0, 0)
    part_pattern = os.path.join(
        TEST_DATA_DIR, f"sparse-zip64-c{self.compression:d}-0x*.part"
    )
    checked_in_parts = glob.glob(part_pattern)

    self.addCleanup(os_helper.unlink, TEMP_ZIP)
    if checked_in_parts:
        # Fast path: stitch the saved blobs back together at their offsets.
        def extract_offset(name):
            # The offset is the hex field just before the ".part" suffix.
            m = re.search(r"-(0x[0-9a-f]{9})\.part$", name)
            if m is None:
                raise ValueError(f"{name=} does not fit expected pattern.")
            return int(m.group(1), base=16)

        # Resolve every offset before TEMP_ZIP is opened for writing.
        located_parts = sorted(
            (extract_offset(path), path) for path in checked_in_parts
        )
        with open(TEMP_ZIP, "wb") as sparse_zip:
            for start, part_path in located_parts:
                with open(part_path, "rb") as blob:
                    # Seeking forward skips the null regions between blobs.
                    sparse_zip.seek(start, os.SEEK_SET)
                    sparse_zip.write(blob.read())

        # Confirm that the reconstructed zip file works and looks right.
        with ZipFile(TEMP_ZIP, "r") as archive:
            self.assertEqual(
                archive.getinfo("module.py").date_time, zip_epoch
            )
            self.assertEqual(
                archive.read("module.py"), test_src.encode(),
                msg=f"Recreate {part_pattern}, unexpected contents."
            )
            for entry in ("data1", "data2"):
                info = archive.getinfo(entry)
                self.assertEqual(info.date_time, zip_epoch)
                self.assertGreater(info.file_size, 0xffff_ffff)
    else:
        # Slow path: no saved blobs, so build the archive from scratch.
        if self.compression != ZIP_STORED:
            support.requires(
                "cpu",
                "test requires a lot of CPU for compression."
            )
        self.addCleanup(os_helper.unlink, os_helper.TESTFN)
        newline = os.linesep.encode()
        with open(os_helper.TESTFN, "wb") as big_file:
            big_file.write(b"data" + newline)
            # Skip ahead 0xffff_ffff bytes, then write once more so the
            # file actually extends past the skipped region.
            big_file.seek(0xffff_ffff, os.SEEK_CUR)
            big_file.write(newline)
        # Fixed mtime; written with strict_timestamps=False below.
        os.utime(os_helper.TESTFN, (0.0, 0.0))
        with ZipFile(
            TEMP_ZIP,
            "w",
            compression=self.compression,
            strict_timestamps=False
        ) as archive:
            archive.write(os_helper.TESTFN, "data1")
            archive.writestr(ZipInfo("module.py", zip_epoch), test_src)
            archive.write(os_helper.TESTFN, "data2")

        # This "works" but relies on the zip format having a non-empty
        # final page (because of the trailing central directory) for the
        # rebuilt file to wind up with the correct length.
        def make_sparse_zip_parts(name):
            page_size = 4096
            zero_page = b"\0" * page_size
            part = None
            with open(name, "rb") as whole:
                try:
                    while True:
                        offset = whole.tell()
                        page = whole.read(page_size)
                        if not page:
                            break
                        if page == zero_page:
                            # An all-null page ends the current blob.
                            if part is not None:
                                part.close()
                            part = None
                            continue
                        if part is None:
                            # First non-null page of a run: start a new
                            # blob named after its offset in the zip.
                            part_fullname = os.path.join(
                                TEST_DATA_DIR,
                                f"sparse-zip64-c{self.compression:d}-"
                                f"{offset:#011x}.part",
                            )
                            os.makedirs(
                                os.path.dirname(part_fullname),
                                exist_ok=True
                            )
                            part = open(part_fullname, "wb")
                            print("Created", part_fullname)
                        part.write(page)
                finally:
                    if part is not None:
                        part.close()

        if self.compression == ZIP_STORED:
            print(f"Creating sparse parts to check in into {TEST_DATA_DIR}:")
            make_sparse_zip_parts(TEMP_ZIP)

    self.doTestWithPreBuiltZip(".py", "module")
| 933 | + |
813 | 934 |
|
814 | 935 | @support.requires_zlib()
|
815 | 936 | class CompressedZipImportTestCase(UncompressedZipImportTestCase):
|
|
0 commit comments