Skip to content

Commit f3de1a1

Browse files
committed
Add ZipFile.remove() (python#51067)
This is a revision of commit 659eb04 (PR python#19358), notably with the following changes: - Add documentation and tests. - Raise `ValueError` for a bad mode, as in other methods. - Support multi-member removal in `_remove_members()`. - Support non-physical removal in `_remove_members()`. - Move physical file data in chunks to prevent excessive memory usage on large files. - Fix missing entry in `self.NameToInfo` when removing a duplicated archive name. - Also update `ZipInfo._end_offset` for physically moved files. Co-authored-by: Éric <merwok@netwok.org> (cherry picked from commit e6bc82a (PR python#103033))
1 parent d706eb9 commit f3de1a1

File tree

4 files changed

+403
-0
lines changed

4 files changed

+403
-0
lines changed

Doc/library/zipfile.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,23 @@ ZipFile Objects
518518
.. versionadded:: 3.11
519519

520520

521+
.. method:: ZipFile.remove(zinfo_or_arcname)
522+
523+
Remove a member from the archive. *zinfo_or_arcname* is either the full
524+
path of the member, or a :class:`ZipInfo` instance.
525+
526+
The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``.
527+
528+
Calling :meth:`remove` on a closed ZipFile, or on one opened with mode ``'r'``, will raise a :exc:`ValueError`.
529+
530+
.. note::
531+
532+
Removing a member in an archive may involve a move of many internal data
533+
records, which can be I/O intensive for a large ZIP file.
534+
535+
.. versionadded:: next
536+
537+
521538
The following data attributes are also available:
522539

523540
.. attribute:: ZipFile.filename

Lib/test/test_zipfile/test_core.py

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,239 @@ class LzmaWriterTests(AbstractWriterTests, unittest.TestCase):
13601360
class ZstdWriterTests(AbstractWriterTests, unittest.TestCase):
13611361
compression = zipfile.ZIP_ZSTANDARD
13621362

1363+
class AbstractRemoveTests:
    """Shared tests for ZipFile.remove() and ZipFile._remove_members().

    This is a mixin: concrete subclasses also inherit from
    unittest.TestCase and define ``compression``, the compression method
    used for every archive these tests write.
    """

    def _test_removing_indexes(self, test_files, indexes):
        """Remove the members at *indexes* via _remove_members() and verify.

        *test_files* is a sequence of ``(arcname, data)`` pairs.  The result
        must be indistinguishable (names, lookups, CRCs, file size) from an
        archive that was written without the removed members.
        """
        # Build the reference archive, which simply never contains the
        # members to be removed, and record its size.
        expected_files = []
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            for i, (file, data) in enumerate(test_files):
                if i not in indexes:
                    zh.writestr(file, data)
                    expected_files.append(file)
        expected_size = os.path.getsize(TESTFN)

        # Prepare the test zip with every member present.
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            for file, data in test_files:
                zh.writestr(file, data)

        # Do the removal and check the result.
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            members = {zh.infolist()[i] for i in indexes}
            zh._remove_members(members)

            # Make sure internal caches have reflected the change
            # and are consistent.
            remaining = zh.namelist()
            self.assertEqual(remaining, expected_files)
            for file, _ in test_files:
                if file in remaining:
                    self.assertEqual(zh.getinfo(file).filename, file)
                else:
                    with self.assertRaises(KeyError):
                        zh.getinfo(file)

            self.assertIsNone(zh.testzip())

        # The size is only final once the archive has been closed.
        self.assertEqual(os.path.getsize(TESTFN), expected_size)

    def _test_removing_combinations(self, test_files, n=None):
        """Run _test_removing_indexes() for every combination of members.

        With *n* given, only combinations of exactly *n* members are tried;
        otherwise every size from 1 up to ``len(test_files)`` is tried.
        """
        total = len(test_files)
        sizes = range(1, total + 1) if n is None else (n,)
        for size in sizes:
            for indexes in itertools.combinations(range(total), size):
                with self.subTest(remove=indexes):
                    self._test_removing_indexes(test_files, indexes)

    def test_basic(self):
        # Test underlying _remove_members() for removing every combination
        # of members.
        test_files = [
            ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'),
            ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'),
            ('file2.txt', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'),
        ]

        self._test_removing_combinations(test_files)

    def test_duplicated_arcname(self):
        # Test underlying _remove_members() for removing any single one of
        # several members that share the same archive name.
        dupl_file = 'file.txt'
        test_files = [
            ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'),
            ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'),
            ('file2.txt', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'),
        ]

        ln = len(test_files)
        for n in range(2, ln + 1):
            for dups in itertools.combinations(range(ln), n):
                # Give every member selected by *dups* the same name.
                files = [
                    (dupl_file if i in dups else file, data)
                    for i, (file, data) in enumerate(test_files)
                ]

                for index in dups:
                    indexes = [index]
                    with self.subTest(dups=dups, indexes=indexes):
                        self._test_removing_indexes(files, indexes)

    def test_non_physical(self):
        # Test underlying _remove_members() for non-physical removing: the
        # surviving members must keep their original header offsets.
        test_files = [
            ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'),
            ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'),
            ('file2.txt', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'),
        ]

        ln = len(test_files)
        for n in range(1, ln + 1):
            for indexes in itertools.combinations(range(ln), n):
                with self.subTest(remove=indexes):
                    # Prepare the test zip, remembering where each
                    # surviving member's header was written.
                    expected = {}
                    with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
                        for i, (file, data) in enumerate(test_files):
                            zh.writestr(file, data)
                            if i not in indexes:
                                expected[file] = zh.getinfo(file).header_offset

                    # Do the removal and check the result.
                    with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
                        members = {zh.infolist()[i] for i in indexes}
                        zh._remove_members(members, remove_physical=False)
                        self.assertEqual(zh.namelist(), list(expected))
                        for file, offset in expected.items():
                            self.assertEqual(zh.getinfo(file).header_offset, offset)
                        self.assertIsNone(zh.testzip())

    def test_verify(self):
        # Test if params are passed to underlying _remove_members() correctly,
        # or never passed if conditions not met.
        file0 = 'file0.txt'
        file = 'datafile.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        patch_target = 'zipfile.ZipFile._remove_members'

        # closed: error and do nothing
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            zh.writestr(file, data)
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            zh.close()
            with mock.patch(patch_target) as mock_fn:
                with self.assertRaises(ValueError):
                    zh.remove(file)
                mock_fn.assert_not_called()

        # writing: error and do nothing
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            zh.writestr(file, data)
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            with mock.patch(patch_target) as mock_fn:
                # A member is being written while remove() is attempted.
                with zh.open(file0, 'w'):
                    with self.assertRaises(ValueError):
                        zh.remove(file)
                mock_fn.assert_not_called()

        # mode 'r': error and do nothing
        with zipfile.ZipFile(TESTFN, 'r') as zh:
            with mock.patch(patch_target) as mock_fn:
                with self.assertRaises(ValueError):
                    zh.remove(file)
                mock_fn.assert_not_called()

        # mode 'a': the most general use case
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            zh.writestr(file, data)
        # -- remove with arcname
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            with mock.patch(patch_target) as mock_fn:
                zh.remove(file)
                mock_fn.assert_called_once_with({zh.getinfo(file)})
        # -- remove with zinfo
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            with mock.patch(patch_target) as mock_fn:
                zinfo = zh.getinfo(file)
                zh.remove(zinfo)
                mock_fn.assert_called_once_with({zinfo})
        # -- remove with a nonexistent arcname
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            with mock.patch(patch_target) as mock_fn:
                with self.assertRaises(KeyError):
                    zh.remove('nonexist.file')
                mock_fn.assert_not_called()
        # -- remove with a zinfo that is not in the archive (even if it
        #    has the same name as an existing member)
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            with mock.patch(patch_target) as mock_fn:
                zinfo = zipfile.ZipInfo(file)
                with self.assertRaises(KeyError):
                    zh.remove(zinfo)
                mock_fn.assert_not_called()

        # mode 'w': like 'a'; allows removing a just written member
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            zh.writestr(file, data)
            with mock.patch(patch_target) as mock_fn:
                zh.remove(file)
                mock_fn.assert_called_once_with({zh.getinfo(file)})

        # mode 'x': like 'w'
        os.remove(TESTFN)
        with zipfile.ZipFile(TESTFN, 'x', self.compression) as zh:
            zh.writestr(file, data)
            with mock.patch(patch_target) as mock_fn:
                zh.remove(file)
                mock_fn.assert_called_once_with({zh.getinfo(file)})

    def test_zip64(self):
        # Test removal when the members use zip64.
        file = 'datafile.txt'
        file1 = 'pre.txt'
        file2 = 'post.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        data1 = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
        data2 = b'Duis aute irure dolor in reprehenderit in voluptate velit esse'

        # Reference archive: only the members that are expected to survive.
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                fh.write(data1)
            with zh.open(file2, 'w', force_zip64=True) as fh:
                fh.write(data2)
        expected_size = os.path.getsize(TESTFN)

        # Test archive: the member to remove sits between the other two.
        with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                fh.write(data1)
            with zh.open(file, 'w', force_zip64=True) as fh:
                fh.write(data)
            with zh.open(file2, 'w', force_zip64=True) as fh:
                fh.write(data2)
        with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:
            zh.remove(file)
            self.assertIsNone(zh.testzip())
        self.assertEqual(os.path.getsize(TESTFN), expected_size)
1579+
1580+
class StoredRemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Concrete removal test case configured with ZIP_STORED."""

    compression = zipfile.ZIP_STORED
1582+
1583+
@requires_zlib()
class DeflateRemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Concrete removal test case configured with ZIP_DEFLATED."""

    compression = zipfile.ZIP_DEFLATED
1586+
1587+
@requires_bz2()
class Bzip2RemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Concrete removal test case configured with ZIP_BZIP2."""

    compression = zipfile.ZIP_BZIP2
1590+
1591+
@requires_lzma()
class LzmaRemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Concrete removal test case configured with ZIP_LZMA."""

    compression = zipfile.ZIP_LZMA
1594+
1595+
13631596
class PyZipFileTests(unittest.TestCase):
13641597
def assertCompiledIn(self, name, namelist):
13651598
if name + 'o' not in namelist:

Lib/test/test_zipfile64.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,69 @@ def tearDown(self):
8787
os_helper.unlink(TESTFN2)
8888

8989

90+
class TestRemove(unittest.TestCase):
    """Test ZipFile.remove() on archives that contain a very large member."""

    def setUp(self):
        # Create test data.
        line_gen = ("Test of zipfile line %d." % i for i in range(1000000))
        self.data = '\n'.join(line_gen).encode('ascii')

    def _write_large_file(self, fh):
        """Write about 8 GiB to *fh* by repeating self.data."""
        # It will contain enough copies of self.data to reach about 8 GiB.
        filecount = 8*1024**3 // len(self.data)

        next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
        for num in range(filecount):
            fh.write(self.data)
            # Print still working message since this test can be really slow
            if next_time <= time.monotonic():
                next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
                print((
                    ' writing %d of %d, be patient...' %
                    (num, filecount)), file=sys.__stdout__)
                sys.__stdout__.flush()

    def test_remove_large_file(self):
        # Try the temp file.  If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            self._test_remove_large_file(f)
            # ZipFile must not close a file object it did not open.
            self.assertFalse(f.closed)

    def _test_remove_large_file(self, f):
        """Remove the large member that precedes a small one."""
        file = 'datafile.txt'
        file1 = 'dummy.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with zipfile.ZipFile(f, 'w') as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                self._write_large_file(fh)
            zh.writestr(file, data)

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file1)
            self.assertIsNone(zh.testzip())

    def test_remove_before_large_file(self):
        # Try the temp file.  If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            self._test_remove_before_large_file(f)
            # ZipFile must not close a file object it did not open.
            self.assertFalse(f.closed)

    def _test_remove_before_large_file(self, f):
        """Remove a small member that precedes the large one."""
        file = 'datafile.txt'
        file1 = 'dummy.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with zipfile.ZipFile(f, 'w') as zh:
            zh.writestr(file, data)
            with zh.open(file1, 'w', force_zip64=True) as fh:
                self._write_large_file(fh)
            expected_size = zh.getinfo(file1).file_size

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file)
            self.assertIsNone(zh.testzip())
            # The surviving large member must still report its full size.
            self.assertEqual(zh.getinfo(file1).file_size, expected_size)
151+
152+
90153
class OtherTests(unittest.TestCase):
91154
def testMoreThan64kFiles(self):
92155
# This test checks that more than 64k files can be added to an archive,

0 commit comments

Comments
 (0)