Skip to content

Commit a4e2991

Browse files
shireenraojaraco
authored and committed
bpo-37772: fix zipfile.Path.iterdir() outputs (GH-15170)
* fix Path._add_implied_dirs to include all implied directories * fix Path._add_implied_dirs to include all implied directories * Optimize code by using sets instead of lists * 📜🤖 Added by blurb_it. * fix Path._add_implied_dirs to include all implied directories * Optimize code by using sets instead of lists * 📜🤖 Added by blurb_it. * Add tests to zipfile.Path.iterdir() fix * Update test for zipfile.Path.iterdir() * remove whitespace from test file * Rewrite NEWS blurb to describe the user-facing impact and avoid implementation details. * remove redundant [] within set comprehension * Update to use unique_everseen to maintain order and other suggestions in review * remove whitespace and add back add_dirs in tests * Add new standalone function parents using posixpath to get parents of a directory * removing whitespace (sorry) * Remove import pathlib from zipfile.py * Rewrite _parents as a slice on a generator of the ancestry of a path. * Remove check for '.' and '/', now that parents no longer returns those. * Separate calculation of implied dirs from adding those * Re-use _implied_dirs in tests for generating zipfile with dir entries. * Replace three fixtures (abcde, abcdef, abde) with one representative example alpharep. * Simplify implementation of _implied_dirs by collapsing the generation of parent directories for each name.
1 parent ef61c52 commit a4e2991

File tree

3 files changed

+135
-52
lines changed

3 files changed

+135
-52
lines changed

Lib/test/test_zipfile.py

Lines changed: 63 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -2397,37 +2397,49 @@ def test_extract_command(self):
23972397
consume = tuple
23982398

23992399

2400-
def add_dirs(zipfile):
2400+
def add_dirs(zf):
24012401
"""
2402-
Given a writable zipfile, inject directory entries for
2402+
Given a writable zip file zf, inject directory entries for
24032403
any directories implied by the presence of children.
24042404
"""
2405-
names = zipfile.namelist()
2406-
consume(
2407-
zipfile.writestr(name + "/", b"")
2408-
for name in map(posixpath.dirname, names)
2409-
if name and name + "/" not in names
2410-
)
2411-
return zipfile
2405+
for name in zipfile.Path._implied_dirs(zf.namelist()):
2406+
zf.writestr(name, b"")
2407+
return zf
24122408

24132409

2414-
def build_abcde_files():
2410+
def build_alpharep_fixture():
24152411
"""
24162412
Create a zip file with this structure:
24172413
24182414
.
24192415
├── a.txt
2420-
└── b
2421-
    ├── c.txt
2422-
    └── d
2423-
        └── e.txt
2416+
├── b
2417+
│   ├── c.txt
2418+
│   ├── d
2419+
│   │   └── e.txt
2420+
│   └── f.txt
2421+
└── g
2422+
    └── h
2423+
        └── i.txt
2424+
2425+
This fixture has the following key characteristics:
2426+
2427+
- a file at the root (a)
2428+
- a file two levels deep (b/d/e)
2429+
- multiple files in a directory (b/c, b/f)
2430+
- a directory containing only a directory (g/h)
2431+
2432+
"alpha" because it uses alphabet
2433+
"rep" because it's a representative example
24242434
"""
24252435
data = io.BytesIO()
24262436
zf = zipfile.ZipFile(data, "w")
24272437
zf.writestr("a.txt", b"content of a")
24282438
zf.writestr("b/c.txt", b"content of c")
24292439
zf.writestr("b/d/e.txt", b"content of e")
2430-
zf.filename = "abcde.zip"
2440+
zf.writestr("b/f.txt", b"content of f")
2441+
zf.writestr("g/h/i.txt", b"content of i")
2442+
zf.filename = "alpharep.zip"
24312443
return zf
24322444

24332445

@@ -2436,60 +2448,64 @@ def setUp(self):
24362448
self.fixtures = contextlib.ExitStack()
24372449
self.addCleanup(self.fixtures.close)
24382450

2439-
def zipfile_abcde(self):
2451+
def zipfile_alpharep(self):
24402452
with self.subTest():
2441-
yield build_abcde_files()
2453+
yield build_alpharep_fixture()
24422454
with self.subTest():
2443-
yield add_dirs(build_abcde_files())
2455+
yield add_dirs(build_alpharep_fixture())
24442456

24452457
def zipfile_ondisk(self):
24462458
tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
2447-
for zipfile_abcde in self.zipfile_abcde():
2448-
buffer = zipfile_abcde.fp
2449-
zipfile_abcde.close()
2450-
path = tmpdir / zipfile_abcde.filename
2459+
for alpharep in self.zipfile_alpharep():
2460+
buffer = alpharep.fp
2461+
alpharep.close()
2462+
path = tmpdir / alpharep.filename
24512463
with path.open("wb") as strm:
24522464
strm.write(buffer.getvalue())
24532465
yield path
24542466

2455-
def test_iterdir_istype(self):
2456-
for zipfile_abcde in self.zipfile_abcde():
2457-
root = zipfile.Path(zipfile_abcde)
2467+
def test_iterdir_and_types(self):
2468+
for alpharep in self.zipfile_alpharep():
2469+
root = zipfile.Path(alpharep)
24582470
assert root.is_dir()
2459-
a, b = root.iterdir()
2471+
a, b, g = root.iterdir()
24602472
assert a.is_file()
24612473
assert b.is_dir()
2462-
c, d = b.iterdir()
2463-
assert c.is_file()
2474+
assert g.is_dir()
2475+
c, f, d = b.iterdir()
2476+
assert c.is_file() and f.is_file()
24642477
e, = d.iterdir()
24652478
assert e.is_file()
2479+
h, = g.iterdir()
2480+
i, = h.iterdir()
2481+
assert i.is_file()
24662482

24672483
def test_open(self):
2468-
for zipfile_abcde in self.zipfile_abcde():
2469-
root = zipfile.Path(zipfile_abcde)
2470-
a, b = root.iterdir()
2484+
for alpharep in self.zipfile_alpharep():
2485+
root = zipfile.Path(alpharep)
2486+
a, b, g = root.iterdir()
24712487
with a.open() as strm:
24722488
data = strm.read()
24732489
assert data == b"content of a"
24742490

24752491
def test_read(self):
2476-
for zipfile_abcde in self.zipfile_abcde():
2477-
root = zipfile.Path(zipfile_abcde)
2478-
a, b = root.iterdir()
2492+
for alpharep in self.zipfile_alpharep():
2493+
root = zipfile.Path(alpharep)
2494+
a, b, g = root.iterdir()
24792495
assert a.read_text() == "content of a"
24802496
assert a.read_bytes() == b"content of a"
24812497

24822498
def test_joinpath(self):
2483-
for zipfile_abcde in self.zipfile_abcde():
2484-
root = zipfile.Path(zipfile_abcde)
2499+
for alpharep in self.zipfile_alpharep():
2500+
root = zipfile.Path(alpharep)
24852501
a = root.joinpath("a")
24862502
assert a.is_file()
24872503
e = root.joinpath("b").joinpath("d").joinpath("e.txt")
24882504
assert e.read_text() == "content of e"
24892505

24902506
def test_traverse_truediv(self):
2491-
for zipfile_abcde in self.zipfile_abcde():
2492-
root = zipfile.Path(zipfile_abcde)
2507+
for alpharep in self.zipfile_alpharep():
2508+
root = zipfile.Path(alpharep)
24932509
a = root / "a"
24942510
assert a.is_file()
24952511
e = root / "b" / "d" / "e.txt"
@@ -2504,26 +2520,27 @@ def test_pathlike_construction(self):
25042520
zipfile.Path(pathlike)
25052521

25062522
def test_traverse_pathlike(self):
2507-
for zipfile_abcde in self.zipfile_abcde():
2508-
root = zipfile.Path(zipfile_abcde)
2523+
for alpharep in self.zipfile_alpharep():
2524+
root = zipfile.Path(alpharep)
25092525
root / pathlib.Path("a")
25102526

25112527
def test_parent(self):
2512-
for zipfile_abcde in self.zipfile_abcde():
2513-
root = zipfile.Path(zipfile_abcde)
2528+
for alpharep in self.zipfile_alpharep():
2529+
root = zipfile.Path(alpharep)
25142530
assert (root / 'a').parent.at == ''
25152531
assert (root / 'a' / 'b').parent.at == 'a/'
25162532

25172533
def test_dir_parent(self):
2518-
for zipfile_abcde in self.zipfile_abcde():
2519-
root = zipfile.Path(zipfile_abcde)
2534+
for alpharep in self.zipfile_alpharep():
2535+
root = zipfile.Path(alpharep)
25202536
assert (root / 'b').parent.at == ''
25212537
assert (root / 'b/').parent.at == ''
25222538

25232539
def test_missing_dir_parent(self):
2524-
for zipfile_abcde in self.zipfile_abcde():
2525-
root = zipfile.Path(zipfile_abcde)
2540+
for alpharep in self.zipfile_alpharep():
2541+
root = zipfile.Path(alpharep)
25262542
assert (root / 'missing dir/').parent.at == ''
25272543

2544+
25282545
if __name__ == "__main__":
25292546
unittest.main()

Lib/zipfile.py

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import functools
88
import importlib.util
99
import io
10+
import itertools
1011
import os
1112
import posixpath
1213
import shutil
@@ -2104,6 +2105,65 @@ def _compile(file, optimize=-1):
21042105
return (fname, archivename)
21052106

21062107

2108+
def _unique_everseen(iterable, key=None):
2109+
"List unique elements, preserving order. Remember all elements ever seen."
2110+
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
2111+
# unique_everseen('ABBCcAD', str.lower) --> A B C D
2112+
seen = set()
2113+
seen_add = seen.add
2114+
if key is None:
2115+
for element in itertools.filterfalse(seen.__contains__, iterable):
2116+
seen_add(element)
2117+
yield element
2118+
else:
2119+
for element in iterable:
2120+
k = key(element)
2121+
if k not in seen:
2122+
seen_add(k)
2123+
yield element
2124+
2125+
2126+
def _parents(path):
    """
    Generate every proper ancestor of *path*, nearest first.

    *path* uses posixpath.sep as its separator. Trailing
    separators are ignored, and the path itself is never
    included in the result.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The ancestry starts with the path itself; skip that first item.
    return itertools.islice(lineage, 1, None)
2143+
2144+
2145+
def _ancestry(path):
2146+
"""
2147+
Given a path with elements separated by
2148+
posixpath.sep, generate all elements of that path
2149+
2150+
>>> list(_ancestry('b/d'))
2151+
['b/d', 'b']
2152+
>>> list(_ancestry('/b/d/'))
2153+
['/b/d', '/b']
2154+
>>> list(_ancestry('b/d/f/'))
2155+
['b/d/f', 'b/d', 'b']
2156+
>>> list(_ancestry('b'))
2157+
['b']
2158+
>>> list(_ancestry(''))
2159+
[]
2160+
"""
2161+
path = path.rstrip(posixpath.sep)
2162+
while path and path != posixpath.sep:
2163+
yield path
2164+
path, tail = posixpath.split(path)
2165+
2166+
21072167
class Path:
21082168
"""
21092169
A pathlib-compatible interface for zip files.
@@ -2227,12 +2287,17 @@ def joinpath(self, add):
22272287
__truediv__ = joinpath
22282288

22292289
@staticmethod
2230-
def _add_implied_dirs(names):
2231-
return names + [
2232-
name + "/"
2233-
for name in map(posixpath.dirname, names)
2234-
if name and name + "/" not in names
2235-
]
2290+
def _implied_dirs(names):
    """
    Generate the directory entries implied by *names* but missing from it.

    For every name, each ancestor directory (as produced by
    _parents) is rendered with a trailing "/" and yielded if
    *names* has no such entry. Each directory is yielded at most
    once, in the order it is first encountered.

    *names* is a sequence of archive member names; it is
    snapshotted when this function is called.
    """
    # Build the lookup set once so that each membership test is O(1)
    # rather than a linear scan of the list for every candidate parent.
    known = set(names)
    return _unique_everseen(
        parent + "/"
        for name in names
        for parent in _parents(name)
        if parent + "/" not in known
    )
2297+
2298+
@classmethod
def _add_implied_dirs(cls, names):
    """
    Return *names* extended with the directory entries it implies.

    The original names come first, followed by the entries
    generated by cls._implied_dirs(names).
    """
    implied = list(cls._implied_dirs(names))
    return names + implied
22362301

22372302
@property
22382303
def parent(self):
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
In ``zipfile.Path``, when adding implicit dirs, ensure that ancestral directories are added and that duplicates are excluded.

0 commit comments

Comments
(0)