Skip to content

bpo-37772: fix zipfile.Path.iterdir() outputs #15170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Aug 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
11efec1
fix Path._add_implied_dirs to include all implied directories
shireenrao Aug 7, 2019
12491c5
fix Path._add_implied_dirs to include all implied directories
shireenrao Aug 7, 2019
b7266c1
Optimize code by using sets instead of lists
shireenrao Aug 7, 2019
3ac5c04
merge branch to get updates
shireenrao Aug 7, 2019
10f50b1
📜🤖 Added by blurb_it.
blurb-it[bot] Aug 7, 2019
ca05ad6
fix Path._add_implied_dirs to include all implied directories
shireenrao Aug 7, 2019
342a447
Optimize code by using sets instead of lists
shireenrao Aug 7, 2019
c36975f
📜🤖 Added by blurb_it.
blurb-it[bot] Aug 7, 2019
69109a7
Add tests to zipfile.Path.iterdir() fix
shireenrao Aug 9, 2019
42c9e51
Update test for zipfile.Path.iterdir()
shireenrao Aug 9, 2019
5f63ce9
Merge branch 'zipfile' of github.com:shireenrao/cpython into zipfile
shireenrao Aug 9, 2019
2c55595
remove whitespace from test file
shireenrao Aug 9, 2019
cedac04
Merge branch 'master' into zipfile
jaraco Aug 10, 2019
64c50a3
Rewrite NEWS blurb to describe the user-facing impact and avoid imple…
jaraco Aug 10, 2019
9e9e42f
remove redundant [] within set comprehension
shireenrao Aug 10, 2019
eeb5f98
Update to use unique_everseen to maintain order and other suggestions…
shireenrao Aug 10, 2019
7b1b6f5
remove whitespace and add back add_dirs in tests
shireenrao Aug 10, 2019
ad05c79
Add new standalone function parents using posixpath to get parents of…
shireenrao Aug 12, 2019
9476650
removing whitespace (sorry)
shireenrao Aug 12, 2019
f9c1706
Remove import pathlib from zipfile.py
shireenrao Aug 12, 2019
c153a6f
Rewrite _parents as a slice on a generator of the ancestry of a path.
jaraco Aug 24, 2019
0002c29
Remove check for '.' and '/', now that parents no longer returns those.
jaraco Aug 24, 2019
c1cbec6
Separate calculation of implied dirs from adding those
jaraco Aug 24, 2019
78a584e
Re-use _implied_dirs in tests for generating zipfile with dir entries.
jaraco Aug 24, 2019
2384aa7
Replace three fixtures (abcde, abcdef, abde) with one representative …
jaraco Aug 24, 2019
be2a13a
Simplify implementation of _implied_dirs by collapsing the generation…
jaraco Aug 24, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 63 additions & 46 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2397,37 +2397,49 @@ def test_extract_command(self):
consume = tuple


def add_dirs(zipfile):
def add_dirs(zf):
"""
Given a writable zipfile, inject directory entries for
Given a writable zip file zf, inject directory entries for
any directories implied by the presence of children.
"""
names = zipfile.namelist()
consume(
zipfile.writestr(name + "/", b"")
for name in map(posixpath.dirname, names)
if name and name + "/" not in names
)
return zipfile
for name in zipfile.Path._implied_dirs(zf.namelist()):
zf.writestr(name, b"")
return zf


def build_abcde_files():
def build_alpharep_fixture():
"""
Create a zip file with this structure:

.
├── a.txt
└── b
├── c.txt
└── d
└── e.txt
├── b
│ ├── c.txt
│ ├── d
│ │ └── e.txt
│ └── f.txt
└── g
└── h
└── i.txt

This fixture has the following key characteristics:

- a file at the root (a)
- a file two levels deep (b/d/e)
- multiple files in a directory (b/c, b/f)
- a directory containing only a directory (g/h)

"alpha" because it uses alphabet
"rep" because it's a representative example
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr("a.txt", b"content of a")
zf.writestr("b/c.txt", b"content of c")
zf.writestr("b/d/e.txt", b"content of e")
zf.filename = "abcde.zip"
zf.writestr("b/f.txt", b"content of f")
zf.writestr("g/h/i.txt", b"content of i")
zf.filename = "alpharep.zip"
return zf


Expand All @@ -2436,60 +2448,64 @@ def setUp(self):
self.fixtures = contextlib.ExitStack()
self.addCleanup(self.fixtures.close)

def zipfile_abcde(self):
def zipfile_alpharep(self):
with self.subTest():
yield build_abcde_files()
yield build_alpharep_fixture()
with self.subTest():
yield add_dirs(build_abcde_files())
yield add_dirs(build_alpharep_fixture())

def zipfile_ondisk(self):
tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
for zipfile_abcde in self.zipfile_abcde():
buffer = zipfile_abcde.fp
zipfile_abcde.close()
path = tmpdir / zipfile_abcde.filename
for alpharep in self.zipfile_alpharep():
buffer = alpharep.fp
alpharep.close()
path = tmpdir / alpharep.filename
with path.open("wb") as strm:
strm.write(buffer.getvalue())
yield path

def test_iterdir_istype(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
def test_iterdir_and_types(self):
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
assert root.is_dir()
a, b = root.iterdir()
a, b, g = root.iterdir()
assert a.is_file()
assert b.is_dir()
c, d = b.iterdir()
assert c.is_file()
assert g.is_dir()
c, f, d = b.iterdir()
assert c.is_file() and f.is_file()
e, = d.iterdir()
assert e.is_file()
h, = g.iterdir()
i, = h.iterdir()
assert i.is_file()

def test_open(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
a, b = root.iterdir()
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
with a.open() as strm:
data = strm.read()
assert data == b"content of a"

def test_read(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
a, b = root.iterdir()
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
assert a.read_text() == "content of a"
assert a.read_bytes() == b"content of a"

def test_joinpath(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
a = root.joinpath("a")
assert a.is_file()
e = root.joinpath("b").joinpath("d").joinpath("e.txt")
assert e.read_text() == "content of e"

def test_traverse_truediv(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
a = root / "a"
assert a.is_file()
e = root / "b" / "d" / "e.txt"
Expand All @@ -2504,26 +2520,27 @@ def test_pathlike_construction(self):
zipfile.Path(pathlike)

def test_traverse_pathlike(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
root / pathlib.Path("a")

def test_parent(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
assert (root / 'a').parent.at == ''
assert (root / 'a' / 'b').parent.at == 'a/'

def test_dir_parent(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
assert (root / 'b').parent.at == ''
assert (root / 'b/').parent.at == ''

def test_missing_dir_parent(self):
for zipfile_abcde in self.zipfile_abcde():
root = zipfile.Path(zipfile_abcde)
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
assert (root / 'missing dir/').parent.at == ''


if __name__ == "__main__":
unittest.main()
77 changes: 71 additions & 6 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import functools
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
Expand Down Expand Up @@ -2104,6 +2105,65 @@ def _compile(file, optimize=-1):
return (fname, archivename)


def _unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set()
seen_add = seen.add
if key is None:
for element in itertools.filterfalse(seen.__contains__, iterable):
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element


def _parents(path):
"""
Given a path with elements separated by
posixpath.sep, generate all parents of that path.

>>> list(_parents('b/d'))
['b']
>>> list(_parents('/b/d/'))
['/b']
>>> list(_parents('b/d/f/'))
['b/d', 'b']
>>> list(_parents('b'))
[]
>>> list(_parents(''))
[]
"""
return itertools.islice(_ancestry(path), 1, None)


def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path

>>> list(_ancestry('b/d'))
['b/d', 'b']
>>> list(_ancestry('/b/d/'))
['/b/d', '/b']
>>> list(_ancestry('b/d/f/'))
['b/d/f', 'b/d', 'b']
>>> list(_ancestry('b'))
['b']
>>> list(_ancestry(''))
[]
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
yield path
path, tail = posixpath.split(path)


class Path:
"""
A pathlib-compatible interface for zip files.
Expand Down Expand Up @@ -2227,12 +2287,17 @@ def joinpath(self, add):
__truediv__ = joinpath

@staticmethod
def _add_implied_dirs(names):
return names + [
name + "/"
for name in map(posixpath.dirname, names)
if name and name + "/" not in names
]
def _implied_dirs(names):
return _unique_everseen(
parent + "/"
for name in names
for parent in _parents(name)
if parent + "/" not in names
)

@classmethod
def _add_implied_dirs(cls, names):
return names + list(cls._implied_dirs(names))

@property
def parent(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
In ``zipfile.Path``, when adding implicit dirs, ensure that ancestral directories are added and that duplicates are excluded.