Skip to content

Commit 18ea325

Browse files
committed
fix(handlers): handle dangling symlinks in MultiFile handlers.
MultiFile handlers collected the files within a directory that matched a specific naming schema without checking whether those files actually exist. For example, a directory could contain dangling symlinks whose names match the glob search. This led to a FileNotFoundError being raised by the multi-file handlers.
1 parent b47c2ce commit 18ea325

File tree

10 files changed

+22
-4
lines changed

10 files changed

+22
-4
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:70f2e1d6ef9b9954fde7f76de1026dce3726ef16e3eb5b944d03f45394675e11
3+
size 10240
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hello
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hello
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hello
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:4e4c5188742aa0848dd13878d4f996465dddd3a1c5376cb3af872149d714daa0
3+
size 10240
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hello
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hello
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hello

unblob/handlers/archive/sevenzip.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,12 @@ class MultiVolumeSevenZipHandler(DirectoryHandler):
106106
PATTERN = Glob("*.7z.001")
107107

108108
def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
109+
paths = sorted(
110+
[p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
111+
)
112+
if not paths:
113+
return None
114+
109115
with file.open("rb") as f:
110116
header_data = f.read(HEADER_SIZE)
111117

@@ -117,8 +123,6 @@ def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
117123
size = calculate_sevenzip_size(header)
118124
logger.debug("Sevenzip header", header=header, size=size, _verbosity=3)
119125

120-
paths = sorted(file.parent.glob(f"{file.stem}.*"))
121-
122126
files_size = sum(path.stat().st_size for path in paths)
123127
logger.debug(
124128
"Multi-volume files", paths=paths, files_size=files_size, _verbosity=2

unblob/handlers/compression/gzip.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,14 @@ def is_valid_gzip(self, path: Path) -> bool:
166166
return True
167167

168168
def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
169-
paths = sorted(file.parent.glob(f"{file.stem}.*"))
169+
paths = sorted(
170+
[p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
171+
)
170172

171173
# we 'discard' paths that are not the first in the ordered list,
172174
# otherwise we will end up with colliding reports, one for every
173175
# path in the list.
174-
if file != paths[0]:
176+
if not paths or file != paths[0]:
175177
return None
176178

177179
if self.is_valid_gzip(file):

0 commit comments

Comments
 (0)