Skip to content

Commit ce2f1ee

Browse files
authored
Merge pull request #901 from onekey-sec/900-invalid-valid-zip64
fix(handler): improve zip64 detection
2 parents 3d541a4 + 2ffcb1b commit ce2f1ee

File tree

3 files changed

+25
-20
lines changed

3 files changed

+25
-20
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:a4ea164d7b08ba3ee6dbea6808e7d5438ffbf317420e28d0c5e42b7090f42851
3+
size 126
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:303980bcb9e9e6cdec515230791af8b0ab1aaa244b58a8d99152673aa22197d0
3+
size 6

unblob/handlers/archive/zip.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ def is_zip64_eocd(end_of_central_directory):
114114
or end_of_central_directory.offset_of_cd == 0xFFFFFFFF
115115
)
116116

117-
@staticmethod
118-
def is_zip64_cd_file(file_header):
117+
def has_zip64_tag(self, file):
119118
# see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.3.9.2
119+
file_header = self.cparser_le.partial_cd_file_header_t(file)
120120
return (
121121
file_header.file_size == 0xFFFFFFFF
122122
or file_header.compress_size == 0xFFFFFFFF
@@ -144,23 +144,23 @@ def _parse_zip64(self, file: File, start_offset: int, offset: int):
144144
"Missing ZIP64 EOCD header record header in ZIP chunk."
145145
)
146146
return zip64_eocd
147-
raise InvalidInputFormat(
148-
"Missing ZIP64 EOCD locator record header in ZIP chunk."
149-
)
147+
return None
148+
149+
def get_zip64_eocd(self, file, start_offset, offset, end_of_central_directory):
150+
# some values in the CD can be FFFF, indicating it's a zip64
151+
# if the offset of the CD is 0xFFFFFFFF, it's definitely one
152+
# otherwise we check every other header indicating zip64
153+
if self.is_zip64_eocd(end_of_central_directory):
154+
return self._parse_zip64(file, start_offset, offset)
150155

151-
def is_zip64(self, file, start_offset, offset, end_of_central_directory):
152156
absolute_offset_of_cd = start_offset + end_of_central_directory.offset_of_cd
153157

154158
if 0 < absolute_offset_of_cd < offset:
155159
file.seek(absolute_offset_of_cd, io.SEEK_SET)
156-
file_header = self.cparser_le.partial_cd_file_header_t(file)
157-
if self.is_zip64_cd_file(file_header):
158-
return True
160+
if self.has_zip64_tag(file):
161+
return self._parse_zip64(file, start_offset, offset)
159162

160-
# some values in the CD can be FFFF, indicating its a zip64
161-
# if the offset of the CD is 0xFFFFFFFF, its definitely one
162-
# otherwise we check every other header indicating zip64
163-
return self.is_zip64_eocd(end_of_central_directory)
163+
return None
164164

165165
def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
166166
has_encrypted_files = False
@@ -173,9 +173,11 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
173173
file.seek(offset, io.SEEK_SET)
174174
end_of_central_directory = self.parse_header(file)
175175

176-
if self.is_zip64(file, start_offset, offset, end_of_central_directory):
177-
file.seek(offset, io.SEEK_SET)
178-
end_of_central_directory = self._parse_zip64(file, start_offset, offset)
176+
zip64_eocd = self.get_zip64_eocd(
177+
file, start_offset, offset, end_of_central_directory
178+
)
179+
if zip64_eocd is not None:
180+
end_of_central_directory = zip64_eocd
179181
break
180182

181183
# the EOCD offset is equal to the offset of CD + size of CD
@@ -188,10 +190,7 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
188190
if offset == end_of_central_directory_offset:
189191
break
190192
else:
191-
if offset is None:
192-
raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.")
193-
# if we can't find a valid 32bit ZIP EOCD, we fall back to ZIP64
194-
end_of_central_directory = self._parse_zip64(file, start_offset, offset)
193+
raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.")
195194

196195
has_encrypted_files = self.has_encrypted_files(
197196
file, start_offset, end_of_central_directory

0 commit comments

Comments
 (0)