@@ -274,6 +274,32 @@ def download_url(
274274 )
275275
276276
def _extract_zip(filepath, output_dir):
    """Copy the non-directory entries of a zip archive under ``output_dir``.

    Each entry is handed to ``safe_extract_member`` to obtain its destination
    path (presumably that helper rejects entries that would escape
    ``output_dir`` -- confirm against its definition). Directory entries are
    not created on their own; parent directories are created on demand for
    every entry that is written.

    Args:
        filepath: Path of the zip archive to read.
        output_dir: Directory the archive contents are written into.
    """
    with zipfile.ZipFile(filepath, "r") as archive:
        for entry in archive.infolist():
            # Resolve the destination for every entry, directories included,
            # before deciding whether there is anything to write.
            destination = safe_extract_member(entry, output_dir)
            if not entry.is_dir():
                os.makedirs(os.path.dirname(destination), exist_ok=True)
                with archive.open(entry) as src, open(destination, "wb") as dst:
                    shutil.copyfileobj(src, dst)
287+
288+
def _extract_tar(filepath, output_dir):
    """Copy the regular-file members of a tar archive under ``output_dir``.

    Each member is handed to ``safe_extract_member`` to obtain its destination
    path (presumably that helper rejects members that would escape
    ``output_dir`` -- confirm against its definition). Members that are not
    regular files (directories, links, devices, ...) are skipped; parent
    directories are created on demand for every regular file.

    Args:
        filepath: Path of the tar archive to read; opened with mode ``"r"``.
        output_dir: Directory the archive contents are written into.
    """
    with tarfile.open(filepath, "r") as archive:
        for entry in archive.getmembers():
            # Resolve the destination for every member before filtering on
            # its type, so non-file members still go through the helper.
            destination = safe_extract_member(entry, output_dir)
            if not entry.isfile():
                continue
            os.makedirs(os.path.dirname(destination), exist_ok=True)
            src = archive.extractfile(entry)
            if src is None:
                # No readable stream for this member; nothing to copy.
                continue
            with src, open(destination, "wb") as dst:
                shutil.copyfileobj(src, dst)
301+
302+
277303def extractall (
278304 filepath : PathLike ,
279305 output_dir : PathLike = "." ,
@@ -319,30 +345,10 @@ def extractall(
319345 logger .info (f"Writing into directory: { output_dir } ." )
320346 _file_type = file_type .lower ().strip ()
321347 if filepath .name .endswith ("zip" ) or _file_type == "zip" :
322- with zipfile .ZipFile (filepath , "r" ) as zip_file :
323- for member in zip_file .infolist ():
324- safe_path = safe_extract_member (member , output_dir )
325- if member .is_dir ():
326- continue
327-
328- os .makedirs (os .path .dirname (safe_path ), exist_ok = True )
329- with zip_file .open (member ) as source :
330- with open (safe_path , "wb" ) as target :
331- shutil .copyfileobj (source , target )
348+ _extract_zip (filepath , output_dir )
332349 return
333350 if filepath .name .endswith ("tar" ) or filepath .name .endswith ("tar.gz" ) or "tar" in _file_type :
334- with tarfile .open (filepath , "r" ) as tar_file :
335- for member in tar_file .getmembers ():
336- safe_path = safe_extract_member (member , output_dir )
337- if not member .isfile ():
338- continue
339-
340- os .makedirs (os .path .dirname (safe_path ), exist_ok = True )
341- source = tar_file .extractfile (member )
342- if source is not None :
343- with source :
344- with open (safe_path , "wb" ) as target :
345- shutil .copyfileobj (source , target )
351+ _extract_tar (filepath , output_dir )
346352 return
347353 raise NotImplementedError (
348354 f'Unsupported file type, available options are: ["zip", "tar.gz", "tar"]. name={ filepath } type={ file_type } .'
0 commit comments