@@ -289,17 +289,23 @@ def get_chunk(adlfs, src, dst, offset, size, buffersize, blocksize,
289289 exponential_factor = backoff )
290290 try :
291291 nbytes = 0
292- with closing (_fetch_range (adlfs .azure , src , start = offset ,
293- end = offset + size , stream = True , retry_policy = retry_policy )) as response :
294- with open (dst , 'rb+' ) as fout :
295- fout .seek (offset )
296- for chunk in response .iter_content (chunk_size = blocksize ):
292+ start = offset
293+
294+ with open (dst , 'rb+' ) as fout :
295+ fout .seek (start )
296+ while start < offset + size :
297+ with closing (_fetch_range (adlfs .azure , src , start = start ,
298+ end = min (start + blocksize , offset + size ), stream = True , retry_policy = retry_policy )) as response :
299+ chunk = response .content
297300 if shutdown_event and shutdown_event .is_set ():
298301 return total_bytes_downloaded , None
299302 if chunk :
300303 nwritten = fout .write (chunk )
301304 if nwritten :
302305 nbytes += nwritten
306+ start += nwritten
307+ else :
308+ raise IOError ("Failed to write to disk for {0} at location {1} with blocksize {2}" .format (dst , start , blocksize ))
303309 logger .debug ('Downloaded %s bytes to %s, byte offset %s' , nbytes , dst , offset )
304310
305311 # There are certain cases where we will be throttled and recieve less than the expected amount of data.
@@ -456,9 +462,12 @@ def _setup(self):
456462 """
457463 is_path_walk_empty = False
458464 if "*" not in self .lpath :
459- out = os .walk (self .lpath )
460- lfiles = sum (([os .path .join (dir , f ) for f in fnames ] for
461- (dir , _ , fnames ) in out ), [])
465+ lfiles = []
466+ for directory , subdir , fnames in os .walk (self .lpath ):
467+ lfiles .extend ([os .path .join (directory , f ) for f in fnames ])
468+ if not subdir and not fnames : # Empty Directory
469+ self .client ._adlfs ._emptyDirs .append (directory )
470+
462471 if (not lfiles and os .path .exists (self .lpath ) and
463472 not os .path .isdir (self .lpath )):
464473 lfiles = [self .lpath ]
@@ -502,6 +511,11 @@ def run(self, nthreads=None, monitor=True):
502511 monitor: bool [True]
503512 To watch and wait (block) until completion.
504513 """
514+ for empty_directory in self .client ._adlfs ._empty_dirs_to_add ():
515+ local_rel_path = os .path .relpath (empty_directory , self .lpath )
516+ rel_rpath = str (AzureDLPath (self .rpath ).trim ().globless_prefix / local_rel_path )
517+ self .client ._adlfs .mkdir (rel_rpath )
518+
505519 self .client .run (nthreads , monitor )
506520
507521 def active (self ):
0 commit comments