1919import logging
2020import sys
2121import time
22+ import uuid
23+
2224
2325# local imports
2426from .exceptions import DatalakeBadOffsetException
@@ -667,6 +669,9 @@ def __init__(self, azure, path, mode='rb', blocksize=2**25,
667669 self .buffer = io .BytesIO ()
668670 self .blocksize = blocksize
669671 self .first_write = True
672+ uniqueid = str (uuid .uuid4 ())
673+ self .filesessionid = uniqueid
674+ self .leaseid = uniqueid
670675
671676 # always invalidate the cache when checking for existence of a file
672677 # that may be created or written to (for the first time).
@@ -758,11 +763,11 @@ def _fetch(self, start, end):
758763 self .start = start
759764 self .end = min (end + self .blocksize , self .size )
760765 response = _fetch_range_with_retry (
761- self .azure .azure , self .path .as_posix (), start , self .end )
766+ self .azure .azure , self .path .as_posix (), start , self .end , filesessionid = self . filesessionid )
762767 self .cache = getattr (response , 'content' , response )
763768 if start < self .start :
764769 response = _fetch_range_with_retry (
765- self .azure .azure , self .path .as_posix (), start , self .start )
770+ self .azure .azure , self .path .as_posix (), start , self .start , filesessionid = self . filesessionid )
766771 new = getattr (response , 'content' , response )
767772 self .start = start
768773 self .cache = new + self .cache
@@ -771,7 +776,7 @@ def _fetch(self, start, end):
771776 return
772777 newend = min (self .size , end + self .blocksize )
773778 response = _fetch_range_with_retry (
774- self .azure .azure , self .path .as_posix (), self .end , newend )
779+ self .azure .azure , self .path .as_posix (), self .end , newend , filesessionid = self . filesessionid )
775780 new = getattr (response , 'content' , response )
776781 self .end = newend
777782 self .cache = self .cache + new
@@ -820,13 +825,14 @@ def write(self, data):
820825 raise ValueError ('File not in write mode' )
821826 if self .closed :
822827 raise ValueError ('I/O operation on closed file.' )
828+
823829 out = self .buffer .write (ensure_writable (data ))
824830 self .loc += out
825- if self .buffer .tell () >= self .blocksize :
826- self .flush ()
831+ self .flush (syncFlag = 'DATA' )
827832 return out
833+
828834
def flush(self, syncFlag='METADATA', force=False):
    """
    Write buffered data to ADL.

    Without a delimiter: uploads the buffer one block at a time.

    With a delimiter: each upload is at most one block and ends on the
    delimiter, until less than a block remains buffered.  If a block
    contains no delimiter, the whole block is uploaded.

    Parameters
    ----------
    syncFlag : str
        One of 'METADATA', 'DATA' or 'CLOSE'; forwarded to the service to
        control how the written data is synchronized.  Full intermediate
        blocks are always sent with 'DATA'; this value applies to the
        final forced write.
    force : bool
        If True, also upload the remaining partial block even if it does
        not end on a delimiter (appropriate when closing the file).

    Raises
    ------
    ValueError
        If ``syncFlag`` is not one of the three accepted values.
    """
    if not self.writable() or self.closed:
        return

    # Validate early; the service only accepts these three flag values.
    if syncFlag not in ('METADATA', 'DATA', 'CLOSE'):
        raise ValueError('syncFlag must be one of these: METADATA, DATA or CLOSE')

    if self.buffer.tell() == 0:
        if force and self.first_write:
            # Nothing buffered but the file was never created: issue an
            # empty CREATE so the path exists on the service.
            _put_data_with_retry(
                self.azure.azure,
                'CREATE',
                path=self.path.as_posix(),
                data=None,
                overwrite='true',
                write='true',
                syncFlag=syncFlag,
                leaseid=self.leaseid,
                filesessionid=self.filesessionid)
            self.first_write = False
        return

    self.buffer.seek(0)
    data = self.buffer.read()

    # Intermediate full blocks are flushed with 'DATA'; the caller's
    # syncFlag is reserved for the final (forced) tail write below.
    syncFlagLocal = 'DATA'
    while len(data) > self.blocksize:
        if self.delimiter:
            place = data[:self.blocksize].rfind(self.delimiter)
        else:
            place = -1
        if place < 0:
            # delimiter not found (or none configured) - write whole block
            limit = self.blocksize
        else:
            limit = place + len(self.delimiter)
        if self.first_write:
            _put_data_with_retry(
                self.azure.azure,
                'CREATE',
                path=self.path.as_posix(),
                data=data[:limit],
                overwrite='true',
                write='true',
                syncFlag=syncFlagLocal,
                leaseid=self.leaseid,
                filesessionid=self.filesessionid)
            self.first_write = False
        else:
            _put_data_with_retry(
                self.azure.azure,
                'APPEND',
                path=self.path.as_posix(),
                data=data[:limit],
                append='true',
                syncFlag=syncFlagLocal,
                leaseid=self.leaseid,
                filesessionid=self.filesessionid)
        logger.debug('Wrote %d bytes to %s' % (limit, self))
        data = data[limit:]

    # Re-buffer the remaining partial block, positioned at the end so
    # subsequent write() calls append to it.
    self.buffer = io.BytesIO(data)
    self.buffer.seek(0, 2)

    if force:
        # Upload the tail in one request at its absolute file offset.
        zero_offset = self.tell() - len(data)
        if self.first_write:
            _put_data_with_retry(
                self.azure.azure,
                'CREATE',
                path=self.path.as_posix(),
                data=data,
                overwrite='true',
                write='true',
                syncFlag=syncFlag,
                leaseid=self.leaseid,
                filesessionid=self.filesessionid)
            self.first_write = False
        else:
            _put_data_with_retry(
                self.azure.azure,
                'APPEND',
                path=self.path.as_posix(),
                data=data,
                offset=zero_offset,
                append='true',
                syncFlag=syncFlag,
                leaseid=self.leaseid,
                filesessionid=self.filesessionid)
        logger.debug('Wrote %d bytes to %s' % (len(data), self))
        self.buffer = io.BytesIO()
def close(self):
    """Close the file.

    When the file is writable, any buffered data is flushed (with the
    'CLOSE' sync flag) before the handle is marked closed.  Closing an
    already-closed file is a no-op.
    """
    logger.info("closing stream")
    if not self.closed:
        if self.writable():
            self.flush(syncFlag='CLOSE', force=True)
        self.azure.invalidate_cache(self.path.as_posix())
        self.closed = True
926953
@@ -948,20 +975,20 @@ def __exit__(self, *args):
948975 self .close ()
949976
950977
def _fetch_range(rest, path, start, end, stream=False, **kwargs):
    """Issue a single OPEN read for bytes ``[start, end)`` of *path*.

    Extra keyword arguments (e.g. ``filesessionid``) are forwarded to the
    REST call unchanged.
    """
    logger.debug('Fetch: %s, %s-%s', path, start, end)
    # A bad start/end combination makes OPEN raise; let that propagate
    # up to the caller rather than masking it here.
    length = end - start
    return rest.call('OPEN', path, offset=start, length=length,
                     read='true', stream=stream, **kwargs)
957984
958985
959986def _fetch_range_with_retry (rest , path , start , end , stream = False , retries = 10 ,
960- delay = 0.01 , backoff = 3 ):
987+ delay = 0.01 , backoff = 3 , ** kwargs ):
961988 err = None
962989 for i in range (retries ):
963990 try :
964- return _fetch_range (rest , path , start , end , stream = False )
991+ return _fetch_range (rest , path , start , end , stream = False , ** kwargs )
965992 except Exception as e :
966993 err = e
967994 logger .debug ('Exception %s on ADL download on attempt: %s, retrying in %s seconds' ,
0 commit comments