@@ -65,9 +65,6 @@ def write32u(output, value):
65
65
# or unsigned.
66
66
output .write (struct .pack ("<L" , value ))
67
67
68
- def read32 (input ):
69
- return struct .unpack ("<I" , input .read (4 ))[0 ]
70
-
71
68
class _PaddedFile :
72
69
"""Minimal read-only file object that prepends a string to the contents
73
70
of an actual file. Shouldn't be used outside of gzip.py, as it lacks
@@ -281,27 +278,31 @@ def _init_read(self):
281
278
self .crc = zlib .crc32 (b"" ) & 0xffffffff
282
279
self .size = 0
283
280
281
def _read_exact(self, n):
    """Read exactly *n* bytes from ``self.fileobj``.

    A single ``read()`` on the underlying file object may hand back
    fewer bytes than requested, so keep asking for the remainder until
    the request is satisfied.

    Raises EOFError if the underlying file runs dry before *n* bytes
    have been collected.
    """
    buf = self.fileobj.read(n)
    missing = n - len(buf)
    while missing > 0:
        piece = self.fileobj.read(missing)
        if not piece:
            # An empty read means the source is exhausted mid-record.
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        buf += piece
        missing = n - len(buf)
    return buf
290
+
284
291
def _read_gzip_header (self ):
285
292
magic = self .fileobj .read (2 )
286
293
if magic == b'' :
287
- raise EOFError ( "Reached EOF" )
294
+ return False
288
295
289
296
if magic != b'\037 \213 ' :
290
297
raise OSError ('Not a gzipped file' )
291
- method = ord ( self .fileobj .read (1 ) )
298
+
299
+ method , flag , self .mtime = struct .unpack ("<BBIxx" , self ._read_exact (8 ))
292
300
if method != 8 :
293
301
raise OSError ('Unknown compression method' )
294
- flag = ord ( self .fileobj .read (1 ) )
295
- self .mtime = read32 (self .fileobj )
296
- # extraflag = self.fileobj.read(1)
297
- # os = self.fileobj.read(1)
298
- self .fileobj .read (2 )
299
302
300
303
if flag & FEXTRA:
    # Read & discard the extra field, if present.  The field is
    # preceded by a 2-byte little-endian length.  struct.unpack()
    # always returns a tuple, so pull out its single element before
    # using it as a byte count; passing the tuple itself to
    # _read_exact() would make fileobj.read() raise TypeError.
    extra_len, = struct.unpack("<H", self._read_exact(2))
    self._read_exact(extra_len)
305
306
if flag & FNAME :
306
307
# Read and discard a null-terminated string containing the filename
307
308
while True :
@@ -315,12 +316,13 @@ def _read_gzip_header(self):
315
316
if not s or s == b'\000 ' :
316
317
break
317
318
if flag & FHCRC :
318
- self .fileobj . read (2 ) # Read & discard the 16-bit header CRC
319
+ self ._read_exact (2 ) # Read & discard the 16-bit header CRC
319
320
320
321
unused = self .fileobj .unused ()
321
322
if unused :
322
323
uncompress = self .decompress .decompress (unused )
323
324
self ._add_read_data (uncompress )
325
+ return True
324
326
325
327
def write (self ,data ):
326
328
self ._check_closed ()
@@ -354,20 +356,16 @@ def read(self, size=-1):
354
356
355
357
readsize = 1024
356
358
if size < 0 : # get the whole thing
357
- try :
358
- while True :
359
- self ._read (readsize )
360
- readsize = min (self .max_read_chunk , readsize * 2 )
361
- except EOFError :
362
- size = self .extrasize
359
+ while self ._read (readsize ):
360
+ readsize = min (self .max_read_chunk , readsize * 2 )
361
+ size = self .extrasize
363
362
else : # just get some more of it
364
- try :
365
- while size > self .extrasize :
366
- self ._read (readsize )
367
- readsize = min (self .max_read_chunk , readsize * 2 )
368
- except EOFError :
369
- if size > self .extrasize :
370
- size = self .extrasize
363
+ while size > self .extrasize :
364
+ if not self ._read (readsize ):
365
+ if size > self .extrasize :
366
+ size = self .extrasize
367
+ break
368
+ readsize = min (self .max_read_chunk , readsize * 2 )
371
369
372
370
offset = self .offset - self .extrastart
373
371
chunk = self .extrabuf [offset : offset + size ]
@@ -385,12 +383,9 @@ def read1(self, size=-1):
385
383
if self .extrasize <= 0 and self .fileobj is None :
386
384
return b''
387
385
388
- try :
389
- # For certain input data, a single call to _read() may not return
390
- # any data. In this case, retry until we get some data or reach EOF.
391
- while self .extrasize <= 0 :
392
- self ._read ()
393
- except EOFError :
386
+ # For certain input data, a single call to _read() may not return
387
+ # any data. In this case, retry until we get some data or reach EOF.
388
+ while self .extrasize <= 0 and self ._read ():
394
389
pass
395
390
if size < 0 or size > self .extrasize :
396
391
size = self .extrasize
@@ -413,12 +408,9 @@ def peek(self, n):
413
408
if self .extrasize == 0 :
414
409
if self .fileobj is None :
415
410
return b''
416
- try :
417
- # Ensure that we don't return b"" if we haven't reached EOF.
418
- while self .extrasize == 0 :
419
- # 1024 is the same buffering heuristic used in read()
420
- self ._read (max (n , 1024 ))
421
- except EOFError :
411
+ # Ensure that we don't return b"" if we haven't reached EOF.
412
+ # 1024 is the same buffering heuristic used in read()
413
+ while self .extrasize == 0 and self ._read (max (n , 1024 )):
422
414
pass
423
415
offset = self .offset - self .extrastart
424
416
remaining = self .extrasize
@@ -431,13 +423,14 @@ def _unread(self, buf):
431
423
432
424
def _read (self , size = 1024 ):
433
425
if self .fileobj is None :
434
- raise EOFError ( "Reached EOF" )
426
+ return False
435
427
436
428
if self ._new_member :
437
429
# If the _new_member flag is set, we have to
438
430
# jump to the next member, if there is one.
439
431
self ._init_read ()
440
- self ._read_gzip_header ()
432
+ if not self ._read_gzip_header ():
433
+ return False
441
434
self .decompress = zlib .decompressobj (- zlib .MAX_WBITS )
442
435
self ._new_member = False
443
436
@@ -454,7 +447,7 @@ def _read(self, size=1024):
454
447
self .fileobj .prepend (self .decompress .unused_data , True )
455
448
self ._read_eof ()
456
449
self ._add_read_data ( uncompress )
457
- raise EOFError ( 'Reached EOF' )
450
+ return False
458
451
459
452
uncompress = self .decompress .decompress (buf )
460
453
self ._add_read_data ( uncompress )
@@ -470,6 +463,7 @@ def _read(self, size=1024):
470
463
# a new member on the next call
471
464
self ._read_eof ()
472
465
self ._new_member = True
466
+ return True
473
467
474
468
def _add_read_data (self , data ):
475
469
self .crc = zlib .crc32 (data , self .crc ) & 0xffffffff
@@ -484,8 +478,7 @@ def _read_eof(self):
484
478
# We check the that the computed CRC and size of the
485
479
# uncompressed data matches the stored values. Note that the size
486
480
# stored is the true file size mod 2**32.
487
- crc32 = read32 (self .fileobj )
488
- isize = read32 (self .fileobj ) # may exceed 2GB
481
+ crc32 , isize = struct .unpack ("<II" , self ._read_exact (8 ))
489
482
if crc32 != self .crc :
490
483
raise OSError ("CRC check failed %s != %s" % (hex (crc32 ),
491
484
hex (self .crc )))
0 commit comments