@@ -217,12 +217,15 @@ def __init__(self, filename=None, mode=None,
217217 FutureWarning , 2 )
218218 self .mode = WRITE
219219 self ._init_write (filename )
220+ if mtime is None :
221+ mtime = int (time .time ())
220222 self .compress = zlib .compressobj (compresslevel ,
221223 zlib .DEFLATED ,
222- - zlib .MAX_WBITS ,
224+ 16 + zlib .MAX_WBITS ,
223225 zlib .DEF_MEM_LEVEL ,
224- 0 )
225- self ._write_mtime = mtime
226+ 0 ,
227+ mtime = mtime ,
228+ fname = self ._encode_fname ())
226229 self ._buffer_size = _WRITE_BUFFER_SIZE
227230 self ._buffer = io .BufferedWriter (_WriteBufferStream (self ),
228231 buffer_size = self ._buffer_size )
@@ -231,9 +234,6 @@ def __init__(self, filename=None, mode=None,
231234
232235 self .fileobj = fileobj
233236
234- if self .mode == WRITE :
235- self ._write_gzip_header (compresslevel )
236-
237237 @property
238238 def mtime (self ):
239239 """Last modification time read from stream, or None"""
@@ -245,7 +245,6 @@ def __repr__(self):
245245
246246 def _init_write (self , filename ):
247247 self .name = filename
248- self .crc = zlib .crc32 (b"" )
249248 self .size = 0
250249 self .writebuf = []
251250 self .bufsize = 0
@@ -256,9 +255,7 @@ def tell(self):
256255 self ._buffer .flush ()
257256 return super ().tell ()
258257
259- def _write_gzip_header (self , compresslevel ):
260- self .fileobj .write (b'\037 \213 ' ) # magic header
261- self .fileobj .write (b'\010 ' ) # compression method
258+ def _encode_fname (self ):
262259 try :
263260 # RFC 1952 requires the FNAME field to be Latin-1. Do not
264261 # include filenames that cannot be represented that way.
@@ -269,24 +266,7 @@ def _write_gzip_header(self, compresslevel):
269266 fname = fname [:- 3 ]
270267 except UnicodeEncodeError :
271268 fname = b''
272- flags = 0
273- if fname :
274- flags = FNAME
275- self .fileobj .write (chr (flags ).encode ('latin-1' ))
276- mtime = self ._write_mtime
277- if mtime is None :
278- mtime = time .time ()
279- write32u (self .fileobj , int (mtime ))
280- if compresslevel == _COMPRESS_LEVEL_BEST :
281- xfl = b'\002 '
282- elif compresslevel == _COMPRESS_LEVEL_FAST :
283- xfl = b'\004 '
284- else :
285- xfl = b'\000 '
286- self .fileobj .write (xfl )
287- self .fileobj .write (b'\377 ' )
288- if fname :
289- self .fileobj .write (fname + b'\000 ' )
269+ return fname
290270
291271 def write (self ,data ):
292272 self ._check_not_closed ()
@@ -311,7 +291,6 @@ def _write_raw(self, data):
311291 if length > 0 :
312292 self .fileobj .write (self .compress .compress (data ))
313293 self .size += length
314- self .crc = zlib .crc32 (data , self .crc )
315294 self .offset += length
316295
317296 return length
@@ -355,9 +334,6 @@ def close(self):
355334 if self .mode == WRITE :
356335 self ._buffer .flush ()
357336 fileobj .write (self .compress .flush ())
358- write32u (fileobj , self .crc )
359- # self.size may exceed 2 GiB, or even 4 GiB
360- write32u (fileobj , self .size & 0xffffffff )
361337 elif self .mode == READ :
362338 self ._buffer .close ()
363339 finally :
@@ -424,78 +400,17 @@ def readline(self, size=-1):
424400 return self ._buffer .readline (size )
425401
426402
427- def _read_exact (fp , n ):
428- '''Read exactly *n* bytes from `fp`
429-
430- This method is required because fp may be unbuffered,
431- i.e. return short reads.
432- '''
433- data = fp .read (n )
434- while len (data ) < n :
435- b = fp .read (n - len (data ))
436- if not b :
437- raise EOFError ("Compressed file ended before the "
438- "end-of-stream marker was reached" )
439- data += b
440- return data
441-
442-
443- def _read_gzip_header (fp ):
444- '''Read a gzip header from `fp` and progress to the end of the header.
445-
446- Returns last mtime if header was present or None otherwise.
447- '''
448- magic = fp .read (2 )
449- if magic == b'' :
450- return None
451-
452- if magic != b'\037 \213 ' :
453- raise BadGzipFile ('Not a gzipped file (%r)' % magic )
454-
455- (method , flag , last_mtime ) = struct .unpack ("<BBIxx" , _read_exact (fp , 8 ))
456- if method != 8 :
457- raise BadGzipFile ('Unknown compression method' )
458-
459- if flag & FEXTRA :
460- # Read & discard the extra field, if present
461- extra_len , = struct .unpack ("<H" , _read_exact (fp , 2 ))
462- _read_exact (fp , extra_len )
463- if flag & FNAME :
464- # Read and discard a null-terminated string containing the filename
465- while True :
466- s = fp .read (1 )
467- if not s or s == b'\000 ' :
468- break
469- if flag & FCOMMENT :
470- # Read and discard a null-terminated string containing a comment
471- while True :
472- s = fp .read (1 )
473- if not s or s == b'\000 ' :
474- break
475- if flag & FHCRC :
476- _read_exact (fp , 2 ) # Read & discard the 16-bit header CRC
477- return last_mtime
478-
479-
480403class _GzipReader (_compression .DecompressReader ):
481404 def __init__ (self , fp ):
482405 super ().__init__ (_PaddedFile (fp ), zlib ._ZlibDecompressor ,
483- wbits = - zlib .MAX_WBITS )
406+ wbits = 16 + zlib .MAX_WBITS )
484407 # Set flag indicating start of a new member
485408 self ._new_member = True
486409 self ._last_mtime = None
487410
488411 def _init_read (self ):
489- self ._crc = zlib .crc32 (b"" )
490412 self ._stream_size = 0 # Decompressed size of unconcatenated stream
491413
492- def _read_gzip_header (self ):
493- last_mtime = _read_gzip_header (self ._fp )
494- if last_mtime is None :
495- return False
496- self ._last_mtime = last_mtime
497- return True
498-
499414 def read (self , size = - 1 ):
500415 if size < 0 :
501416 return self .readall ()
@@ -509,33 +424,35 @@ def read(self, size=-1):
509424 while True :
510425 if self ._decompressor .eof :
511426 # Ending case: we've come to the end of a member in the file,
512- # so finish up this member, and read a new gzip header.
513- # Check the CRC and file size, and set the flag so we read
514- # a new member
427+ # so finish up this member and set the flag, so that we read a
428+ # new member
515429 self ._read_eof ()
516430 self ._new_member = True
517431 self ._decompressor = self ._decomp_factory (
518432 ** self ._decomp_args )
519433
520- if self ._new_member :
521- # If the _new_member flag is set, we have to
522- # jump to the next member, if there is one.
523- self ._init_read ()
524- if not self ._read_gzip_header ():
525- self ._size = self ._pos
526- return b""
527- self ._new_member = False
528-
529434 # Read a chunk of data from the file
530435 if self ._decompressor .needs_input :
531436 buf = self ._fp .read (READ_BUFFER_SIZE )
437+ if self ._new_member :
438+ # If the _new_member flag is set, we have to
439+ # jump to the next member, if there is one.
440+ self ._init_read ()
441+ if not buf :
442+ self ._size = self ._pos
443+ return b""
444+ self ._new_member = False
532445 uncompress = self ._decompressor .decompress (buf , size )
533446 else :
447+ assert not self ._new_member
534448 uncompress = self ._decompressor .decompress (b"" , size )
535449
450+ if self ._decompressor .gz_header_done :
451+ self ._last_mtime = self ._decompressor .gz_header_mtime
452+
536453 if self ._decompressor .unused_data != b"" :
537454 # Prepend the already read bytes to the fileobj so they can
538- # be seen by _read_eof() and _read_gzip_header()
455+ # be seen by _read_eof()
539456 self ._fp .prepend (self ._decompressor .unused_data )
540457
541458 if uncompress != b"" :
@@ -544,23 +461,12 @@ def read(self, size=-1):
544461 raise EOFError ("Compressed file ended before the "
545462 "end-of-stream marker was reached" )
546463
547- self ._crc = zlib .crc32 (uncompress , self ._crc )
548464 self ._stream_size += len (uncompress )
549465 self ._pos += len (uncompress )
550466 return uncompress
551467
552468 def _read_eof (self ):
553469 # We've read to the end of the file
554- # We check that the computed CRC and size of the
555- # uncompressed data matches the stored values. Note that the size
556- # stored is the true file size mod 2**32.
557- crc32 , isize = struct .unpack ("<II" , _read_exact (self ._fp , 8 ))
558- if crc32 != self ._crc :
559- raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
560- hex (self ._crc )))
561- elif isize != (self ._stream_size & 0xffffffff ):
562- raise BadGzipFile ("Incorrect length of data produced" )
563-
564470 # Gzip files can be padded with zeroes and still have archives.
565471 # Consume all zero bytes and set the file position to the first
566472 # non-zero byte. See http://www.gzip.org/#faq8
@@ -575,68 +481,32 @@ def _rewind(self):
575481 self ._new_member = True
576482
577483
578- def _create_simple_gzip_header (compresslevel : int ,
579- mtime = None ) -> bytes :
580- """
581- Write a simple gzip header with no extra fields.
582- :param compresslevel: Compresslevel used to determine the xfl bytes.
583- :param mtime: The mtime (must support conversion to a 32-bit integer).
584- :return: A bytes object representing the gzip header.
585- """
586- if mtime is None :
587- mtime = time .time ()
588- if compresslevel == _COMPRESS_LEVEL_BEST :
589- xfl = 2
590- elif compresslevel == _COMPRESS_LEVEL_FAST :
591- xfl = 4
592- else :
593- xfl = 0
594- # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
595- # fields added to header), mtime, xfl and os (255 for unknown OS).
596- return struct .pack ("<BBBBLBB" , 0x1f , 0x8b , 8 , 0 , int (mtime ), xfl , 255 )
597-
598-
599484def compress (data , compresslevel = _COMPRESS_LEVEL_BEST , * , mtime = None ):
600485 """Compress data in one shot and return the compressed string.
601486
602487 compresslevel sets the compression level in range of 0-9.
603488 mtime can be used to set the modification time. The modification time is
604489 set to the current time by default.
605490 """
606- if mtime == 0 :
607- # Use zlib as it creates the header with 0 mtime by default.
608- # This is faster and with less overhead.
609- return zlib .compress (data , level = compresslevel , wbits = 31 )
610- header = _create_simple_gzip_header (compresslevel , mtime )
611- trailer = struct .pack ("<LL" , zlib .crc32 (data ), (len (data ) & 0xffffffff ))
612- # Wbits=-15 creates a raw deflate block.
613- return (header + zlib .compress (data , level = compresslevel , wbits = - 15 ) +
614- trailer )
491+ if mtime is None :
492+ mtime = int (time .time ())
493+ return zlib .compress (data , level = compresslevel , wbits = 31 , mtime = mtime )
615494
616495
617496def decompress (data ):
618497 """Decompress a gzip compressed string in one shot.
619498 Return the decompressed string.
620499 """
621500 decompressed_members = []
622- while True :
623- fp = io .BytesIO (data )
624- if _read_gzip_header (fp ) is None :
625- return b"" .join (decompressed_members )
626- # Use a zlib raw deflate compressor
627- do = zlib .decompressobj (wbits = - zlib .MAX_WBITS )
628- # Read all the data except the header
629- decompressed = do .decompress (data [fp .tell ():])
630- if not do .eof or len (do .unused_data ) < 8 :
501+ while data :
502+ do = zlib .decompressobj (wbits = 16 + zlib .MAX_WBITS )
503+ decompressed = do .decompress (data )
504+ if not do .eof :
631505 raise EOFError ("Compressed file ended before the end-of-stream "
632506 "marker was reached" )
633- crc , length = struct .unpack ("<II" , do .unused_data [:8 ])
634- if crc != zlib .crc32 (decompressed ):
635- raise BadGzipFile ("CRC check failed" )
636- if length != (len (decompressed ) & 0xffffffff ):
637- raise BadGzipFile ("Incorrect length of data produced" )
638507 decompressed_members .append (decompressed )
639- data = do .unused_data [8 :].lstrip (b"\x00 " )
508+ data = do .unused_data .lstrip (b"\x00 " )
509+ return b"" .join (decompressed_members )
640510
641511
642512def main ():
0 commit comments