17db96d56Sopenharmony_ci"""Functions that read and write gzipped files. 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_ciThe user of the file doesn't have to worry about the compression, 47db96d56Sopenharmony_cibut random access is not allowed.""" 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_ci# based on Andrew Kuchling's minigzip.py distributed with the zlib module 77db96d56Sopenharmony_ci 87db96d56Sopenharmony_ciimport struct, sys, time, os 97db96d56Sopenharmony_ciimport zlib 107db96d56Sopenharmony_ciimport builtins 117db96d56Sopenharmony_ciimport io 127db96d56Sopenharmony_ciimport _compression 137db96d56Sopenharmony_ci 147db96d56Sopenharmony_ci__all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"] 157db96d56Sopenharmony_ci 167db96d56Sopenharmony_ciFTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 177db96d56Sopenharmony_ci 187db96d56Sopenharmony_ciREAD, WRITE = 1, 2 197db96d56Sopenharmony_ci 207db96d56Sopenharmony_ci_COMPRESS_LEVEL_FAST = 1 217db96d56Sopenharmony_ci_COMPRESS_LEVEL_TRADEOFF = 6 227db96d56Sopenharmony_ci_COMPRESS_LEVEL_BEST = 9 237db96d56Sopenharmony_ci 247db96d56Sopenharmony_ci 257db96d56Sopenharmony_cidef open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, 267db96d56Sopenharmony_ci encoding=None, errors=None, newline=None): 277db96d56Sopenharmony_ci """Open a gzip-compressed file in binary or text mode. 287db96d56Sopenharmony_ci 297db96d56Sopenharmony_ci The filename argument can be an actual filename (a str or bytes object), or 307db96d56Sopenharmony_ci an existing file object to read from or write to. 317db96d56Sopenharmony_ci 327db96d56Sopenharmony_ci The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for 337db96d56Sopenharmony_ci binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is 347db96d56Sopenharmony_ci "rb", and the default compresslevel is 9. 357db96d56Sopenharmony_ci 367db96d56Sopenharmony_ci For binary mode, this function is equivalent to the GzipFile constructor: 377db96d56Sopenharmony_ci GzipFile(filename, mode, compresslevel). In this case, the encoding, errors 387db96d56Sopenharmony_ci and newline arguments must not be provided. 397db96d56Sopenharmony_ci 407db96d56Sopenharmony_ci For text mode, a GzipFile object is created, and wrapped in an 417db96d56Sopenharmony_ci io.TextIOWrapper instance with the specified encoding, error handling 427db96d56Sopenharmony_ci behavior, and line ending(s). 437db96d56Sopenharmony_ci 447db96d56Sopenharmony_ci """ 457db96d56Sopenharmony_ci if "t" in mode: 467db96d56Sopenharmony_ci if "b" in mode: 477db96d56Sopenharmony_ci raise ValueError("Invalid mode: %r" % (mode,)) 487db96d56Sopenharmony_ci else: 497db96d56Sopenharmony_ci if encoding is not None: 507db96d56Sopenharmony_ci raise ValueError("Argument 'encoding' not supported in binary mode") 517db96d56Sopenharmony_ci if errors is not None: 527db96d56Sopenharmony_ci raise ValueError("Argument 'errors' not supported in binary mode") 537db96d56Sopenharmony_ci if newline is not None: 547db96d56Sopenharmony_ci raise ValueError("Argument 'newline' not supported in binary mode") 557db96d56Sopenharmony_ci 567db96d56Sopenharmony_ci gz_mode = mode.replace("t", "") 577db96d56Sopenharmony_ci if isinstance(filename, (str, bytes, os.PathLike)): 587db96d56Sopenharmony_ci binary_file = GzipFile(filename, gz_mode, compresslevel) 597db96d56Sopenharmony_ci elif hasattr(filename, "read") or hasattr(filename, "write"): 607db96d56Sopenharmony_ci binary_file = GzipFile(None, gz_mode, compresslevel, filename) 617db96d56Sopenharmony_ci else: 627db96d56Sopenharmony_ci raise TypeError("filename must be a str or bytes object, or a file") 637db96d56Sopenharmony_ci 647db96d56Sopenharmony_ci if "t" in mode: 657db96d56Sopenharmony_ci encoding = io.text_encoding(encoding) 667db96d56Sopenharmony_ci return io.TextIOWrapper(binary_file, encoding, errors, newline) 677db96d56Sopenharmony_ci else: 687db96d56Sopenharmony_ci return binary_file 697db96d56Sopenharmony_ci 707db96d56Sopenharmony_cidef write32u(output, value): 717db96d56Sopenharmony_ci # The L format writes the bit pattern correctly whether signed 727db96d56Sopenharmony_ci # or unsigned. 737db96d56Sopenharmony_ci output.write(struct.pack("<L", value)) 747db96d56Sopenharmony_ci 757db96d56Sopenharmony_ciclass _PaddedFile: 767db96d56Sopenharmony_ci """Minimal read-only file object that prepends a string to the contents 777db96d56Sopenharmony_ci of an actual file. Shouldn't be used outside of gzip.py, as it lacks 787db96d56Sopenharmony_ci essential functionality.""" 797db96d56Sopenharmony_ci 807db96d56Sopenharmony_ci def __init__(self, f, prepend=b''): 817db96d56Sopenharmony_ci self._buffer = prepend 827db96d56Sopenharmony_ci self._length = len(prepend) 837db96d56Sopenharmony_ci self.file = f 847db96d56Sopenharmony_ci self._read = 0 857db96d56Sopenharmony_ci 867db96d56Sopenharmony_ci def read(self, size): 877db96d56Sopenharmony_ci if self._read is None: 887db96d56Sopenharmony_ci return self.file.read(size) 897db96d56Sopenharmony_ci if self._read + size <= self._length: 907db96d56Sopenharmony_ci read = self._read 917db96d56Sopenharmony_ci self._read += size 927db96d56Sopenharmony_ci return self._buffer[read:self._read] 937db96d56Sopenharmony_ci else: 947db96d56Sopenharmony_ci read = self._read 957db96d56Sopenharmony_ci self._read = None 967db96d56Sopenharmony_ci return self._buffer[read:] + \ 977db96d56Sopenharmony_ci self.file.read(size-self._length+read) 987db96d56Sopenharmony_ci 997db96d56Sopenharmony_ci def prepend(self, prepend=b''): 1007db96d56Sopenharmony_ci if self._read is None: 1017db96d56Sopenharmony_ci self._buffer = prepend 1027db96d56Sopenharmony_ci else: # Assume data was read since the last prepend() call 1037db96d56Sopenharmony_ci self._read -= len(prepend) 1047db96d56Sopenharmony_ci return 1057db96d56Sopenharmony_ci self._length = len(self._buffer) 1067db96d56Sopenharmony_ci self._read = 0 1077db96d56Sopenharmony_ci 1087db96d56Sopenharmony_ci def seek(self, off): 1097db96d56Sopenharmony_ci self._read = None 1107db96d56Sopenharmony_ci self._buffer = None 1117db96d56Sopenharmony_ci return self.file.seek(off) 1127db96d56Sopenharmony_ci 1137db96d56Sopenharmony_ci def seekable(self): 1147db96d56Sopenharmony_ci return True # Allows fast-forwarding even in unseekable streams 1157db96d56Sopenharmony_ci 1167db96d56Sopenharmony_ci 1177db96d56Sopenharmony_ciclass BadGzipFile(OSError): 1187db96d56Sopenharmony_ci """Exception raised in some cases for invalid gzip files.""" 1197db96d56Sopenharmony_ci 1207db96d56Sopenharmony_ci 1217db96d56Sopenharmony_ciclass GzipFile(_compression.BaseStream): 1227db96d56Sopenharmony_ci """The GzipFile class simulates most of the methods of a file object with 1237db96d56Sopenharmony_ci the exception of the truncate() method. 1247db96d56Sopenharmony_ci 1257db96d56Sopenharmony_ci This class only supports opening files in binary mode. If you need to open a 1267db96d56Sopenharmony_ci compressed file in text mode, use the gzip.open() function. 1277db96d56Sopenharmony_ci 1287db96d56Sopenharmony_ci """ 1297db96d56Sopenharmony_ci 1307db96d56Sopenharmony_ci # Overridden with internal file object to be closed, if only a filename 1317db96d56Sopenharmony_ci # is passed in 1327db96d56Sopenharmony_ci myfileobj = None 1337db96d56Sopenharmony_ci 1347db96d56Sopenharmony_ci def __init__(self, filename=None, mode=None, 1357db96d56Sopenharmony_ci compresslevel=_COMPRESS_LEVEL_BEST, fileobj=None, mtime=None): 1367db96d56Sopenharmony_ci """Constructor for the GzipFile class. 1377db96d56Sopenharmony_ci 1387db96d56Sopenharmony_ci At least one of fileobj and filename must be given a 1397db96d56Sopenharmony_ci non-trivial value. 1407db96d56Sopenharmony_ci 1417db96d56Sopenharmony_ci The new class instance is based on fileobj, which can be a regular 1427db96d56Sopenharmony_ci file, an io.BytesIO object, or any other object which simulates a file. 1437db96d56Sopenharmony_ci It defaults to None, in which case filename is opened to provide 1447db96d56Sopenharmony_ci a file object. 1457db96d56Sopenharmony_ci 1467db96d56Sopenharmony_ci When fileobj is not None, the filename argument is only used to be 1477db96d56Sopenharmony_ci included in the gzip file header, which may include the original 1487db96d56Sopenharmony_ci filename of the uncompressed file. It defaults to the filename of 1497db96d56Sopenharmony_ci fileobj, if discernible; otherwise, it defaults to the empty string, 1507db96d56Sopenharmony_ci and in this case the original filename is not included in the header. 1517db96d56Sopenharmony_ci 1527db96d56Sopenharmony_ci The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or 1537db96d56Sopenharmony_ci 'xb' depending on whether the file will be read or written. The default 1547db96d56Sopenharmony_ci is the mode of fileobj if discernible; otherwise, the default is 'rb'. 1557db96d56Sopenharmony_ci A mode of 'r' is equivalent to one of 'rb', and similarly for 'w' and 1567db96d56Sopenharmony_ci 'wb', 'a' and 'ab', and 'x' and 'xb'. 1577db96d56Sopenharmony_ci 1587db96d56Sopenharmony_ci The compresslevel argument is an integer from 0 to 9 controlling the 1597db96d56Sopenharmony_ci level of compression; 1 is fastest and produces the least compression, 1607db96d56Sopenharmony_ci and 9 is slowest and produces the most compression. 0 is no compression 1617db96d56Sopenharmony_ci at all. The default is 9. 1627db96d56Sopenharmony_ci 1637db96d56Sopenharmony_ci The mtime argument is an optional numeric timestamp to be written 1647db96d56Sopenharmony_ci to the last modification time field in the stream when compressing. 1657db96d56Sopenharmony_ci If omitted or None, the current time is used. 1667db96d56Sopenharmony_ci 1677db96d56Sopenharmony_ci """ 1687db96d56Sopenharmony_ci 1697db96d56Sopenharmony_ci if mode and ('t' in mode or 'U' in mode): 1707db96d56Sopenharmony_ci raise ValueError("Invalid mode: {!r}".format(mode)) 1717db96d56Sopenharmony_ci if mode and 'b' not in mode: 1727db96d56Sopenharmony_ci mode += 'b' 1737db96d56Sopenharmony_ci if fileobj is None: 1747db96d56Sopenharmony_ci fileobj = self.myfileobj = builtins.open(filename, mode or 'rb') 1757db96d56Sopenharmony_ci if filename is None: 1767db96d56Sopenharmony_ci filename = getattr(fileobj, 'name', '') 1777db96d56Sopenharmony_ci if not isinstance(filename, (str, bytes)): 1787db96d56Sopenharmony_ci filename = '' 1797db96d56Sopenharmony_ci else: 1807db96d56Sopenharmony_ci filename = os.fspath(filename) 1817db96d56Sopenharmony_ci origmode = mode 1827db96d56Sopenharmony_ci if mode is None: 1837db96d56Sopenharmony_ci mode = getattr(fileobj, 'mode', 'rb') 1847db96d56Sopenharmony_ci 1857db96d56Sopenharmony_ci if mode.startswith('r'): 1867db96d56Sopenharmony_ci self.mode = READ 1877db96d56Sopenharmony_ci raw = _GzipReader(fileobj) 1887db96d56Sopenharmony_ci self._buffer = io.BufferedReader(raw) 1897db96d56Sopenharmony_ci self.name = filename 1907db96d56Sopenharmony_ci 1917db96d56Sopenharmony_ci elif mode.startswith(('w', 'a', 'x')): 1927db96d56Sopenharmony_ci if origmode is None: 1937db96d56Sopenharmony_ci import warnings 1947db96d56Sopenharmony_ci warnings.warn( 1957db96d56Sopenharmony_ci "GzipFile was opened for writing, but this will " 1967db96d56Sopenharmony_ci "change in future Python releases. " 1977db96d56Sopenharmony_ci "Specify the mode argument for opening it for writing.", 1987db96d56Sopenharmony_ci FutureWarning, 2) 1997db96d56Sopenharmony_ci self.mode = WRITE 2007db96d56Sopenharmony_ci self._init_write(filename) 2017db96d56Sopenharmony_ci self.compress = zlib.compressobj(compresslevel, 2027db96d56Sopenharmony_ci zlib.DEFLATED, 2037db96d56Sopenharmony_ci -zlib.MAX_WBITS, 2047db96d56Sopenharmony_ci zlib.DEF_MEM_LEVEL, 2057db96d56Sopenharmony_ci 0) 2067db96d56Sopenharmony_ci self._write_mtime = mtime 2077db96d56Sopenharmony_ci else: 2087db96d56Sopenharmony_ci raise ValueError("Invalid mode: {!r}".format(mode)) 2097db96d56Sopenharmony_ci 2107db96d56Sopenharmony_ci self.fileobj = fileobj 2117db96d56Sopenharmony_ci 2127db96d56Sopenharmony_ci if self.mode == WRITE: 2137db96d56Sopenharmony_ci self._write_gzip_header(compresslevel) 2147db96d56Sopenharmony_ci 2157db96d56Sopenharmony_ci @property 2167db96d56Sopenharmony_ci def filename(self): 2177db96d56Sopenharmony_ci import warnings 2187db96d56Sopenharmony_ci warnings.warn("use the name attribute", DeprecationWarning, 2) 2197db96d56Sopenharmony_ci if self.mode == WRITE and self.name[-3:] != ".gz": 2207db96d56Sopenharmony_ci return self.name + ".gz" 2217db96d56Sopenharmony_ci return self.name 2227db96d56Sopenharmony_ci 2237db96d56Sopenharmony_ci @property 2247db96d56Sopenharmony_ci def mtime(self): 2257db96d56Sopenharmony_ci """Last modification time read from stream, or None""" 2267db96d56Sopenharmony_ci return self._buffer.raw._last_mtime 2277db96d56Sopenharmony_ci 2287db96d56Sopenharmony_ci def __repr__(self): 2297db96d56Sopenharmony_ci s = repr(self.fileobj) 2307db96d56Sopenharmony_ci return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>' 2317db96d56Sopenharmony_ci 2327db96d56Sopenharmony_ci def _init_write(self, filename): 2337db96d56Sopenharmony_ci self.name = filename 2347db96d56Sopenharmony_ci self.crc = zlib.crc32(b"") 2357db96d56Sopenharmony_ci self.size = 0 2367db96d56Sopenharmony_ci self.writebuf = [] 2377db96d56Sopenharmony_ci self.bufsize = 0 2387db96d56Sopenharmony_ci self.offset = 0 # Current file offset for seek(), tell(), etc 2397db96d56Sopenharmony_ci 2407db96d56Sopenharmony_ci def _write_gzip_header(self, compresslevel): 2417db96d56Sopenharmony_ci self.fileobj.write(b'\037\213') # magic header 2427db96d56Sopenharmony_ci self.fileobj.write(b'\010') # compression method 2437db96d56Sopenharmony_ci try: 2447db96d56Sopenharmony_ci # RFC 1952 requires the FNAME field to be Latin-1. Do not 2457db96d56Sopenharmony_ci # include filenames that cannot be represented that way. 2467db96d56Sopenharmony_ci fname = os.path.basename(self.name) 2477db96d56Sopenharmony_ci if not isinstance(fname, bytes): 2487db96d56Sopenharmony_ci fname = fname.encode('latin-1') 2497db96d56Sopenharmony_ci if fname.endswith(b'.gz'): 2507db96d56Sopenharmony_ci fname = fname[:-3] 2517db96d56Sopenharmony_ci except UnicodeEncodeError: 2527db96d56Sopenharmony_ci fname = b'' 2537db96d56Sopenharmony_ci flags = 0 2547db96d56Sopenharmony_ci if fname: 2557db96d56Sopenharmony_ci flags = FNAME 2567db96d56Sopenharmony_ci self.fileobj.write(chr(flags).encode('latin-1')) 2577db96d56Sopenharmony_ci mtime = self._write_mtime 2587db96d56Sopenharmony_ci if mtime is None: 2597db96d56Sopenharmony_ci mtime = time.time() 2607db96d56Sopenharmony_ci write32u(self.fileobj, int(mtime)) 2617db96d56Sopenharmony_ci if compresslevel == _COMPRESS_LEVEL_BEST: 2627db96d56Sopenharmony_ci xfl = b'\002' 2637db96d56Sopenharmony_ci elif compresslevel == _COMPRESS_LEVEL_FAST: 2647db96d56Sopenharmony_ci xfl = b'\004' 2657db96d56Sopenharmony_ci else: 2667db96d56Sopenharmony_ci xfl = b'\000' 2677db96d56Sopenharmony_ci self.fileobj.write(xfl) 2687db96d56Sopenharmony_ci self.fileobj.write(b'\377') 2697db96d56Sopenharmony_ci if fname: 2707db96d56Sopenharmony_ci self.fileobj.write(fname + b'\000') 2717db96d56Sopenharmony_ci 2727db96d56Sopenharmony_ci def write(self,data): 2737db96d56Sopenharmony_ci self._check_not_closed() 2747db96d56Sopenharmony_ci if self.mode != WRITE: 2757db96d56Sopenharmony_ci import errno 2767db96d56Sopenharmony_ci raise OSError(errno.EBADF, "write() on read-only GzipFile object") 2777db96d56Sopenharmony_ci 2787db96d56Sopenharmony_ci if self.fileobj is None: 2797db96d56Sopenharmony_ci raise ValueError("write() on closed GzipFile object") 2807db96d56Sopenharmony_ci 2817db96d56Sopenharmony_ci if isinstance(data, (bytes, bytearray)): 2827db96d56Sopenharmony_ci length = len(data) 2837db96d56Sopenharmony_ci else: 2847db96d56Sopenharmony_ci # accept any data that supports the buffer protocol 2857db96d56Sopenharmony_ci data = memoryview(data) 2867db96d56Sopenharmony_ci length = data.nbytes 2877db96d56Sopenharmony_ci 2887db96d56Sopenharmony_ci if length > 0: 2897db96d56Sopenharmony_ci self.fileobj.write(self.compress.compress(data)) 2907db96d56Sopenharmony_ci self.size += length 2917db96d56Sopenharmony_ci self.crc = zlib.crc32(data, self.crc) 2927db96d56Sopenharmony_ci self.offset += length 2937db96d56Sopenharmony_ci 2947db96d56Sopenharmony_ci return length 2957db96d56Sopenharmony_ci 2967db96d56Sopenharmony_ci def read(self, size=-1): 2977db96d56Sopenharmony_ci self._check_not_closed() 2987db96d56Sopenharmony_ci if self.mode != READ: 2997db96d56Sopenharmony_ci import errno 3007db96d56Sopenharmony_ci raise OSError(errno.EBADF, "read() on write-only GzipFile object") 3017db96d56Sopenharmony_ci return self._buffer.read(size) 3027db96d56Sopenharmony_ci 3037db96d56Sopenharmony_ci def read1(self, size=-1): 3047db96d56Sopenharmony_ci """Implements BufferedIOBase.read1() 3057db96d56Sopenharmony_ci 3067db96d56Sopenharmony_ci Reads up to a buffer's worth of data if size is negative.""" 3077db96d56Sopenharmony_ci self._check_not_closed() 3087db96d56Sopenharmony_ci if self.mode != READ: 3097db96d56Sopenharmony_ci import errno 3107db96d56Sopenharmony_ci raise OSError(errno.EBADF, "read1() on write-only GzipFile object") 3117db96d56Sopenharmony_ci 3127db96d56Sopenharmony_ci if size < 0: 3137db96d56Sopenharmony_ci size = io.DEFAULT_BUFFER_SIZE 3147db96d56Sopenharmony_ci return self._buffer.read1(size) 3157db96d56Sopenharmony_ci 3167db96d56Sopenharmony_ci def peek(self, n): 3177db96d56Sopenharmony_ci self._check_not_closed() 3187db96d56Sopenharmony_ci if self.mode != READ: 3197db96d56Sopenharmony_ci import errno 3207db96d56Sopenharmony_ci raise OSError(errno.EBADF, "peek() on write-only GzipFile object") 3217db96d56Sopenharmony_ci return self._buffer.peek(n) 3227db96d56Sopenharmony_ci 3237db96d56Sopenharmony_ci @property 3247db96d56Sopenharmony_ci def closed(self): 3257db96d56Sopenharmony_ci return self.fileobj is None 3267db96d56Sopenharmony_ci 3277db96d56Sopenharmony_ci def close(self): 3287db96d56Sopenharmony_ci fileobj = self.fileobj 3297db96d56Sopenharmony_ci if fileobj is None: 3307db96d56Sopenharmony_ci return 3317db96d56Sopenharmony_ci self.fileobj = None 3327db96d56Sopenharmony_ci try: 3337db96d56Sopenharmony_ci if self.mode == WRITE: 3347db96d56Sopenharmony_ci fileobj.write(self.compress.flush()) 3357db96d56Sopenharmony_ci write32u(fileobj, self.crc) 3367db96d56Sopenharmony_ci # self.size may exceed 2 GiB, or even 4 GiB 3377db96d56Sopenharmony_ci write32u(fileobj, self.size & 0xffffffff) 3387db96d56Sopenharmony_ci elif self.mode == READ: 3397db96d56Sopenharmony_ci self._buffer.close() 3407db96d56Sopenharmony_ci finally: 3417db96d56Sopenharmony_ci myfileobj = self.myfileobj 3427db96d56Sopenharmony_ci if myfileobj: 3437db96d56Sopenharmony_ci self.myfileobj = None 3447db96d56Sopenharmony_ci myfileobj.close() 3457db96d56Sopenharmony_ci 3467db96d56Sopenharmony_ci def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): 3477db96d56Sopenharmony_ci self._check_not_closed() 3487db96d56Sopenharmony_ci if self.mode == WRITE: 3497db96d56Sopenharmony_ci # Ensure the compressor's buffer is flushed 3507db96d56Sopenharmony_ci self.fileobj.write(self.compress.flush(zlib_mode)) 3517db96d56Sopenharmony_ci self.fileobj.flush() 3527db96d56Sopenharmony_ci 3537db96d56Sopenharmony_ci def fileno(self): 3547db96d56Sopenharmony_ci """Invoke the underlying file object's fileno() method. 3557db96d56Sopenharmony_ci 3567db96d56Sopenharmony_ci This will raise AttributeError if the underlying file object 3577db96d56Sopenharmony_ci doesn't support fileno(). 3587db96d56Sopenharmony_ci """ 3597db96d56Sopenharmony_ci return self.fileobj.fileno() 3607db96d56Sopenharmony_ci 3617db96d56Sopenharmony_ci def rewind(self): 3627db96d56Sopenharmony_ci '''Return the uncompressed stream file position indicator to the 3637db96d56Sopenharmony_ci beginning of the file''' 3647db96d56Sopenharmony_ci if self.mode != READ: 3657db96d56Sopenharmony_ci raise OSError("Can't rewind in write mode") 3667db96d56Sopenharmony_ci self._buffer.seek(0) 3677db96d56Sopenharmony_ci 3687db96d56Sopenharmony_ci def readable(self): 3697db96d56Sopenharmony_ci return self.mode == READ 3707db96d56Sopenharmony_ci 3717db96d56Sopenharmony_ci def writable(self): 3727db96d56Sopenharmony_ci return self.mode == WRITE 3737db96d56Sopenharmony_ci 3747db96d56Sopenharmony_ci def seekable(self): 3757db96d56Sopenharmony_ci return True 3767db96d56Sopenharmony_ci 3777db96d56Sopenharmony_ci def seek(self, offset, whence=io.SEEK_SET): 3787db96d56Sopenharmony_ci if self.mode == WRITE: 3797db96d56Sopenharmony_ci if whence != io.SEEK_SET: 3807db96d56Sopenharmony_ci if whence == io.SEEK_CUR: 3817db96d56Sopenharmony_ci offset = self.offset + offset 3827db96d56Sopenharmony_ci else: 3837db96d56Sopenharmony_ci raise ValueError('Seek from end not supported') 3847db96d56Sopenharmony_ci if offset < self.offset: 3857db96d56Sopenharmony_ci raise OSError('Negative seek in write mode') 3867db96d56Sopenharmony_ci count = offset - self.offset 3877db96d56Sopenharmony_ci chunk = b'\0' * 1024 3887db96d56Sopenharmony_ci for i in range(count // 1024): 3897db96d56Sopenharmony_ci self.write(chunk) 3907db96d56Sopenharmony_ci self.write(b'\0' * (count % 1024)) 3917db96d56Sopenharmony_ci elif self.mode == READ: 3927db96d56Sopenharmony_ci self._check_not_closed() 3937db96d56Sopenharmony_ci return self._buffer.seek(offset, whence) 3947db96d56Sopenharmony_ci 3957db96d56Sopenharmony_ci return self.offset 3967db96d56Sopenharmony_ci 3977db96d56Sopenharmony_ci def readline(self, size=-1): 3987db96d56Sopenharmony_ci self._check_not_closed() 3997db96d56Sopenharmony_ci return self._buffer.readline(size) 4007db96d56Sopenharmony_ci 4017db96d56Sopenharmony_ci 4027db96d56Sopenharmony_cidef _read_exact(fp, n): 4037db96d56Sopenharmony_ci '''Read exactly *n* bytes from `fp` 4047db96d56Sopenharmony_ci 4057db96d56Sopenharmony_ci This method is required because fp may be unbuffered, 4067db96d56Sopenharmony_ci i.e. return short reads. 4077db96d56Sopenharmony_ci ''' 4087db96d56Sopenharmony_ci data = fp.read(n) 4097db96d56Sopenharmony_ci while len(data) < n: 4107db96d56Sopenharmony_ci b = fp.read(n - len(data)) 4117db96d56Sopenharmony_ci if not b: 4127db96d56Sopenharmony_ci raise EOFError("Compressed file ended before the " 4137db96d56Sopenharmony_ci "end-of-stream marker was reached") 4147db96d56Sopenharmony_ci data += b 4157db96d56Sopenharmony_ci return data 4167db96d56Sopenharmony_ci 4177db96d56Sopenharmony_ci 4187db96d56Sopenharmony_cidef _read_gzip_header(fp): 4197db96d56Sopenharmony_ci '''Read a gzip header from `fp` and progress to the end of the header. 4207db96d56Sopenharmony_ci 4217db96d56Sopenharmony_ci Returns last mtime if header was present or None otherwise. 4227db96d56Sopenharmony_ci ''' 4237db96d56Sopenharmony_ci magic = fp.read(2) 4247db96d56Sopenharmony_ci if magic == b'': 4257db96d56Sopenharmony_ci return None 4267db96d56Sopenharmony_ci 4277db96d56Sopenharmony_ci if magic != b'\037\213': 4287db96d56Sopenharmony_ci raise BadGzipFile('Not a gzipped file (%r)' % magic) 4297db96d56Sopenharmony_ci 4307db96d56Sopenharmony_ci (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8)) 4317db96d56Sopenharmony_ci if method != 8: 4327db96d56Sopenharmony_ci raise BadGzipFile('Unknown compression method') 4337db96d56Sopenharmony_ci 4347db96d56Sopenharmony_ci if flag & FEXTRA: 4357db96d56Sopenharmony_ci # Read & discard the extra field, if present 4367db96d56Sopenharmony_ci extra_len, = struct.unpack("<H", _read_exact(fp, 2)) 4377db96d56Sopenharmony_ci _read_exact(fp, extra_len) 4387db96d56Sopenharmony_ci if flag & FNAME: 4397db96d56Sopenharmony_ci # Read and discard a null-terminated string containing the filename 4407db96d56Sopenharmony_ci while True: 4417db96d56Sopenharmony_ci s = fp.read(1) 4427db96d56Sopenharmony_ci if not s or s==b'\000': 4437db96d56Sopenharmony_ci break 4447db96d56Sopenharmony_ci if flag & FCOMMENT: 4457db96d56Sopenharmony_ci # Read and discard a null-terminated string containing a comment 4467db96d56Sopenharmony_ci while True: 4477db96d56Sopenharmony_ci s = fp.read(1) 4487db96d56Sopenharmony_ci if not s or s==b'\000': 4497db96d56Sopenharmony_ci break 4507db96d56Sopenharmony_ci if flag & FHCRC: 4517db96d56Sopenharmony_ci _read_exact(fp, 2) # Read & discard the 16-bit header CRC 4527db96d56Sopenharmony_ci return last_mtime 4537db96d56Sopenharmony_ci 4547db96d56Sopenharmony_ci 4557db96d56Sopenharmony_ciclass _GzipReader(_compression.DecompressReader): 4567db96d56Sopenharmony_ci def __init__(self, fp): 4577db96d56Sopenharmony_ci super().__init__(_PaddedFile(fp), zlib.decompressobj, 4587db96d56Sopenharmony_ci wbits=-zlib.MAX_WBITS) 4597db96d56Sopenharmony_ci # Set flag indicating start of a new member 4607db96d56Sopenharmony_ci self._new_member = True 4617db96d56Sopenharmony_ci self._last_mtime = None 4627db96d56Sopenharmony_ci 4637db96d56Sopenharmony_ci def _init_read(self): 4647db96d56Sopenharmony_ci self._crc = zlib.crc32(b"") 4657db96d56Sopenharmony_ci self._stream_size = 0 # Decompressed size of unconcatenated stream 4667db96d56Sopenharmony_ci 4677db96d56Sopenharmony_ci def _read_gzip_header(self): 4687db96d56Sopenharmony_ci last_mtime = _read_gzip_header(self._fp) 4697db96d56Sopenharmony_ci if last_mtime is None: 4707db96d56Sopenharmony_ci return False 4717db96d56Sopenharmony_ci self._last_mtime = last_mtime 4727db96d56Sopenharmony_ci return True 4737db96d56Sopenharmony_ci 4747db96d56Sopenharmony_ci def read(self, size=-1): 4757db96d56Sopenharmony_ci if size < 0: 4767db96d56Sopenharmony_ci return self.readall() 4777db96d56Sopenharmony_ci # size=0 is special because decompress(max_length=0) is not supported 4787db96d56Sopenharmony_ci if not size: 4797db96d56Sopenharmony_ci return b"" 4807db96d56Sopenharmony_ci 4817db96d56Sopenharmony_ci # For certain input data, a single 4827db96d56Sopenharmony_ci # call to decompress() may not return 4837db96d56Sopenharmony_ci # any data. In this case, retry until we get some data or reach EOF. 4847db96d56Sopenharmony_ci while True: 4857db96d56Sopenharmony_ci if self._decompressor.eof: 4867db96d56Sopenharmony_ci # Ending case: we've come to the end of a member in the file, 4877db96d56Sopenharmony_ci # so finish up this member, and read a new gzip header. 4887db96d56Sopenharmony_ci # Check the CRC and file size, and set the flag so we read 4897db96d56Sopenharmony_ci # a new member 4907db96d56Sopenharmony_ci self._read_eof() 4917db96d56Sopenharmony_ci self._new_member = True 4927db96d56Sopenharmony_ci self._decompressor = self._decomp_factory( 4937db96d56Sopenharmony_ci **self._decomp_args) 4947db96d56Sopenharmony_ci 4957db96d56Sopenharmony_ci if self._new_member: 4967db96d56Sopenharmony_ci # If the _new_member flag is set, we have to 4977db96d56Sopenharmony_ci # jump to the next member, if there is one. 4987db96d56Sopenharmony_ci self._init_read() 4997db96d56Sopenharmony_ci if not self._read_gzip_header(): 5007db96d56Sopenharmony_ci self._size = self._pos 5017db96d56Sopenharmony_ci return b"" 5027db96d56Sopenharmony_ci self._new_member = False 5037db96d56Sopenharmony_ci 5047db96d56Sopenharmony_ci # Read a chunk of data from the file 5057db96d56Sopenharmony_ci buf = self._fp.read(io.DEFAULT_BUFFER_SIZE) 5067db96d56Sopenharmony_ci 5077db96d56Sopenharmony_ci uncompress = self._decompressor.decompress(buf, size) 5087db96d56Sopenharmony_ci if self._decompressor.unconsumed_tail != b"": 5097db96d56Sopenharmony_ci self._fp.prepend(self._decompressor.unconsumed_tail) 5107db96d56Sopenharmony_ci elif self._decompressor.unused_data != b"": 5117db96d56Sopenharmony_ci # Prepend the already read bytes to the fileobj so they can 5127db96d56Sopenharmony_ci # be seen by _read_eof() and _read_gzip_header() 5137db96d56Sopenharmony_ci self._fp.prepend(self._decompressor.unused_data) 5147db96d56Sopenharmony_ci 5157db96d56Sopenharmony_ci if uncompress != b"": 5167db96d56Sopenharmony_ci break 5177db96d56Sopenharmony_ci if buf == b"": 5187db96d56Sopenharmony_ci raise EOFError("Compressed file ended before the " 5197db96d56Sopenharmony_ci "end-of-stream marker was reached") 5207db96d56Sopenharmony_ci 5217db96d56Sopenharmony_ci self._add_read_data( uncompress ) 5227db96d56Sopenharmony_ci self._pos += len(uncompress) 5237db96d56Sopenharmony_ci return uncompress 5247db96d56Sopenharmony_ci 5257db96d56Sopenharmony_ci def _add_read_data(self, data): 5267db96d56Sopenharmony_ci self._crc = zlib.crc32(data, self._crc) 5277db96d56Sopenharmony_ci self._stream_size = self._stream_size + len(data) 5287db96d56Sopenharmony_ci 5297db96d56Sopenharmony_ci def _read_eof(self): 5307db96d56Sopenharmony_ci # We've read to the end of the file 5317db96d56Sopenharmony_ci # We check that the computed CRC and size of the 5327db96d56Sopenharmony_ci # uncompressed data matches the stored values. Note that the size 5337db96d56Sopenharmony_ci # stored is the true file size mod 2**32. 5347db96d56Sopenharmony_ci crc32, isize = struct.unpack("<II", _read_exact(self._fp, 8)) 5357db96d56Sopenharmony_ci if crc32 != self._crc: 5367db96d56Sopenharmony_ci raise BadGzipFile("CRC check failed %s != %s" % (hex(crc32), 5377db96d56Sopenharmony_ci hex(self._crc))) 5387db96d56Sopenharmony_ci elif isize != (self._stream_size & 0xffffffff): 5397db96d56Sopenharmony_ci raise BadGzipFile("Incorrect length of data produced") 5407db96d56Sopenharmony_ci 5417db96d56Sopenharmony_ci # Gzip files can be padded with zeroes and still have archives. 5427db96d56Sopenharmony_ci # Consume all zero bytes and set the file position to the first 5437db96d56Sopenharmony_ci # non-zero byte. See http://www.gzip.org/#faq8 5447db96d56Sopenharmony_ci c = b"\x00" 5457db96d56Sopenharmony_ci while c == b"\x00": 5467db96d56Sopenharmony_ci c = self._fp.read(1) 5477db96d56Sopenharmony_ci if c: 5487db96d56Sopenharmony_ci self._fp.prepend(c) 5497db96d56Sopenharmony_ci 5507db96d56Sopenharmony_ci def _rewind(self): 5517db96d56Sopenharmony_ci super()._rewind() 5527db96d56Sopenharmony_ci self._new_member = True 5537db96d56Sopenharmony_ci 5547db96d56Sopenharmony_ci 5557db96d56Sopenharmony_cidef _create_simple_gzip_header(compresslevel: int, 5567db96d56Sopenharmony_ci mtime = None) -> bytes: 5577db96d56Sopenharmony_ci """ 5587db96d56Sopenharmony_ci Write a simple gzip header with no extra fields. 5597db96d56Sopenharmony_ci :param compresslevel: Compresslevel used to determine the xfl bytes. 5607db96d56Sopenharmony_ci :param mtime: The mtime (must support conversion to a 32-bit integer). 5617db96d56Sopenharmony_ci :return: A bytes object representing the gzip header. 5627db96d56Sopenharmony_ci """ 5637db96d56Sopenharmony_ci if mtime is None: 5647db96d56Sopenharmony_ci mtime = time.time() 5657db96d56Sopenharmony_ci if compresslevel == _COMPRESS_LEVEL_BEST: 5667db96d56Sopenharmony_ci xfl = 2 5677db96d56Sopenharmony_ci elif compresslevel == _COMPRESS_LEVEL_FAST: 5687db96d56Sopenharmony_ci xfl = 4 5697db96d56Sopenharmony_ci else: 5707db96d56Sopenharmony_ci xfl = 0 5717db96d56Sopenharmony_ci # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra 5727db96d56Sopenharmony_ci # fields added to header), mtime, xfl and os (255 for unknown OS). 5737db96d56Sopenharmony_ci return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255) 5747db96d56Sopenharmony_ci 5757db96d56Sopenharmony_ci 5767db96d56Sopenharmony_cidef compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): 5777db96d56Sopenharmony_ci """Compress data in one shot and return the compressed string. 5787db96d56Sopenharmony_ci 5797db96d56Sopenharmony_ci compresslevel sets the compression level in range of 0-9. 5807db96d56Sopenharmony_ci mtime can be used to set the modification time. The modification time is 5817db96d56Sopenharmony_ci set to the current time by default. 5827db96d56Sopenharmony_ci """ 5837db96d56Sopenharmony_ci if mtime == 0: 5847db96d56Sopenharmony_ci # Use zlib as it creates the header with 0 mtime by default. 5857db96d56Sopenharmony_ci # This is faster and with less overhead. 5867db96d56Sopenharmony_ci return zlib.compress(data, level=compresslevel, wbits=31) 5877db96d56Sopenharmony_ci header = _create_simple_gzip_header(compresslevel, mtime) 5887db96d56Sopenharmony_ci trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff)) 5897db96d56Sopenharmony_ci # Wbits=-15 creates a raw deflate block. 5907db96d56Sopenharmony_ci return (header + zlib.compress(data, level=compresslevel, wbits=-15) + 5917db96d56Sopenharmony_ci trailer) 5927db96d56Sopenharmony_ci 5937db96d56Sopenharmony_ci 5947db96d56Sopenharmony_cidef decompress(data): 5957db96d56Sopenharmony_ci """Decompress a gzip compressed string in one shot. 5967db96d56Sopenharmony_ci Return the decompressed string. 5977db96d56Sopenharmony_ci """ 5987db96d56Sopenharmony_ci decompressed_members = [] 5997db96d56Sopenharmony_ci while True: 6007db96d56Sopenharmony_ci fp = io.BytesIO(data) 6017db96d56Sopenharmony_ci if _read_gzip_header(fp) is None: 6027db96d56Sopenharmony_ci return b"".join(decompressed_members) 6037db96d56Sopenharmony_ci # Use a zlib raw deflate compressor 6047db96d56Sopenharmony_ci do = zlib.decompressobj(wbits=-zlib.MAX_WBITS) 6057db96d56Sopenharmony_ci # Read all the data except the header 6067db96d56Sopenharmony_ci decompressed = do.decompress(data[fp.tell():]) 6077db96d56Sopenharmony_ci if not do.eof or len(do.unused_data) < 8: 6087db96d56Sopenharmony_ci raise EOFError("Compressed file ended before the end-of-stream " 6097db96d56Sopenharmony_ci "marker was reached") 6107db96d56Sopenharmony_ci crc, length = struct.unpack("<II", do.unused_data[:8]) 6117db96d56Sopenharmony_ci if crc != zlib.crc32(decompressed): 6127db96d56Sopenharmony_ci raise BadGzipFile("CRC check failed") 6137db96d56Sopenharmony_ci if length != (len(decompressed) & 0xffffffff): 6147db96d56Sopenharmony_ci raise BadGzipFile("Incorrect length of data produced") 6157db96d56Sopenharmony_ci decompressed_members.append(decompressed) 6167db96d56Sopenharmony_ci data = do.unused_data[8:].lstrip(b"\x00") 6177db96d56Sopenharmony_ci 6187db96d56Sopenharmony_ci 6197db96d56Sopenharmony_cidef main(): 6207db96d56Sopenharmony_ci from argparse import ArgumentParser 6217db96d56Sopenharmony_ci parser = ArgumentParser(description= 6227db96d56Sopenharmony_ci "A simple command line interface for the gzip module: act like gzip, " 6237db96d56Sopenharmony_ci "but do not delete the input file.") 6247db96d56Sopenharmony_ci group = parser.add_mutually_exclusive_group() 6257db96d56Sopenharmony_ci group.add_argument('--fast', action='store_true', help='compress faster') 6267db96d56Sopenharmony_ci group.add_argument('--best', action='store_true', help='compress better') 6277db96d56Sopenharmony_ci group.add_argument("-d", "--decompress", action="store_true", 6287db96d56Sopenharmony_ci help="act like gunzip instead of gzip") 6297db96d56Sopenharmony_ci 6307db96d56Sopenharmony_ci parser.add_argument("args", nargs="*", default=["-"], metavar='file') 6317db96d56Sopenharmony_ci args = parser.parse_args() 6327db96d56Sopenharmony_ci 6337db96d56Sopenharmony_ci compresslevel = _COMPRESS_LEVEL_TRADEOFF 6347db96d56Sopenharmony_ci if args.fast: 6357db96d56Sopenharmony_ci compresslevel = _COMPRESS_LEVEL_FAST 6367db96d56Sopenharmony_ci elif args.best: 6377db96d56Sopenharmony_ci compresslevel = _COMPRESS_LEVEL_BEST 6387db96d56Sopenharmony_ci 6397db96d56Sopenharmony_ci for arg in args.args: 6407db96d56Sopenharmony_ci if args.decompress: 6417db96d56Sopenharmony_ci if arg == "-": 6427db96d56Sopenharmony_ci f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer) 6437db96d56Sopenharmony_ci g = sys.stdout.buffer 6447db96d56Sopenharmony_ci else: 6457db96d56Sopenharmony_ci if arg[-3:] != ".gz": 6467db96d56Sopenharmony_ci sys.exit(f"filename doesn't end in .gz: {arg!r}") 6477db96d56Sopenharmony_ci f = open(arg, "rb") 6487db96d56Sopenharmony_ci g = builtins.open(arg[:-3], "wb") 6497db96d56Sopenharmony_ci else: 6507db96d56Sopenharmony_ci if arg == "-": 6517db96d56Sopenharmony_ci f = sys.stdin.buffer 6527db96d56Sopenharmony_ci g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer, 6537db96d56Sopenharmony_ci compresslevel=compresslevel) 6547db96d56Sopenharmony_ci else: 6557db96d56Sopenharmony_ci f = builtins.open(arg, "rb") 6567db96d56Sopenharmony_ci g = open(arg + ".gz", "wb") 6577db96d56Sopenharmony_ci while True: 6587db96d56Sopenharmony_ci chunk = f.read(io.DEFAULT_BUFFER_SIZE) 6597db96d56Sopenharmony_ci if not chunk: 6607db96d56Sopenharmony_ci break 6617db96d56Sopenharmony_ci g.write(chunk) 6627db96d56Sopenharmony_ci if g is not sys.stdout.buffer: 6637db96d56Sopenharmony_ci g.close() 6647db96d56Sopenharmony_ci if f is not sys.stdin.buffer: 6657db96d56Sopenharmony_ci f.close() 6667db96d56Sopenharmony_ci 6677db96d56Sopenharmony_ciif __name__ == '__main__': 6687db96d56Sopenharmony_ci main() 669